/* $Id: cct.c,v 1.3 2004/11/26 11:41:32 zlb Exp $ */

#define GBK

#if defined(WIN32) && defined(_DEBUG)
#	define DEBUG
#endif

#define SKIP_COMMENTS	/* undefine it may cause unmatched '}''s */
#define CLOSECC_AT_PAR	/* Terminate hanzi mode if blank lines */

typedef unsigned char byte;

#define CCT_VERSION	"5.17"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#if !defined(UNIX) && !defined(GO32)
#  include <process.h>
#  ifdef WIN32
#    include <malloc.h>
#  else
#    include <alloc.h>
#  endif
#  define EOL	10
#else
#  include <unistd.h>
#  define EOL	10
#endif

#ifdef WIN32
#  include <io.h>
#  ifndef __MINGW32__
#    define PATH_MAX      _MAX_PATH+1
#  else
#    include <limits.h>   /* PATH_MAX */
#  endif
#else
#  include <limits.h>   /* PATH_MAX */
#endif

#include "proto.h"
#include "fullpath.h"

#define ExASCII		0	/* !=0 ==> use ASCII code above 128 for cc */

/*
 * Per-character attributes consulted by process() when it decides where a
 * line may be broken and where extra glue macros have to be emitted.
 * One instance exists for every double-byte character (cc_info) and for
 * every single byte value (ascii_info).
 */
struct CHAR_INFO {
  /* a line break is permitted immediately before this character */
  unsigned int break_before : 1;
  /* a line break is permitted immediately after this character */
  unsigned int break_after : 1;

  /* --- flags only meaningful for Chinese characters --- */
  /* emit special glue in front of this character */
  unsigned int glue_before : 1;
  /* emit special glue behind this character */
  unsigned int glue_after : 1;

  /* --- flags only meaningful for ASCII characters --- */
  /* replaced by its Chinese counterpart while in HanZi mode */
  unsigned int trans_to_cc : 1;
  /* one of TeX's reserved characters (written as {\char N}) */
  unsigned int tex_reserved : 1;
};

/*
 * NumberOfCC  - number of entries in the double-byte character table.
 * CHARNO(c1,c2) - table index of the character with lead byte c1 and trail
 *                 byte c2; 0 for the first code of the range and
 *                 NumberOfCC-1 for (254,254).
 *
 * Without GBK both bytes run from 161 to 254 (94 x 94 codes); with GBK the
 * lead byte runs from 129 to 254 and the trail byte from 64 to 254.
 * The arguments are parenthesized so that any expression may be passed.
 * No range checking is done here; callers validate the bytes first.
 */
#ifndef GBK
# define NumberOfCC	(94*94)
# define CHARNO(c1,c2)	(((c1)-161)*94+(c2)-161)
#else
# define NumberOfCC	((254-129+1)*(254-64+1))
# define CHARNO(c1,c2)	(((c1)-129)*(254-64+1)+(c2)-64)
#endif

/* Attribute table for double-byte characters, indexed by CHARNO(c1,c2).
   In the GBK build it is a pointer that main() fills by allocating
   NumberOfCC entries (a far pointer on builds that are neither UNIX, GO32
   nor WIN32); otherwise it is a static array. */
#ifdef GBK
#  if !defined(UNIX) && !defined(GO32) && !defined(WIN32)
     struct CHAR_INFO far *cc_info;
#  else
     struct CHAR_INFO *cc_info;
#  endif
#else
  struct CHAR_INFO cc_info[NumberOfCC];
#endif
/* Attribute table for single bytes, indexed by the byte value (0-255). */
struct CHAR_INFO ascii_info[256];

/* Streams for the source text, the generated TeX, the optional line map
   and the option (.ini) file read by init().  NULL means "not open". */
FILE *f_in=NULL,*f_out=NULL,*f_map=NULL,*f_ini=NULL;
int map_flag;		/* non-zero: write the line map file (option -m) */
int verbose = 1;	/* set to 0 by option -q: no banner/progress output */
/* File names as resolved by init(); a leading '-' selects stdin/stdout. */
byte infilename[PATH_MAX],outfilename[PATH_MAX],mapfilename[PATH_MAX];
int inlineno;		/* Current line no. of the input file */
int outlineno;		/* Current line no. of the output file (only maintained when map_flag is set) */

/* Pushback stack shared by unreadchar() and readchar(); input_buffer_ptr
   points one past the most recently pushed byte. */
#define INPUT_BUFFER_LEN	16
byte input_buffer[INPUT_BUFFER_LEN],*input_buffer_ptr;

void closefiles _VOID
/* Release every stream this program may have opened and reset the
   corresponding global to NULL.  stdin is never closed; stdout is only
   flushed.  Safe to call when some or all streams are already NULL. */
{
  /* input: close unless it is the standard input */
  if (f_in!=NULL) {
    if (f_in!=stdin) fclose(f_in);
  }
  f_in=NULL;

  /* output: a real file is closed, the standard output is only flushed */
  if (f_out==stdout)
    fflush(f_out);
  else if (f_out!=NULL)
    fclose(f_out);
  f_out=NULL;

  /* map and ini files are always ordinary files */
  if (f_map!=NULL) {
    fclose(f_map);
    f_map=NULL;
  }
  if (f_ini!=NULL) {
    fclose(f_ini);
    f_ini=NULL;
  }
}

void ErrExit _P1(byte *,s)
/* Fatal-error exit: closes all files via closefiles(), then prints
   "Error: <s>" on stderr and terminates the program with exit status 1.
   Does not return.  s is the message text without a trailing newline. */
{
  closefiles();			/* close/flush the streams before reporting */
  fprintf(stderr,"Error: %s\n",s);
  exit(1);
}

int testfileext _P1(byte *,filename)
/* Locate the extension of a file name.
   The name is scanned from its end towards its beginning; the scan stops
   at the first '.', '\\' or '/' met.  Returns the index of that '.' when a
   dot is found first, and -1 when a path separator is found first or when
   the name contains none of the three characters. */
{
  register int pos;

  for (pos=(int)strlen(filename)-1; pos>=0; pos--) {
    switch (filename[pos]) {
      case '.' :
        return(pos);		/* extension starts here */
      case '\\' :
      case '/' :
        return(-1);		/* reached the directory part: no extension */
      default :
        break;
    }
  }
  return(-1);
}

void unreadchar _P1(register int,c)
/* Push the character c back onto the input so that the next readchar()
   returns it.  Characters are returned in last-in, first-out order.
   At most INPUT_BUFFER_LEN characters can be pending; exceeding that is a
   fatal error ("Input buffer overflow!").
   EOF is ignored, in the same way ungetc() ignores it: the pushback buffer
   holds bytes, so a stored EOF would come back as the byte 255 and be
   mistaken for input data.  Nothing needs to be stored for it here, since
   readchar() simply asks the stream again and passes on whatever fgetc()
   reports once the buffer is empty. */
{
  if (c==EOF) return;
  if (input_buffer_ptr-input_buffer>=INPUT_BUFFER_LEN)
    ErrExit("Input buffer overflow!");
  *(input_buffer_ptr++)=c;
}

int readchar _VOID
/* Return the next input character.
   Characters pushed back with unreadchar() are delivered first (most
   recently pushed first) and are not counted again.  Otherwise a byte is
   read from f_in; carriage returns are skipped, and EOF is returned at
   end of input.  For every end-of-line byte read from the file the input
   line counter is advanced, a progress message is written to stderr every
   32 lines when verbose, and a line of the map file is started when
   map_flag is set. */
{
  register int ch;
  int is_eol;

  /* pending pushed-back characters take precedence */
  if (input_buffer_ptr>input_buffer) {
    --input_buffer_ptr;
    return(*input_buffer_ptr);
  }

  for (;;) {
    ch=fgetc(f_in);
    if (ch==EOF) return EOF;
    if (ch=='\r') continue;		/* carriage returns are dropped */

    is_eol=(ch==EOL);
#ifndef GBK
    /* EOL with bit 7 set also counts as a line end */
    if (ch==EOL+128) is_eol=1;
#endif
    if (is_eol) {
      ++inlineno;
      if (verbose && (inlineno&31)==0)
        fprintf(stderr,"Line %d%c",inlineno,13);
      if (map_flag) {
        ++outlineno;
        fprintf(f_map,"\n%d ===> %d",inlineno,outlineno);
      }
    }
    return(ch);
  }
}

void openfiles _VOID
/* Open the input, output and (when map_flag is set) map files named by
   infilename, outfilename and mapfilename, and write the \CCTpreproc
   header to the output.
   A name starting with '-' selects stdin / stdout instead of a file.
   An already existing output file is first renamed to the same name with
   the extension ".bak" (replacing any older backup).
   Also resets inlineno and the pushback buffer, and outlineno when a map
   file is written.  Any failure ends the program through ErrExit(). */
{
  byte wkstr[PATH_MAX];

  if (verbose)
    fprintf(stderr,"Transforming %s ===> %s\n", infilename, outfilename);

  /* ---- input ---- */
  if (infilename[0] == '-')
    f_in = stdin;
  else {
    if ((f_in=fopen(infilename,"rb"))==NULL) ErrExit("Cannot open input file");
    if (setvbuf(f_in,NULL,_IOFBF,4*1024))
      ErrExit("Cannot allocate I/O buffer");
  }
  inlineno=1; input_buffer_ptr=input_buffer;

  /* ---- output ---- */
  if (outfilename[0] == '-')
    f_out = stdout;
  else {
    /* probe for an existing file so that it can be kept as a backup */
    if ((f_out=fopen(outfilename,"rb"))!=NULL) {
      register int l=testfileext(outfilename);
      fclose(f_out); f_out=NULL;
      /* no extension: append ".bak" instead of writing in front of wkstr */
      if (l<0) l=strlen(outfilename);
      /* only build the backup name when it fits into wkstr */
      if (l+sizeof(".bak")<=sizeof(wkstr)) {
        strcpy(wkstr,outfilename);
        strcpy(wkstr+l,".bak");
        unlink(wkstr); rename(outfilename,wkstr);
      }
    }
    if ((f_out=fopen(outfilename,"w+t"))==NULL)
      ErrExit("Cannot open output file");
    if (setvbuf(f_out,NULL,_IOFBF,30*1024))
      ErrExit("Cannot allocate I/O buffer");
  }

  /* ---- line map ---- */
  if (map_flag) {
    if ((f_map=fopen(mapfilename,"w+t"))==NULL) ErrExit("Cannot open map file");
    if (setvbuf(f_map,NULL,_IOFBF,8*1024))
      ErrExit("Cannot allocate I/O buffer");
    /* first map entry: input line 1 corresponds to output line 1 */
    fprintf(f_map,"%d ===> %d",inlineno,outlineno=1);
  }

  fprintf(f_out, "\\def\\CCTpreproc{CCT V" CCT_VERSION "}");
}

int seekeoln _VOID
/* Consume any run of blanks and tabs at the current input position.
   The first character that is neither (whatever readchar() returned) is
   handed back with unreadchar().  Returns non-zero exactly when that
   character is EOL, i.e. when only white space is left on the line. */
{
  register int ch;

  while ((ch=readchar())==' ' || ch=='\t')
    ;				/* skip horizontal white space */
  unreadchar(ch);
  return(ch==EOL);
}

/* ADD(p0): prepend the NUL-terminated string p0 to the text that the
   variable `s` of the expanding function points to.  The bytes of p0 are
   copied last-to-first, pre-decrementing s for each one, so afterwards s
   points at the copy of p0, directly followed by the previous text.
   No bounds check is made: the caller must leave at least strlen(p0)
   bytes of room in front of s.
   The expansion is a brace block without a trailing semicolon; it is
   written as `if (x) ADD(a) else ADD(b)` by its user. */
#define ADD(p0) {				\
  byte *p,*p1=p0;				\
  for (p=p1+strlen(p1); p>p1; *(--s)=*(--p));	\
}

void process _VOID
/* Main conversion loop: reads f_in character by character through
   readchar() and writes the converted text to f_out.

   - ASCII characters (32..127) are copied.  While inside a HanZi group
     the punctuation flagged trans_to_cc is replaced by its double-byte
     form; any other non-blank ASCII character first closes the group
     with '}'.
   - "#[digits]" selects a user-defined character (index below 3760); it
     is turned into a double-byte code and re-read through the pushback
     buffer.  Anything else after '#' is copied literally.
   - Double-byte characters open a "{\CC " group when needed and are
     written with bit 7 of both bytes flipped; bytes that are flagged
     tex_reserved afterwards are written as {\charN}.  Separators (' ' or
     '~') and the glue macros \CCA, \CCAS, \CCB, \CCBS are inserted
     according to the CHAR_INFO flags of the neighbouring characters.
   - With SKIP_COMMENTS, text after an unescaped '%' is copied verbatim up
     to the end of the line.
   - With CLOSECC_AT_PAR, newlines are counted instead of written at once;
     when the next other character arrives, an open HanZi group is closed
     first if at least two newlines were pending (blank line).

   Uses the globals f_in, f_out, f_map, map_flag, inlineno, outlineno,
   cc_info and ascii_info; openfiles() and init() must have run. */
{
#ifdef SKIP_COMMENTS
  int backslashflag;	/* last ASCII char written was '\' (so "\%" is no comment) */
#endif
  register int c,c1;
  /* s0 is the scratch buffer; s is reset to s0+128 for every character so
     that ADD() has room to prepend in front of it */
  byte *s,s0[PATH_MAX];
  int i, j, charcount, lastchar;
#ifdef CLOSECC_AT_PAR
  int newline_count=0;	/* newlines seen but not yet written */
#endif
  int hanzimode=0;	/* non-zero while a "{\CC ..." group is open */
  int breakable=1;	/* indicate if we can break line after last char */
  int glue_flag=0;	/* indicate if we should add \CCA before curr. char */
  int userfontflag=0;	/* indicates if user-defined char */

#ifdef SKIP_COMMENTS
  backslashflag=0;
#endif

  /* charcount: characters written on the current output line;
     lastchar: last ASCII character written (0 after anything else) */
  lastchar = charcount = 0;

  while ((c=readchar())!=EOF) {
    s=s0+128;
    if (c=='\t') c=' ';
#ifndef UNIX
    if (c == 13) continue;	/* ignore CR in DOS */
#endif

    /* Added on Apr. 18 1998: work-around for a LaTeX2e bug which
       causes the parindent after the following pattern to be removed:
       	\begin{list env}
       	   ... ...
       	\end{list env}
       	... ...
       	CCCCCCCCCC

       	... ...
       (where 'CCCC' represent Chinese chars) */
#ifdef CLOSECC_AT_PAR
    i=c;
    /* drop white space at the start of a line while in HanZi mode */
    if (isspace(i) && i!=EOL && !charcount && hanzimode) continue;

    if (i!=EOL && newline_count) {
      /* a blank line was seen: close the HanZi group before it */
      if (newline_count>=2 && hanzimode && !charcount) {
        hanzimode=0;
        fputc('}',f_out);	/* Endgroup char */
      }

      /* now write the newlines that were held back */
      while (newline_count) {fputc('\n', f_out); newline_count--;}
      lastchar = 0;
    }
#endif

    if (c>=32 && c<=127) {		/* ASCII char */
      if (c=='#') {	/* Check if user defined character */
        c1=readchar();
        if (c1!='[') {
          /* EOF must not be pushed back: it would return as byte 255 */
          if (c1!=EOF) unreadchar(c1);
          goto ASCII_char;
        }
        /* Collect "[digits]" in s[0..j-1] so that it can be pushed back
           if it turns out not to be a valid selection.  The collection is
           limited to INPUT_BUFFER_LEN bytes because that is all the
           pushback buffer can take. */
        s[0]=c1; j=1; i=0;
        do {
          c1=readchar();
          if (c1==EOF) break;		/* EOF is neither stored nor pushed back */
          s[j++]=c1;
          if (j>=INPUT_BUFFER_LEN || c1<'0' || c1>'9') break;
          /* stop accumulating once out of range: i stays invalid and a
             long digit run cannot overflow the int */
          if (i<3760) i=10*i+(c1-'0');
        } while (1);
        if (j<3 || c1!=']') {
     Push_s:
          /* not a selection: re-read the collected text as ordinary input */
          while (--j>=0) unreadchar(s[j]);
          goto ASCII_char;
        }
        if (i>=3760) {
          fprintf(stderr,"Warning: invalid user-char selection (line %d).\n",inlineno);
          goto Push_s;
        }
        userfontflag=1;
#ifdef SKIP_COMMENTS
        backslashflag=0;
#endif
	/* User defined characters are always mapped to GB2312-1 */
        unreadchar((i%94)+161); unreadchar((i/94)+176);
        continue;
      }
   ASCII_char:
      if (hanzimode && ascii_info[c].trans_to_cc) {
        /* Transforms BiaoDian to corresponding Chinese one */
        unreadchar(c-'!'+161); unreadchar(163);
#ifdef SKIP_COMMENTS
        backslashflag=0;
#endif
        continue;
      }
      if (c!=' ' && hanzimode) {	/* switch to ASCII mode */
	hanzimode=0;
	fputc('}',f_out);	/* Endgroup char */
	charcount++;
	/* separate the group from the ASCII text: breakable or tied space */
	if (c != '~' && ascii_info[c].break_before) {
	  fputc(breakable ? ' ' : '~', f_out);
	  charcount++;
	}
      }
      if (glue_flag) {			/* Add CC->ASCII glue */
        if (breakable && ascii_info[c].break_before) {
          fputs("\\CCA ",f_out); charcount+=5;
        } else {
          fputs("\\CCAS ",f_out); charcount+=6;
        }
      }
      /* copy the character to output file */
      fputc(c,f_out); charcount++;
#ifdef SKIP_COMMENTS
      /* Test if '%' encountered */
      if (c=='%' && !backslashflag) {	/* Copy comments to output file */
        while ((c1=readchar())!=EOL) {
          if (c1==EOF) break;
#ifndef UNIX
	  if (c1 == 13) continue;
#endif
          fputc(c1,f_out);
        }
        /* the line end that terminated the comment */
#  ifdef CLOSECC_AT_PAR
        newline_count++;
#  else
        fputc('\n',f_out);
#  endif
        charcount=0;
      }
#endif
      breakable=ascii_info[c].break_after;
      glue_flag=0;
#ifdef SKIP_COMMENTS
      backslashflag=(c=='\\');
#endif
      lastchar = c;
    } else if (c>=128) {		/* Chinese character */
      struct CHAR_INFO ci;

#ifdef SKIP_COMMENTS
      backslashflag=0;
#endif
#ifndef GBK
      if (c>254 || c<161) {		/* Invalid code */
#else
      if (c>254 || c<129) {		/* Invalid code */
#endif
        fprintf(stderr,"Warning: Invalid character ignored (line %d).\n",inlineno);
        continue;
      }
      c1=readchar();			/* trail byte */
#ifndef GBK
      if (c1<161 || c1>254) {
#else
      if (c1<64 || c1>254) {
#endif
        fprintf(stderr,"Warning: incomplete Chinese character ignored (line %d).\n",
               inlineno);
        continue;
      }
      ci=cc_info[CHARNO(c,c1)];

      /* build in s the text that goes in front of the character */
      if (!hanzimode) {		/* switch into HanZi Mode */
        if (breakable && lastchar!='~')
	  strcpy(s, ci.break_before ? " {" : "~{");
        else
	  strcpy(s,"{");
        if (!userfontflag) strcat(s,"\\CC ");
      } else {			/* we are already in HanZi Mode */
        if (ci.break_before && breakable) {
          i=seekeoln();
          /* keep the line unless it is long and more text follows on it */
          if (charcount<254 || i) strcpy(s," "); else {
            /* insert a newline char */
#ifdef CLOSECC_AT_PAR
            newline_count++;
#else
            fputc('\n',f_out);
#endif
            charcount=0;
            if (map_flag) fprintf(f_map," %d",++outlineno);
            s[0]='\0';
          }
        } else strcpy(s,"~");
      }

      if (userfontflag) strcat(s,"{\\YH ");
      /* flip bit 7 of both bytes (129..254 -> 1..126) */
      c=(c+128)&255; c1=(c1+128)&255;
#if !ExASCII
      {
        int k;
        /* bytes flagged tex_reserved are written as {\charN} */
        if (ascii_info[c].tex_reserved) sprintf(s+strlen(s),"{\\char%d}",c);
        else {k=strlen(s); s[k++]=c; s[k]='\0';}

        if (ascii_info[c1].tex_reserved) sprintf(s+strlen(s),"{\\char%d}",c1);
        else {k=strlen(s); s[k++]=c1; s[k]='\0';}
      }
#else
      {
        int k;
        k=strlen(s); s[k++]=c; s[k++]=c1; s[k]='\0';
      }
#endif
      if (userfontflag) {
        strcat(s,"}\\CC{}"); userfontflag=0;
      }
/*------------------ Adjust spacing of special biaodians -----------------*/
      /* ADD() prepends to s, so the macros end up in front of the text
         built above */
      if (glue_flag) {
        if (breakable && ci.break_before) ADD("\\CCA ") else ADD("\\CCAS ")
      }
      if (!ci.break_after && ci.glue_before) {
        if (breakable) ADD("\\CCB ") else ADD("\\CCBS ")
      }
      glue_flag=(!ci.break_before && ci.glue_after);
/*------------------------------------------------------------------------*/
      breakable=ci.break_after;
      fputs(s,f_out); charcount+=strlen(s);
      hanzimode=1;
      lastchar = 0;
    } else if (c==EOL) {		/* new line */
      /* glue owed by the previous character is written before the break */
      if (glue_flag) {
        fputs(breakable ? "\\CCA":"\\CCAS",f_out);
        glue_flag=0;
      }

      charcount=0;

      /* Added on Apr. 18 1998 */
#ifdef CLOSECC_AT_PAR
      newline_count++;
#else
      fputc('\n',f_out);
#endif

#ifdef SKIP_COMMENTS
      backslashflag=0;
#endif
      breakable=1;
      lastchar=0;
    }
    /* any other control character (below 32) is silently dropped */
  }

#ifdef CLOSECC_AT_PAR
  /* write the newlines still pending at end of input */
  while (newline_count) {fputc('\n', f_out); newline_count--;}
#endif

  /* close a HanZi group that is still open at end of input */
  if (hanzimode) fprintf(f_out,"}%c\n",breakable ? ' ':'~');
#if 0 /*ndef UNIX*/
  fputc(0x1a,f_out);
#endif
  if (map_flag) fputc('\n',f_map);
}

void init _P2(int,argc, byte **,argv)
/* Initialize the character tables and determine the run-time options.

   argc, argv: the program's command line (argv[0] is used to locate the
   initialization file).

   1. Every entry of cc_info and ascii_info gets the default attributes;
      the punctuation lists below then adjust individual entries.
   2. Options are read first from the initialization file (the program
      path with ".ini" added, or replacing the extension on non-UNIX
      builds), if it can be opened, and then from the command line.  In
      the file, tokens are separated by white space and lines starting
      with '%' are comments.
      A token that does not start with '-', or that is "-" alone, is a
      file name: the first one names the input, the second the output,
      further ones are ignored.  Options: -q (quiet), -m (map file),
      -s<n> (flag code n, 0..127, as TeX-reserved), -? / -h (help).
      An invalid option prints a message and the usage text and exits
      with status 1.
   3. Without an input name both names are asked for on stdin.
      Default extensions are ".ctx" (input) and ".tex" (output); the
      output name defaults to the input name with ".tex", the map file
      name is the output name with ".map".  No map file is written when
      the output is stdout.

   Sets the globals infilename, outfilename, mapfilename, map_flag,
   verbose, cc_info[], ascii_info[]; f_ini is closed again on return. */
{
  register int i,n,l;
# define BUFFLEN    2048
  byte s[PATH_MAX], ch, work[BUFFLEN+1], s_ini[PATH_MAX], *op_line;
  struct CHAR_INFO ci;
  /* NOTE(review): LBDSTR and RBDSTR below each hold a single two-byte
     sequence here, while LBDFLG carries 19 flags and RBDFLG 10.  The
     literals look as if they were damaged by a character-set conversion
     of this source file -- please verify against a pristine copy. */
  /* list of characters which should not appear at bol.
     and flags indicating if a special glue should be added after them */
  byte *LBDSTR="ݣ";
  byte *LBDFLG="1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 0 1 1 ";
  /* list of characters which should not appear at eol.
     and flags indicating if a special glue should be added before them */
  byte *RBDSTR="ۣ";
  byte *RBDFLG="1 1 1 0 0 0 0 1 1 1 ";
  /* list of characters to be transformed to corresponding Chinese
     characters within HanZi mode */
  byte *BIAODIAN="!,.:;?";
  /* ASCII chars which should not appear at bol */
  byte *AscLBD="!'),.:;>?]}";
  /* ASCII chars which should not appear at eol */
  byte *AscRBD="(<[`{";
  /* TeX's reserved characters */
  byte *ComChars=" #$%&@\\^_{}~" /*"!;:."*/;

  /* initialization: by default a break is allowed on both sides of every
     character and no special handling applies */
  s[0]='\0';
  ci.break_before=1;
  ci.break_after=1;
  ci.glue_before=0;
  ci.glue_after=0;
  ci.trans_to_cc=0;
  ci.tex_reserved=0;

  for (i=0;i<NumberOfCC;i++) cc_info[i]=ci;
  for (i=0;i<256;i++) {
    ascii_info[i]=ci;
    /* control codes, blank and all codes with bit 7 set are reserved */
    if (i<=32 || i>=128) ascii_info[i].tex_reserved=1;
  }

  /* double-byte lists: two bytes per character; the flag of a character
     is the digit found at the same offset of the flag string */
  for (i=0,n=strlen(LBDSTR);i<n;i+=2) {
    l=CHARNO(LBDSTR[i],LBDSTR[i+1]);
    cc_info[l].break_before=0;
    cc_info[l].glue_after=LBDFLG[i]-'0';
  }
  for (i=0,n=strlen(RBDSTR);i<n;i+=2) {
    l=CHARNO(RBDSTR[i],RBDSTR[i+1]);
    cc_info[l].break_after=0;
    cc_info[l].glue_before=RBDFLG[i]-'0';
  }
  for (i=0,n=strlen(BIAODIAN);i<n;i++) ascii_info[BIAODIAN[i]].trans_to_cc=1;
  for (i=0,n=strlen(AscLBD);i<n;i++) ascii_info[AscLBD[i]].break_before=0;
  for (i=0,n=strlen(AscRBD);i<n;i++) ascii_info[AscRBD[i]].break_after=0;
  for (i=0,n=strlen(ComChars);i<n;i++) ascii_info[ComChars[i]].tex_reserved=1;

  /* process command-line arguments */
  infilename[0]=outfilename[0]='\0';
  map_flag=0;
  /* get initialization file name */
  strcpy(s_ini, fullpath(argv[0]));
#ifndef UNIX
  /* replace the extension of the program path by ".ini", or append
     ".ini" when the last path component has no extension */
  i = strlen(s_ini) - 1;
  if (i < 0) i = 0;		/* empty path: do not index in front of s_ini */
  while (i > 0 && s_ini[i] != '.' && s_ini[i] != '\\'
	        && s_ini[i] != '/' && s_ini[i] != ':') i--;
  ((s_ini[i] == '.') ? strcpy : strcat)(s_ini + i, ".ini");
#else
  strcat(s_ini,".ini");
#endif
  f_ini=fopen(s_ini,"rt");
#ifdef DEBUG
  if (f_ini!=NULL)
    fprintf(stderr, "Processing options in \"%s\"...\n", s_ini);
#endif
  /* One token is handled per iteration: tokens of the ini file first
     (while f_ini is open), then argv[1..argc-1]. */
  op_line=NULL;
  for (i=1;i<argc || f_ini!=NULL;) {
    byte *p;

    if (f_ini!=NULL) {
      if (op_line==NULL) {
        /* fetch the next line of the ini file */
        if ((op_line=fgets(work,BUFFLEN,f_ini))==NULL) {
          fclose(f_ini); f_ini=NULL;
          continue;
        }
        /* check if line begins with '%' */
        if (op_line[0]=='%') {	/* comment line */
          op_line=NULL;
          continue;
        }
      }
      /* get next string from op_line */
      while (isspace(*op_line)) op_line++;
      if (!*op_line) {
        op_line=NULL;
        continue;
      }
      /* copy one token; the limit is the size of s, not of the line
         buffer, which may be larger than s */
      p=s;
      while (!isspace(*op_line) && *op_line && p-s<(int)sizeof(s)-1)
        *(p++)=*(op_line++);
      *p='\0';
    } else {
      if (strlen(argv[i])>=sizeof(s))
        ErrExit("Command-line argument too long");
      strcpy(s,argv[i]);
      i++;
    }
    p=s;
    /* accept "-" alone as stdin/stdout */
    if (*p!='-' || p[1] == '\0') {
      if (!infilename[0]) strcpy(infilename,s);
      else if (!outfilename[0]) strcpy(outfilename,s);
      continue;
    }
    ch=toupper(p[1]);
    switch (ch) {
      case 'Q' :
	verbose = 0;
	break;
      case 'M' :
        map_flag=1;
        break;
      case 'S' :
        n=atoi(p+2);
        if (n<0 || n>127) goto opt_err;
        ascii_info[n].tex_reserved=1;
        break;
      case '?':
      case 'H':
        goto help;
      default  :
      opt_err:
        /* written straight to stderr: no intermediate buffer that a long
           option or path could overflow */
        fprintf(stderr,"Invalid option \"%s\" ",s);
        if (f_ini!=NULL) {
          fclose(f_ini); f_ini=NULL;
          fprintf(stderr,"in the initialization file \"%s\".\n",s_ini);
        } else fprintf(stderr,"in the command line.\n");
      help:
        fprintf(stderr,"Usage:\n");
        fprintf(stderr,"  cct [options] [inputfile[.ctx] [options] [outputfile[.tex]]] [options]\n");
        fprintf(stderr,"Valid options are:\n");
        fprintf(stderr,"  -q   \tRun quietly.\n");
        fprintf(stderr,"  -m   \tGenerate map file.\n");
        fprintf(stderr,"  -s#  \tConvert the code '#' (decimal) in a Chinese char into '\\char#'\n");
        fprintf(stderr,"       \t(i.e., treat it as a special char as '\\', '$', etc.).\n");
        fprintf(stderr,"       \tMultiple '-S#' options may be present.\n");
        fprintf(stderr,"  -?,-h\tDisplay this message.\n");
        fprintf(stderr,"The program also processes the file \"cct.ini\" as other programs of CCT do.\n");
        fprintf(stderr,"'-' may be used at the place of a filename which stands for stdin or stdout.\n");
        closefiles();
        exit(1);
    }
  }

  if (!infilename[0]) {
    /* no input name given: ask for both names; a failed read leaves the
       name empty */
    fprintf(stderr,"Name of input file ? ");
    if (fgets(infilename, sizeof(infilename), stdin)==NULL) infilename[0]='\0';
    fprintf(stderr,"Name of output file ? ");
    if (fgets(outfilename, sizeof(outfilename), stdin)==NULL) outfilename[0]='\0';
    /* by hooklee: remove '\n' or '\r' at EOL
       (the length is checked first so an empty name is never indexed) */
    l = strlen(infilename);
    while (l > 0 && (infilename[l-1] == '\n' || infilename[l-1] == '\r'))
      infilename[--l] = '\0';
  }

  /* by hooklee: remove '\n' or '\r' at EOL
     (outfilename is normally still empty here) */
  l = strlen(outfilename);
  while (l > 0 && (outfilename[l-1] == '\n' || outfilename[l-1] == '\r'))
    outfilename[--l] = '\0';

  /* default extension of the input file */
  if (infilename[0] != '-' && testfileext(infilename)==-1)
    strcat(infilename,".ctx");

  /* default output name: input name with the extension ".tex" */
  if (!outfilename[0]) {
    l=testfileext(infilename);
    strcpy(outfilename,infilename);
    if (outfilename[0] != '-') strcpy(outfilename+l,".tex");
  }
  if (outfilename[0] != '-') {
    if (testfileext(outfilename)==-1) strcat(outfilename,".tex");
    /* map file: output name with the extension ".map" */
    l=testfileext(outfilename);
    strcpy(mapfilename,outfilename);
    strcpy(mapfilename+l,".map");
  } else {
    /* output goes to stdout: no map file */
    mapfilename[0] = '\0';
    map_flag = 0;
  }
#ifndef UNIX
  strlwr(infilename); strlwr(outfilename); strlwr(mapfilename);
#endif

  if (infilename[0] != '-' && !strcmp(infilename,outfilename))
    ErrExit("Input file name and Output file name are the same");

}

int main _P2(int,argc, char **,argv)
/* Program entry: allocates the double-byte table (GBK build), lets init()
   evaluate the options, prints the banner unless quiet, then opens the
   files, converts the input and closes the files again.
   Returns 0; all failures end the program inside the called functions. */
{
#ifdef GBK
  /* the GBK table is allocated at run time */
#  if !defined(UNIX) && !defined(GO32) && !defined(WIN32)
     cc_info=farmalloc(NumberOfCC*sizeof(*cc_info));
#  else
     cc_info=malloc(NumberOfCC*sizeof(*cc_info));
#  endif
  if (cc_info==NULL) {
    ErrExit("memory allocation error");
  }
#endif

  init(argc, (byte **)argv);

  /* the banner comes after init() because -q may have cleared verbose */
  if (verbose) {
#ifdef GBK
    fprintf(stderr,"CCT - Preprocessor V"CCT_VERSION" (GBK), February 2003.\n");
#else
    fprintf(stderr,"CCT - Preprocessor V"CCT_VERSION", February 2003.\n");
#endif
  }

  openfiles();
  process();
  closefiles();

  if (verbose) {
    fprintf(stderr,"Line %d\nDone.\n",inlineno);
  }
  return(0);
}
