/* $Id: cctconv.c,v 1.3 2004/11/26 11:41:32 zlb Exp $ */

/* Converts '\', '{', '}', '^', '_', '~', '|', and 0x80 in the second byte of
 * GBK chars.
 *
 * Note:
 * 	'^' (0x5e) might be followed by, say,  '^^M', which may confuse TeX.
 * 	'_' and '~' seem to cause trouble in the .out file.
 *
 * 'cctconv.tex' is a sample test file. */

#if defined(WIN32) && !defined(useCJKform)
#  define useCJKform	/* WIN32 guys don't like to use '-Dxxxx' */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Input and output streams; main() points them at the named files or at
 * stdin/stdout. */
static FILE *fin, *fout;
static int force = 0;	/* true ==> force transforming all chars to '^^xx' */

/* Write one character to the output stream 'fout'. */
#define PUTCHAR(c)	fputc(c, fout)

/* Print the command-line synopsis to stderr and terminate with status 1. */
static void print_usage(const char *prog)
{
    fprintf(stderr, "Usage: %s [-f] [inputfile|-] [outputfile|-]\n", prog);
    fprintf(stderr, "('-f' forces transforming all GBK chars to"
		    " '^^xx' form)\n");
    exit(1);
}

/* Return the digit that replaces a troublesome second byte, or 0 if 'ch2'
 * needs no replacement. */
static int gbk_escape_digit(int ch2)
{
    switch (ch2) {
	case '\\': return '0';	/* '\'  ==> '0' (0x30) */
	case '{':  return '1';	/* '{'  ==> '1' (0x31) */
	case '}':  return '2';	/* '}'  ==> '2' (0x32) */
	case '^':  return '3';	/* '^'  ==> '3' (0x33) */
	case '_':  return '4';	/* '_'  ==> '4' (0x34) */
	case '~':  return '5';	/* '~'  ==> '5' (0x35) */
	case 0x80: return '6';	/* 0x80 ==> '6' (0x36) */
	case '|':  return '7';	/* '|'  ==> '7' (0x37) */
	default:   return 0;
    }
}

/* Return nonzero if 'ch2' is accepted as the second byte of a GBK char:
 * 0x40-0xfe except 0x7f, plus every replacement digit '0'-'7' returned by
 * gbk_escape_digit(), so that text which has already been converted is
 * passed through instead of being dropped as invalid. */
static int gbk_second_byte_ok(int ch2)
{
    if (ch2 >= '0' && ch2 <= '7') return 1;
    return ch2 >= 0x40 && ch2 <= 0xfe && ch2 != 0x7f;
}

/* Copy the input to the output, rewriting GBK chars whose second byte is
 * one of the bytes handled by gbk_escape_digit().
 *
 * Arguments: [-f] [inputfile|-] [outputfile|-]; a missing name or "-"
 * selects stdin/stdout.  '-f' writes every GBK byte in '^^xx' form.
 *
 * Exit status: 0 on success, 1 on a usage error, 2 if the input file cannot
 * be opened, 3 if the output file cannot be opened, 4 on a read or write
 * error. */
int main(int argc, char *argv[])
{
    int ch1, ch2, digit, i;
    int status = 0;
    char *infile = NULL, *outfile = NULL;

    for (i = 1; i < argc; i++) {
	const char *arg = argv[i];

	if (arg[0] == '-' && arg[1] != '\0') {
	    /* The cast keeps toupper() defined for bytes >= 0x80, which are
	     * negative where plain 'char' is signed. */
	    if (toupper((unsigned char)arg[1]) != 'F' || arg[2] != '\0')
		print_usage(argv[0]);
	    force = 1;
	}
	else if (infile == NULL) infile = argv[i];
	else if (outfile == NULL) outfile = argv[i];
	else print_usage(argv[0]);
    }

    fin = (infile == NULL || !strcmp(infile, "-")) ?
	    stdin : fopen(infile, "rt");
    if (fin == NULL) {
	fprintf(stderr, "Error: cannot open input file \"%s\".\n", infile);
	exit(2);
    }

    fout = (outfile == NULL || !strcmp(outfile, "-")) ?
	    stdout : fopen(outfile, "w+t");
    if (fout == NULL) {
	fprintf(stderr, "Error: cannot open output file \"%s\".\n", outfile);
	exit(3);
    }

    while ((ch1 = fgetc(fin)) != EOF) {
	/* Bytes up to and including 0x7f are copied unchanged. */
	if (ch1 <= 0x7f) {
	    PUTCHAR(ch1);
	    continue;
	}

	/* 0x80 and 0xff are not accepted as the first byte. */
	if (ch1 == 0x80 || ch1 == 0xff) {
	    fprintf(stderr, "Warning: invalid code ignored.\n");
	    continue;
	}

	if ((ch2 = fgetc(fin)) == EOF) {
	    fprintf(stderr, "Warning: incomplete GBK char at end of file.\n");
	    break;
	}

	if (!gbk_second_byte_ok(ch2)) {
	    fprintf(stderr, "Warning: invalid GBK char (0x%x 0x%x) ignored.\n",
			    ch1, ch2);
	    continue;
	}

	digit = gbk_escape_digit(ch2);
	if (digit != 0) {
#ifndef useCJKform
	    /* Replace the troublesome second byte by its digit. */
	    if (force) fprintf(fout, "^^%02x%c", ch1, digit);
	    else fprintf(fout, "%c%c", ch1, digit);
#else
	    /* for pdflatex + hyperref with unicode option */
	    fprintf(fout, "%c%c%c%d%c", 0x7f, ch1, 0x7f, ch2, 0x7f);
#endif
	}
	else if (force) {
	    fprintf(fout, "^^%02x^^%02x", ch1, ch2);
	}
	else {
	    PUTCHAR(ch1);
	    PUTCHAR(ch2);
	}
    }

    /* fgetc() returns EOF on a read error too; tell the two cases apart. */
    if (ferror(fin)) {
	fprintf(stderr, "Error: failed to read input.\n");
	status = 4;
    }
    if (fin != stdin) fclose(fin);

    /* Buffered data may only reach the file on flush/close, so a failure
     * there is reported just like an earlier write error. */
    i = ferror(fout);
    if (fout != stdout) {
	if (fclose(fout) != 0) i = 1;
    }
    else if (fflush(fout) != 0) i = 1;
    if (i) {
	fprintf(stderr, "Error: failed to write output.\n");
	status = 4;
    }

    return status;
}
