/* $Id: gbk2uni-cxterm.c,v 1.2 2004/11/26 11:41:33 zlb Exp $
 *
 * Transforms GBK chars in the .out file generated by hyperref to unicode.
 *
 * Author: cxterm
 * Jan 25 2003.  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gbk2uni.h"

/* True when byte value `a` lies in 0x81..0xfe.  dopar() uses this to
 * recognise the first byte of a two-byte GBK character.  The argument is
 * evaluated twice, so it must be free of side effects. */
#define GBK(a)  ((0x81<=(a)) && ((a)<=0xfe))
/*
 * Size in bytes of the line buffer that main() allocates.  One logical
 * line of the .out file (after continuation lines have been joined) is
 * read into this buffer; 16 KB is expected to be big enough.
 */
#define RDBUF 16384

/* Set to 1 by the -u option; main() then writes a line that redefines
 * \WriteBookmarks as \relax at the top of the output file. */
static int w_bk_relax=0;
/* Output stream: the temporary file named by outname. */
static FILE	*Fout;
/* Input stream: the .out file named by inname. */
static FILE	*Fin;
/* Name of the .out file being converted (".out" is appended if missing). */
static char	inname[256];
/* Name of the temporary output file: inname with ".tmp" appended. */
static char	outname[256];

/* Print the two supported invocation forms to standard output. */
static void usage(void)
{
  static const char help_text[] =
    "Usage 1 : gbk2uni filename.out\n"
    "Usage 2 : gbk2uni -u filename.out\n"
    "\t Use -u option, filename.out can not be overwrited in next latex run\n";

  fputs(help_text, stdout);
}

/*
 * Write one 16-bit code unit to Fout as two backslash-octal escapes,
 * high byte first, each padded to three octal digits (0xfeff is written
 * as \376\377).  Bits above the low 16 of `u` are ignored.
 */
static void putucode(unsigned int u)
{
  const unsigned int hi = (u >> 8) & 0xff;
  const unsigned int lo = u & 0xff;

  fprintf(Fout, "\\%03o\\%03o", hi, lo);
}

/*
 * Fetch the next character from the string *q and advance *q past it.
 *
 * A sequence ^^xy, where x and y are both lowercase hexadecimal digits
 * (0-9, a-f), is decoded into the single byte 0xXY and all four
 * characters are consumed.  Anything else, including a caret that does
 * not start such a sequence, is returned as-is and only that one
 * character is consumed.  The backslash-octal form (\ooo) is deliberately
 * not decoded here.
 *
 * At the end of the string '\0' is returned and *q is left unchanged.
 */
static unsigned char GetBookmarkChar(unsigned char **q)
{
    unsigned char *s = *q;
    unsigned char first;
    int value = 0;
    int k;

    if (*s == '\0')
        return '\0';

    first = *s++;
    *q = s;                     /* by default exactly one char is consumed */

    if (first != '^' || s[0] != '^')
        return first;

    /* s[0] is the second caret; s[1] and s[2] must both be hex digits.
     * s[2] is inspected only after s[1] proved to be a (non-NUL) digit,
     * so no byte beyond the terminator is ever read. */
    for (k = 1; k <= 2; k++) {
        const unsigned char d = s[k];

        if (d >= '0' && d <= '9')
            value = 16 * value + (d - '0');
        else if (d >= 'a' && d <= 'f')
            value = 16 * value + (d - 'a' + 10);
        else
            return first;       /* not an escape: hand back the plain '^' */
    }

    *q = s + 3;                 /* skip the second caret and both digits */
    return (unsigned char)value;
}

/*
 * Convert the text of one brace-delimited argument and write it to Fout.
 *
 *   {string} or {}
 *    ^^^^^^^     ^
 *
 * On entry *q points at the first character after the opening brace
 * (main() has already copied the character in front of it).  A non-empty
 * argument is written as the byte-order mark 0xfeff followed by one
 * 16-bit unit per input character, each in the backslash-octal form
 * produced by putucode().  A byte in 0x81..0xfe is taken as the first
 * byte of a two-byte GBK character and mapped through the gbk2uni table;
 * every other byte is written unchanged as a 16-bit unit.  A closing '}'
 * is always written.
 *
 * On return *q points just past the closing '}' of the input, or at the
 * terminating NUL when the argument was not closed.
 *
 * If the argument already begins with the octal byte-order mark, the file
 * is assumed to have been converted earlier: both streams are closed, the
 * temporary output file is removed and the program exits with status 0.
 */
static void dopar(unsigned char **q)
{
  unsigned int a, b, u;

  /* Checked before anything is written for this argument. */
  if (!strncmp((const char *)*q, "\\376\\377", 8)) {
    fprintf(stderr, "This file has already been processed by gbk2uni.\n");
    fclose(Fout);
    fclose(Fin);
    remove(outname);
    exit(0);
  }

  /* An empty argument gets no byte-order mark. */
  if (**q != '}' && **q) {
    putucode(0xfeff);
  }

  while (**q != '}' && **q) {
    a = GetBookmarkChar(q);
    if (GBK(a) && **q) {
      b = GetBookmarkChar(q);
      /* CCT hook: a digit '0'..'7' in the second-byte position stands
       * for the byte listed below. */
      switch (b) {
        case '0': b = '\\'; break;
        case '1': b = '{';  break;
        case '2': b = '}';  break;
        case '3': b = '^';  break;
        case '4': b = '_';  break;
        case '5': b = '~';  break;
        case '6': b = 0x80; break;
        case '7': b = '|';  break;
        default:            break;
      }
      /* Sanity check: keeps the table index below within
       * 0 .. 125*192 + 190. */
      if (b < 0x40 || b > 0xfe) {
        fprintf(stderr, "Invalid character ^^%02x^^%02x ignored.\n", a, b);
        continue;
      }
      u = gbk2uni[(a - 0x81) * 192 + (b - 0x40)];
    }
    else {
      u = a;
    }
    putucode(u);
  }

  putc('}', Fout);
  /* Step over the closing brace only when there is one.  If the loop
   * stopped at the terminating NUL, advancing would move *q beyond the
   * end of the string and the caller would read past it. */
  if (**q == '}')
    *q += 1;
}

/*
 * Program entry point.
 *
 * Usage: gbk2uni [-u] filename[.out]
 *
 * Reads the .out file line by line (joining lines that end in a
 * backslash), copies everything up to the argument that holds the
 * bookmark text, converts that argument with dopar(), and copies the rest
 * of the line unchanged.  The result is written to "<name>.out.tmp",
 * which replaces the original file once it has been written completely.
 * With -u a line redefining \WriteBookmarks as \relax is written first.
 *
 * Exit status: 0 on success or when help was requested, 1 for usage
 * errors, over-long file names and file I/O failures, 2 when the line
 * buffer cannot be allocated, 3 when a continued line is cut off by end
 * of file.
 */
int main(int argc, char **argv)
{
  unsigned char *q;
  unsigned char *eol;
  unsigned char *b_in;
  const char    *arg;
  char          *p;
  size_t        len;
  int           failed;

#ifdef WIN32
  /* NOTE(review): argv[0] is not read again in this file.  The overwrite
   * is kept from the original code, but only when the new name fits in
   * the existing string. */
  if(argc > 0 && strlen(argv[0]) >= strlen("out2uni"))
    strcpy(argv[0], "out2uni");
#endif

  if((argc == 1) || ((argc == 2) && !strncmp(argv[1], "-h", 2))) {
    usage();
    return 0;
  }
  else if((argc == 3) && !strncmp(argv[1], "-u", 2)) {
    w_bk_relax = 1;
    arg = argv[2];
  }
  else if(argc == 2) {
    arg = argv[1];
  }
  else {
    usage();
    return 1;
  }

  /* Both ".out" and ".tmp" may be appended below; make sure the longest
   * possible result (plus the terminating NUL) fits the name buffers. */
  if(strlen(arg) + sizeof ".out.tmp" > sizeof outname) {
    fprintf(stderr, "File name too long: %s\n", arg);
    return 1;
  }
  strcpy(inname, arg);

  b_in = malloc(RDBUF);
  if(!b_in) {
    fprintf(stderr, "Memory allocation error.\n");
    return 2;
  }

  /* Append ".out" unless the name already ends with that extension. */
  p = strrchr(inname, '.');
#ifdef WIN32
  if((p == NULL) || stricmp(p, ".out"))
#else
  if((p == NULL) || strcmp(p, ".out"))
#endif
    strcat(inname, ".out");
  strcpy(outname, inname);
  strcat(outname, ".tmp");

  Fin = fopen(inname, "r");
  if(!Fin) {
    fprintf(stderr, "Cannot open %s to read.\n", inname);
    free(b_in);
    return 1;
  }
  Fout = fopen(outname, "wb");
  if(!Fout) {
    fprintf(stderr, "Cannot open %s to write.\n", outname);
    fclose(Fin);
    free(b_in);
    return 1;
  }

  if(w_bk_relax) {
    fprintf(Fout,"\\let\\WriteBookmarks\\relax\n");
  }

  while(fgets((char *)b_in, RDBUF, Fin)) {
    if(*b_in == '\n') continue;           /* blank lines are dropped */
    len = strlen((char *)b_in);

    /* Join continuation lines: while the text ends in backslash-newline,
     * overwrite those two characters with the next physical line.  The
     * length test keeps the [len-2] access inside the buffer for lines
     * shorter than two characters. */
    while(len >= 2 && b_in[len-2] == '\\' && b_in[len-1] == '\n') {
      b_in[len-2] = b_in[len-1] = '\0';
      if(!fgets((char *)b_in + len - 2, (int)(RDBUF - len + 2), Fin)) {
        fprintf(stderr, "Syntax error.\n");
        fclose(Fout);
        fclose(Fin);
        remove(outname);                  /* do not leave a partial file */
        free(b_in);
        return 3;
      }
      len = strlen((char *)b_in);
    }
    eol = b_in + len;                     /* the terminating NUL */

    q = b_in;
    while(*q == ' ' || *q == '\t') q++;
    if(!strncmp((const char *)q, "@def", 4)) {
      /* copy everything in front of the first '{' */
      while(*q && *q != '{')
        putc(*q++, Fout);
    }
    else {
      /* skip to second '{}' pair. (\BOOKMARK[][]{}{}{})
       * Note ZLB: can there be nested '{}' pairs in the .out file? */
      if(*q) putc(*q++, Fout);
      while(*q) {
        /* the first '}' not preceded by a backslash ends the first pair */
        if(*q == '}' && *(q-1) != '\\') {
          putc(*q++, Fout);
          break;
        }
        putc(*q++, Fout);
      }
      while(*q == ' ' || *q == '\t') q++;
    }
    if(*q) putc(*q++, Fout);              /* expected to be the opening '{' */
    if(*q) {
      dopar(&q);
      /* Defensive: never continue beyond the end of the line, whatever
       * position the converter left the cursor at. */
      if(q > eol) q = eol;
    }
    while(*q)
      putc(*q++, Fout);
  }

  failed = ferror(Fin);
  fclose(Fin);
  free(b_in);
  if(failed) {
    fprintf(stderr, "Error reading %s.\n", inname);
    fclose(Fout);
    remove(outname);
    return 1;
  }

  /* Only replace the original once the new file is known to be complete. */
  failed = ferror(Fout);
  if(fclose(Fout) != 0)
    failed = 1;
  if(failed) {
    fprintf(stderr, "Error writing %s.\n", outname);
    remove(outname);
    return 1;
  }

  /* Try a plain rename first so the original is not deleted unless that
   * is necessary; some platforms refuse to rename onto an existing file,
   * in which case the old file is removed and the rename retried. */
  if(rename(outname, inname) != 0) {
    remove(inname);
    if(rename(outname, inname) != 0) {
      fprintf(stderr, "Cannot rename %s to %s.\n", outname, inname);
      return 1;
    }
  }
  return 0;
}

