/* Transform addr.txt to addr.tex (a LaTeX file). The entries are sorted
   according to the pinyin of the characters in their names.

   Format of input file:

   Each line contains two fields separated by <tab> (the second field
   can contain tabs). If the first field in an input line is empty,
   then the line continues the previous one.

   Lines beginning with '%' are ignored.

$Id: addr2tex.c,v 1.3 2005/06/28 13:53:00 zlb Exp $ */

/* Unsigned byte type used for all text in this file, so that bytes with
   values of 128 and above compare as positive numbers. */
typedef unsigned char byte;

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>

/* Outside WIN32/GO32 builds, map stricmp() onto strcasecmp().  That function
   is declared in <strings.h>, not <string.h>, so pull the header in here to
   avoid relying on an implicit declaration. */
#if !defined(WIN32) && !defined(GO32)
#  include <strings.h>
#  define stricmp	strcasecmp
#endif

#include "url.h"

/* When COMPILE_DATA is defined the pinyin table (NbrOfCC, pytable) and the
   LaTeX preamble text (head) are defined in another translation unit;
   otherwise main() reads the table from the file "addr2tex.py". */
#define COMPILE_DATA

/* Number of table slots: 126 lead-byte rows of 190 trail bytes each, which
   matches the index computed in getkey(). */
#define NbrOfCC_GBK	((255 - 129) * (255 - 64 - 1))

#ifdef COMPILE_DATA
  extern int NbrOfCC;
  extern byte *pytable[], *head;
#else
  /* This used to read "int NbrOfCC_GBK = NbrOfCC_GBK;", which expands to a
     syntax error; the variable being defined is NbrOfCC. */
  int NbrOfCC = NbrOfCC_GBK;
  static byte *pytable[NbrOfCC_GBK];
  /* NOTE(review): main() writes `head` unconditionally, but it is only
     declared in the COMPILE_DATA configuration -- a definition has to be
     supplied before this branch can be built. */
#endif

/* One record of the address list.  All three strings are heap-allocated. */
typedef struct {
  byte *name;	/* first input field, leading white space removed; cut at '@' by getkey() */
  byte *addr;	/* second input field; continuation lines are appended after a '\n' */
  byte *key;	/* sort key computed from name by getkey() */
} Entry;

/* Growable array holding every entry read from the input file. */
static Entry *entries = NULL;
/* Number of elements of `entries` that are in use / allocated. */
static size_t entries_used = 0, entries_allocated = 0;

/* Fills in entry->key from entry->name. */
static void getkey(Entry *entry);
/* Comparison function passed to qsort() to order the entries. */
static int cccomp(Entry *s1, Entry *s2);

/* General-purpose scratch buffer used by main(). */
static byte buffer[8192];

/*
 * Read an address list, sort it and write it out as a LaTeX document.
 *
 * Usage: addr2tex [-pdf] inputfile [outputfile]
 *
 * With "-pdf" the output starts with "\let \dvipdfmx 1".  If no output file
 * is named, its name is derived from the input file name by replacing the
 * extension (or appending one if there is none) with ".tex".  An existing
 * output file is only overwritten after confirmation on stdin.
 *
 * If the entry array cannot be enlarged, a warning is printed and the
 * entries read so far are still processed.
 *
 * Returns 0 on success, 1 on usage or file errors and when the user declines
 * to overwrite, 2 if the pinyin table cannot be opened (builds without
 * COMPILE_DATA only) and 3 when memory runs out.
 */
int main(int argc, char *argv[])
{
  /* Two-byte GBK punctuation that gets special treatment in the output.
     Written as hex escapes so the source file stays pure ASCII. */
  static const char fw_space[]     = "\xa1\xa1";	/* full-width space */
  static const char fw_semicolon[] = "\xa3\xbb";	/* full-width ';' */
  static const char fw_colon[]     = "\xa3\xba";	/* full-width ':' */

  register int i, lineno;
  FILE *f_in = NULL, *f_out = NULL;
  static byte s[8192];
  /* Holds the previous letter or '@' section key.  Such a key is part of an
     input line, so sizing this like `s` rules out an overflow. */
  static byte label[sizeof(s)];
  byte *p, *q;
  Entry *current;
  int dvipdfmx = 0;
  /* remember the program name before "-pdf" is shifted out of argv */
  const char *progname = (argc > 0 && argv[0] != NULL) ? argv[0] : "addr2tex";

  if (argc > 1  && !strcmp(argv[1], "-pdf")) {
    dvipdfmx = 1;
    argc--; argv++;
  }

  if (argc != 2 && argc != 3) {
usage:
    fprintf(stderr,"Usage: %s [-pdf] inputfile [outputfile]\n", progname);
    fprintf(stderr,"(use '-pdf' option if you want to run dvipdfmx later)\n");
    return 1;
  }

#ifdef COMPILE_DATA
  assert(NbrOfCC == NbrOfCC_GBK);
#else
  fprintf(stderr,"Reading pinyin table...\n");
  if ((f_in=fopen("addr2tex.py","rt"))==NULL) {
    fprintf(stderr,"Can't open file \"addr2tex.py\"!\n");
    return 2;
  }
  for (i=0; i<NbrOfCC; i++) {
    /* a short table yields placeholder entries instead of stale data */
    if (fgets(s, sizeof(s), f_in) == NULL) s[0] = '\0';
    if (strlen(s)<3) strcpy(s,"   zzzzzzz");
    if ((pytable[i]=malloc(strlen(s)-2))==NULL) goto mem_err;
    strcpy(pytable[i],s+3);
  }
  fclose(f_in);
  f_in = NULL;
#endif

  /* Build the output file name in s; leave room for an appended ".tex". */
  if (strlen(argv[argc - 1]) + sizeof(".tex") > sizeof(s)) {
    fprintf(stderr, "File name too long.\n");
    return 1;
  }
  if (argc == 3) {
    strcpy(s, argv[2]);
  } else {
    strcpy(s, argv[1]);
    /* find the last '.' or path separator; only a '.' marks an extension */
    p = s + strlen(s);
#ifdef WIN32
    while (p > s && p[-1] != '.' && p[-1] != '/' && p[-1] != '\\' &&
           p[-1] != ':') p--;
#else
    while (p > s && p[-1] != '.' && p[-1] != '/') p--;
#endif
    if (p > s && p[-1] == '.') p--; else p = s + strlen(s);
    strcpy(p, ".tex");
  }

  fprintf(stderr, "Input file:  %s\nOutput file: %s\n", argv[1], s);
  if (!strcmp(s, argv[1])) {
    /* opening the output would truncate the input before it is read */
    fprintf(stderr, "Input and output file must be different.\n");
    return 1;
  }
  if ((f_in=fopen(argv[1],"rt"))==NULL) {
    fprintf(stderr, "Cannot open input file \"%s\".\n", argv[1]);
    goto usage;
  }

  if ((f_out=fopen(s, "rt")) != NULL) {
    char answer[16];

    fclose(f_out);
    f_out = NULL;
    fprintf(stderr, "File \"%s\" exists, overwrite it (y/n) ? ", s);
    if (scanf("%15s", answer) != 1 ||
        (stricmp(answer, "y") && stricmp(answer, "yes"))) {
      fprintf(stderr, "Abort.\n");
      fclose(f_in);
      return 1;
    }
  }
  if ((f_out=fopen(s, "w+t")) == NULL) {
    fprintf(stderr, "Cannot open output file \"%s\".\n", s);
    fclose(f_in);
    return 1;
  }

  current=NULL;
  fprintf(stderr,"Reading input file...\n");
  lineno=0;
  while (fgets(s, sizeof(s), f_in)!=NULL) {
    size_t len;

    lineno++;
    p = s;
    while (isspace(*p)) p++;
    if (*p=='%') continue;
    /* strip trailing white space; skip lines that are entirely blank */
    len = strlen(s);
    while (len > 0 && isspace(s[len - 1])) len--;
    if (len == 0) continue;
    s[len] = '\0';
    /* Split: first field=name, second field=address, separator=<tab>.
       A line without a <tab> is accepted with an empty address. */
    p=strchr(s, '\t');
    if (p==NULL) {
      p="";
    }
    else {
      *p='\0'; p++;
    }
    q=s; while (isspace(*q)) q++;
    if (*q=='\0') {
      byte *grown;

      /* append addr to current entry */
      if (current==NULL) {
        fprintf(stderr, "warning: line %d ignored (no previous line).\n", lineno);
        continue;
      }
      grown=realloc(current->addr, strlen(current->addr)+strlen(p)+2);
      if (grown==NULL) goto mem_err;
      current->addr=grown;
      strcat(current->addr, "\n");
      strcat(current->addr, p);
      continue;
    }
    if (entries_used >= entries_allocated) {
      /* keep the old array if growing fails, so that the entries read so
         far can still be sorted and written */
      Entry *grown = realloc(entries,
                             (entries_allocated + 1024) * sizeof(*entries));
      if (grown == NULL) {
        fprintf(stderr,"\nWarning: memory allocation failed.\n");
        break;
      }
      entries = grown;
      entries_allocated += 1024;
    }
    current=entries+(entries_used++);
    current->name=strdup(q);
    current->addr=strdup(p);
    if (current->name==NULL || current->addr==NULL) goto mem_err;
    getkey(current);
  }
  fclose(f_in);
  f_in = NULL;

  fprintf(stderr,"Sorting...");
  if (entries_used > 1)
    qsort(entries, entries_used, sizeof(Entry),
          (int (*)(const void *, const void *))cccomp);
  fprintf(stderr,"Done.\n");

  /* output results */
  if (dvipdfmx) fprintf(f_out, "\\let \\dvipdfmx 1\n");
  fprintf(f_out,
	"\\documentclass[CJK]{cctart}\n"
	"\\openin0 addr2tex.sty\n"
	"\\ifeof0 \\let\\LoadUserFile0\\else \\let\\LoadUserFile1\\fi\n"
	"\\closein0 \n"
	"\\if\\LoadUserFile1\n"
	"  \\usepackage{addr2tex}\n"
	"\\else\n"
	"  \\makeatletter\n"
	);
  fprintf(f_out, "%s", head);
  fprintf(f_out,
	"  \\makeatother\n"
	"\\fi\n"
	"\\begin{document}\n\n"
	);
  label[0]='\0';
  q="";		/* q always points at the name of the previous entry */
  for (i=0; (size_t)i < entries_used; i++) {
    int j;
    if (!strcmp(q, entries[i].name)) {
      fprintf(stderr, "Duplicate name: %s\n", q);
    }
    if (q[0] != '\0' && (q[0] != entries[i].name[0] ||
	(q[1] > 128 && q[1] != entries[i].name[1])))
      fprintf(f_out, "\\colorswap\n");
    q=entries[i].name;
    if (entries[i].key[0] != '@') {
      /* ordinary entry: start a new letter when the first key byte changes */
      j=strncmp(label, entries[i].key, 1);
      strncpy(label, entries[i].key, 1);
      label[1]='\0';
      if (j) fprintf(f_out, "\\startletter{%s}\n", label);
    } else {
      /* '@' entry: start a new section when the whole key changes */
      j=strcmp(label, entries[i].key);
      strcpy(label, entries[i].key);
      if (j) fprintf(f_out, "\\startsection{%s}\n", label + 1);
    }
    /* insert a fullsize space for two character names */
    strcpy(buffer, entries[i].name);
    if (strlen(buffer) == 4 && buffer[0] > 128 && buffer[2] > 128) {
      buffer[4] = buffer[2];
      buffer[5] = buffer[3];
      buffer[6] = '\0';
      memcpy(buffer + 2, fw_space, 2);
    }
    fprintf(f_out, "\\entry{%s}{", buffer);
    /* process URLs; strncpy() alone would leave an over-long address
       without a terminating NUL */
    strncpy(buffer, entries[i].addr, sizeof(buffer) - 1);
    buffer[sizeof(buffer) - 1] = '\0';
    parse_urls(buffer, sizeof(buffer));
    p = buffer;
    while (*p) {
      if (*p < 129 || p[1] == '\0') {
        /* single-byte character, or a lead byte cut off at the very end
           (must not step over the terminator) */
        fputc(*p++, f_out);
      } else {
        /* convert full-width ';' ==> '; ' and full-width ':' ==> ': ' */
        if (!memcmp(p, fw_semicolon, 2)) fprintf(f_out, "; ");
        else if (!memcmp(p, fw_colon, 2)) fprintf(f_out, ": ");
#if 0
        else if (!memcmp(p, "\xa3\xa8", 2)) fprintf(f_out, " (");
        else if (!memcmp(p, "\xa3\xa9", 2)) fprintf(f_out, ") ");
#endif
        else fprintf(f_out, "%c%c", *p, *(p+1));
        p += 2;
      }
    }
    fprintf(f_out, "}\n");
  }
  fprintf(f_out, "\n\\end{document}\n");

  fclose(f_out);

  fprintf(stderr, "%lu entries processed.\n", (unsigned long)entries_used);

  return 0;

mem_err:
  fprintf(stderr,"\nMemory allocation error!\n");
  if (f_in != NULL) fclose(f_in);
  if (f_out != NULL) fclose(f_out);
  return 3;
}

/*
 * Compute the sort key of ENTRY and store it, heap-allocated, in entry->key.
 *
 * If the name contains '@', the key is the text starting at that '@' and the
 * name is cut off just before it.
 *
 * Otherwise the key is built from the name with white space removed:
 * bytes below 129 are copied unchanged, and every two-byte character is
 * replaced by the first word of its pytable[] entry followed by a blank.
 * Two-byte characters with an invalid trail byte or an index outside the
 * table contribute nothing.  The finished key is converted to upper case.
 *
 * Terminates the program if memory cannot be allocated.
 */
static void getkey(Entry *entry)
{
  byte tmp[20];
  register byte *q=entry->name, *p;
  byte *key, *shrunk;
  int code;

  if ((p = strchr(q, '@')) != NULL) {
    entry->key = strdup(p);
    if (entry->key == NULL) goto mem_err;
    *p = '\0';
    return;
  }

  /* Worst case: two name bytes expand to a 19-character word plus a blank,
     i.e. at most 10 key bytes per name byte, plus the terminator. */
  key = malloc(strlen(q) * 10 + 1);
  if (key == NULL) goto mem_err;

  p = key;
  while (*q) {
    if (isspace(*q)) {q++; continue;}
    if (*q<129) {*(p++)=*(q++); continue;}
    if (q[1]=='\0') break;		/* lead byte without a trail byte */
    /* Each lead byte owns 190 table slots: trail bytes 64..254 without 127.
       Any other trail byte would index a slot of a different character. */
    if (q[1] < 64 || q[1] == 127 || q[1] == 255) {
      code = -1;
    } else {
      code = (q[0] - 129) * 190 + (q[1] - 64);
      if (q[1] > 127) code--;
    }
    q+=2;
    if (code>=0 && code<NbrOfCC) {
      /* copy pinyin, terminated by ' '; the field width keeps the word
         inside tmp, and tmp stays empty if the table entry is blank */
      tmp[0] = '\0';
      sscanf(pytable[code],"%19s",tmp);
      sprintf(p,"%s ", tmp); p+=strlen(p);
    } else if ( !code ) {
      /* only reachable when the table is empty (NbrOfCC <= 0) */
      *(p++)='!'; *(p++)=' ';
    }
  }
  *p='\0';

  for (p=key; *p; p++) *p=toupper(*p);

  /* hand back the unused tail; keep the larger block if shrinking fails */
  shrunk = realloc(key, strlen(key) + 1);
  entry->key = (shrunk != NULL) ? shrunk : key;
  return;

mem_err:
  fprintf(stderr,"\nMemory allocation error!\n");
  exit(1);
}

/*
 * Comparison function used by qsort() to order two entries.
 *
 * Entries whose key starts with '@' come first and are ordered among
 * themselves by the text after the '@'.  All other keys are compared one
 * blank-separated token at a time; when two tokens are equal, the next (up
 * to) two bytes of the names decide, and only if those agree as well does
 * the comparison move on to the next token.  A key that runs out of tokens
 * first sorts after the longer one.
 *
 * The keys are scanned in place: nothing is allocated or modified, and a
 * final token without a trailing blank simply ends the key.
 *
 * Returns a negative, zero or positive value as s1 sorts before, equal to
 * or after s2.
 */
static int cccomp(Entry *s1, Entry *s2)
{
  const byte *k1, *k2, *n1, *n2;
  size_t l1, l2;
  int i;

  if (s1->key[0] == '@' || s2->key[0] == '@') {
    if (s1->key[0] != '@') return 1;
    if (s2->key[0] != '@') return -1;
    return strcmp(s1->key+ 1, s2->key + 1);
  }

  k1=s1->key; k2=s2->key;
  n1=s1->name; n2=s2->name;

  while ( 1 ) {
    if ( *k1=='\0' && *k2=='\0' ) return 0;
    if ( *k1=='\0' ) return 1;
    if ( *k2=='\0' ) return -1;

    /* compare the current tokens the way strcmp() would compare copies */
    l1=strcspn((const char *)k1, " ");
    l2=strcspn((const char *)k2, " ");
    i=memcmp(k1, k2, l1 < l2 ? l1 : l2);
    if ( i==0 && l1!=l2 ) i = (l1 < l2) ? -1 : 1;
    if ( i ) return i;

    /* same token: compare the next two name bytes, second byte weighted
       higher, and step over them */
    i = 0;
    if ( *n1 ) {i += n1[0] + n1[1]*256; n1 += n1[1] ? 2 : 1;}
    if ( *n2 ) {i -= n2[0] + n2[1]*256; n2 += n2[1] ? 2 : 1;}
    if ( i ) return i;

    /* advance past the token and its separating blank, if there is one */
    k1 += l1; if ( *k1==' ' ) k1++;
    k2 += l2; if ( *k2==' ' ) k2++;
  }
}
