/* $Id: ctexscan.c,v 1.4 2005/03/21 02:03:24 zlb Exp $
 *
 * Generates list of input files for \input, \include commands, and copy
 * the file contents to stdout.
 *
 * In this implementation no white space is allowed within the
 * \input or \include commands.
 *
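 * nsii:
 * (This note was originally written in Chinese and is mis-encoded in this
 * copy.  What follows is a best-effort English reconstruction; the prose
 * of the original could not be recovered -- verify against an intact copy.)
 *
 * Notes on special characters in file names given to \includegraphics:
 *
 * 	forbidden in file names by DOS/Windows:		*?\|/":<>
 * 	usable directly in TeX:				!@^&'[]()
 * 	supported through the \string form:		~
 * 	dvi-related (exact meaning lost in this copy):	`
 * 	cannot be supported:				#$%{} and space */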

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#ifndef WIN32
#  include <unistd.h>
#else
#  include <direct.h>
#endif

#include "common.h"

#ifdef USE_KPSE
#  include <kpathsea/c-auto.h>
#  include <kpathsea/kpathsea.h>

extern Boolean use_kpse;
#endif

/* Scanning buffer.  The input file is read into `buffer' in pieces by
 * ctexscan(); `ptr' is the current scan position and `ptr_end' points just
 * past the last valid byte (ctexscan() stores a '\0' there, hence the
 * extra byte in the array). */
#define LEADINGSIZE 16	/* keep at least this number of chars before
			   the current position */
#define BUFFERSIZE (48*1024)
static char buffer[BUFFERSIZE + 1], *ptr, *ptr_end;

/* scratch buffer holding the filename currently being extracted */
static char fn[PATH_MAX];

/* How the file named by a command is handled; the individual values are
 * described at the `type' member of cmd_t. */
typedef enum {FT_INCLUDE, FT_VERBATIM, FT_CHECK, FT_NOCHECK} Filetype;

/* list of commands of the form: \xxxxxxxx[options]{file}
 * Note: the star-form (if any) must come before the non-star form. */
typedef struct {
    char *cmd;		/* command name (without the leading backslash) */
    short opts;		/* maximum number of optional [...] arguments that
			   are skipped before the {file} argument */
    /* data for extensions */
    char *ext;		/* default/required extension (NULL if none) */
    Boolean force;	/* True  ==> always append the extension to filename
			   False ==> append only if file is missing and
				     does not have given extension */
    Filetype type;	/* FT_INCLUDE:
			   	normal include file (check existence and scan)
			   FT_VERBATIM:
			   	copy the file verbatim (e.g., graphics files,
				check existence and copy to work directory
				if necessary, but don't scan)
			   FT_NOCHECK:
			 	transform filename, don't check existence and
			 	don't scan the file.
			   FT_CHECK:
			        transform filename if the file exists,
				but don't scan the file.
			 */
    char sep;		/* Separator if multiple filenames ('\0' if the
			   argument holds a single filename) */
} cmd_t;

/* Table of the commands recognized by parse_cmd().  The table is searched
 * in order and the first matching entry wins, which is why each star-form
 * is listed before its non-star form. */
static cmd_t cmdlist[] = {
/*  command		opt	ext	force	type		sep
 *  ----------------------------------------------------------------- */
    {"input",		0,	".tex",	False,	FT_INCLUDE,	'\0'},
    {"include",		0,	".tex",	True,	FT_INCLUDE,	'\0'},
    {"usepackage",	1,	".sty", True,	FT_INCLUDE,	','},
    {"RequirePackage",	1,	".sty", True,	FT_INCLUDE,	','},
    {"LoadClass",	1,	".cls", True,	FT_INCLUDE,	','},
    {"documentclass",	1,	".cls", True,	FT_INCLUDE,	'\0'},
    {"InputIfFileExists",0,	".tex", False,	FT_INCLUDE,	'\0'},
/*#ifdef USE_TEMPDIR*/
    /* files that are referenced but never scanned */
    {"verbatiminput*",	0,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"verbatiminput",	0,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"includegraphics*",2,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"includegraphics",	2,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"epsfbox",		0,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"epsffile",	0,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"psfig",		0,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"epsfig",		0,	NULL,	False,	FT_VERBATIM,	'\0'},
    {"placedrawing",	0,	NULL,	False,	FT_VERBATIM,	'\0'},
/*#else*/
    /* commands whose argument is only rewritten, never scanned */
    {"includeonly",	0,	".tex",	True,	FT_CHECK,	','},
    {"ProvidesClass",	0,	NULL,	False,	FT_NOCHECK,	'\0'},
    {"ProvidesPackage",	0,	NULL,	False,	FT_NOCHECK,	'\0'},
    {"ProvidesFile",	0,	NULL,	False,	FT_NOCHECK,	'\0'},
/*#endif*/
};
/* number of entries in cmdlist */
#define NCMDS	(sizeof(cmdlist)/sizeof(cmdlist[0]))

/* fin:  the input file being scanned (set to NULL by ctexscan() once it has
 *       been read completely and closed);
 * flst: the list file, to which check_file() writes two lines per file:
 *       the source filename and the output filename (or a "<...>" marker) */
static FILE *fin, *flst;

static int
chkcmd(char **str, const char *cmd)
/* Test whether the text at *str starts with the command name `cmd' and
 * the name is not immediately followed by a letter (so that "input" does
 * not match "inputx").
 *
 * Returns 1 and advances *str past the command name if the comparison is
 * successful; returns 0 and leaves *str untouched otherwise. */
{
    size_t len = strlen(cmd);

    /* strncmp stops at the terminating NUL of *str, so a text that is
     * shorter than `cmd' is never read past its end */
    if (strncmp(*str, cmd, len) != 0)
	return 0;

    /* the <ctype.h> functions require an unsigned char value: a plain
     * char holding a byte >= 0x80 (e.g., from GBK text) may be negative */
    if (isalpha((unsigned char)(*str)[len]))
	return 0;

    (*str) += len;
    return 1;
}

static Boolean 
iscomment(char *p)
/* Test if the buffer position pointed to by 'p' is a comment character.
 * 'p' must point into `buffer'.
 * Note: '\%' is not a comment char, but '\\%' is.
 *
 * Returns True if *p is a '%' preceded by an even number (possibly zero)
 * of backslashes, False otherwise. */
{
    int n = 0;

    if (*p != '%') return False;

    /* Count the backslashes immediately preceding the '%'.  The test
     * looks at p[-1] only while p > buffer, so no pointer before the
     * start of the buffer is ever formed. */
    while (p > buffer && p[-1] == '\\') {
	p--;
	n++;
    }

    /* if '%' follows odd number of '\\'s, then it is not comment char */
    return (n&1) == 0;
}

static char *MatchParenthesis(char *str)
/* Find the delimiter matching the one at *str.
 *
 * *str must be one of { } [ ] ( ) ` ' ; the search always runs forward
 * from str + 1, counting nested occurrences of the same pair and skipping
 * the text of '%' comments.
 *
 * Returns a pointer to the matching delimiter, or NULL if *str is not a
 * known delimiter, if the end of the string is reached first, or if a
 * comment extends to the end of the buffered data. */
{
    const char open = *str;
    char close;
    int depth = 1;

    if (open == '{')		close = '}';
    else if (open == '}')	close = '{';
    else if (open == '[')	close = ']';
    else if (open == ']')	close = '[';
    else if (open == '(')	close = ')';
    else if (open == ')')	close = '(';
    else if (open == '`')	close = '\'';
    else if (open == '\'')	close = '`';
    else			return NULL;

    for (;;) {
	char ch = *(++str);

	if (ch == '\0')
	    break;

	if (iscomment(str)) {
	    /* jump to the end of the comment line; the newline itself is
	     * stepped over by the increment at the top of the loop */
	    while (str < ptr_end && *str != '\n')
		str++;
	    if (str >= ptr_end)
		return NULL;
	}

	if (ch == open)
	    depth++;
	else if (ch == close && --depth == 0)
	    return str;
    }

    return NULL;
}

static Boolean
check_file(char *fn, cmd_t *cmd, Boolean ctx_flag, FILE *fout)
/* Process one filename found in the argument of command `cmd'.
 *
 * fn       - the filename; it is modified in place (extension appended or
 *            replaced, possibly replaced by the path found by kpathsea).
 *            It must point to a buffer of at least PATH_MAX bytes (both
 *            callers in this file pass the file-scope `fn' array).
 * cmd      - the command description (extension rules and file type).
 * ctx_flag - if True, a ".ctx" file is looked for even if `fn' exists.
 * fout     - the transformed filename is written here.
 *
 * For FT_INCLUDE and FT_VERBATIM files a pair of lines is also written to
 * the list file `flst': the source filename, followed by the output
 * filename, "<copy>" or "<nop>".
 *
 * Returns True if the filename is processed (written to fout) by this
 * function, False otherwise (empty or suspicious name, name too long for
 * the buffers, or file not found); in the latter case nothing is
 * written. */
{
    struct stat buf;
    char fnout[PATH_MAX], *p, *q, c;
#ifdef USE_TEMPDIR
#ifdef UNIX
    char path_buffer[PATH_MAX];
#else
    char path_buffer[2*PATH_MAX];
#endif
#endif
    Boolean absolute_flag = False;

    if (fn[0] == '\0') return False;

#ifdef DEBUG
    fprintf(stderr, "check_file: fn=%s\n", fn);
#endif

    /* Sanity checks on the filename */
    if (strchr(fn, '`') != NULL || strchr(fn, '|') != NULL ||
	strchr(fn, '<') != NULL || strchr(fn, '>') != NULL)
	return False;

    /* Check for extension */
    if (cmd->ext != NULL) {
	if (cmd->force) {
	    /* always append the extension to filename
	     * (e.g., \include, \usepackage, etc.) */
	    if (strlen(fn) + strlen(cmd->ext) >= PATH_MAX) return False;
	    strcat(fn, cmd->ext);
	}
	else {
	    /* append ext if file doesn't exist and doesn't have extension */
#ifdef USE_KPSE
	    if (kpse_find_file(fn, kpse_tex_format, 1) == NULL)
#else
	    if (stat(fn, &buf))
#endif
	    {
		p = get_file_ext(fn);
		if (*p == '\0') {
		    if (strlen(fn) + strlen(cmd->ext) >= PATH_MAX)
			return False;
		    strcpy(p, cmd->ext);
		}
	    }
	}
    }

    /* Strip off pathname from output filename */
    BASENAME(fn, p)
#ifdef USE_TEMPDIR
    strcpy(fnout, p);
#else
    if (cmd->type != FT_VERBATIM) {
	/* sizeof(CTEX_TMP) counts the terminating NUL of the prefix */
	if (sizeof(CTEX_TMP) - 1 + strlen(p) >= sizeof(fnout)) return False;
        sprintf(fnout, CTEX_TMP "%s", p);
    }
    else
	strcpy(fnout, fn);
#endif

    if (ctx_flag || stat(fn, &buf)) {
	/* if no extension or extension is .tex, check if .ctx file exists */
	p = get_file_ext(fn);
	if (*p == '\0' || !fnstrcmp(p, ".tex")) {
	    /* only try if a 4-character extension still fits into fn */
	    if ((size_t)(p - fn) + sizeof(".ctx") <= PATH_MAX) {
		strcpy(p, ".ctx");
		if (stat(fn, &buf)) strcpy(p, ".tex");
	    }
	}
    }

#ifdef USE_KPSE
    if (use_kpse) {
	p = kpse_find_file(fn, kpse_tex_format, 1);
#ifdef DEBUG
	fprintf(stderr, "kpse: lookup file \"%s\": %s.\n", fn,
		p == NULL ? "(not found)" : p);
#endif
	if (p != NULL) {
	    if (strlen(p) >= PATH_MAX) return False;
	    strcpy(fn, p);
	}
    }
#endif

    /* return silently if file not found. */
    if (stat(fn, &buf) && cmd->type != FT_NOCHECK)
	return False;

    /* Notes on graphics files (flag == FT_VERBATIM): if the filename contains:
     *	- no path	==> keep name, copy or link file to work directory,
     *	- absolute path	==> keep name, don't copy/link.
     *	- relative path	==> convert to absolute path, don't copy/link. */

    p = fnout;
#ifdef USE_TEMPDIR
    /* check for directory component in filename */
    if ((cmd->type == FT_VERBATIM) && (
#ifndef UNIX
	strchr(fn, ':') != NULL || strchr(fn, '\\') != NULL ||
#endif
	strchr(fn, '/') != NULL
    )) {
	/* convert to absolute pathname */
	absolute_flag = True;
#ifdef UNIX
	p = (*fn == '/') ? fn : realpath(fn, path_buffer);
	if (p == NULL) return False;
#else
	if (*fn == '/' || *fn == '\\' || strchr(fn, ':') != NULL)
	    p = fn;
	else {
	    size_t i;
	    if (getcwd(path_buffer, sizeof(path_buffer)) == NULL) return False;
	    i = strlen(path_buffer);
	    /* room for an optional separator, the filename and the NUL */
	    if (i + 1 + strlen(fn) >= sizeof(path_buffer)) return False;
	    /* add a separator unless the directory already ends with one
	     * (look at the last character, not at the terminating NUL) */
	    if (i == 0 ||
		(path_buffer[i - 1] != '/' && path_buffer[i - 1] != '\\'))
		path_buffer[i++] = '\\';
	    strcpy(path_buffer + i, fn);
	    p = path_buffer;
	}
#endif
    }
#endif	/* USE_TEMPDIR */

    /* replace filename with new name pointed by p */
#ifndef UNIX
    /* Change '\' to '/' if MSDOS */
    for (q = p; *q != '\0'; q++) if (*q == '\\') *q = '/';
#endif
    /* strip the file extension (which will be added by LaTeX) */
    q = get_file_ext(p);
    c = *q;	/* save extension */
    if (cmd->force) *q = '\0';
    /* the name is data, not a format string: a '%' in it must be
     * written literally */
    fputs(p, fout);
    *q = c;	/* restore extension */

    /* only FT_INCLUDE and FT_VERBATIM files are entered in the list file */
    if (cmd->type != FT_INCLUDE && cmd->type != FT_VERBATIM)
	return True;
    
    /* don't copy file when absolute pathname is used */
    if (absolute_flag)
	p = "<nop>";
    else if (cmd->type != FT_INCLUDE)
#ifdef USE_TEMPDIR
	p = "<copy>";
#else
	p = "<nop>";
#endif
    else
	p = fnout;

    fprintf(flst, "%s\n%s\n", fn, p);

    return True;
}

static void
parse_cmd(Boolean ctx_flag, FILE *fout)
{
    int i, n;
    char *p, *q;
    cmd_t *cmd;

    p = ptr;
    for (i=0; i<NCMDS; i++)
	if (chkcmd(&p, cmdlist[i].cmd)) break;

    if (i>=NCMDS) return;
    cmd = cmdlist + i;

    while (True) {
	if (iscomment(p)) {
	    while (p < ptr_end && *p != '\n') p++;
	    if (p >= ptr_end) return;
	    p++;
	    continue;
	}
	if (!isspace(*p)) break;
	p++;
    }
    while (ptr < p) fputc(*(ptr++), fout);

    /* skip optional arguments */
    n = cmd->opts;
    while (n-- && *p == '[') {
	p = MatchParenthesis(p++);
	if (p == NULL) {
error:
	    fprintf(stderr, "warning: incomplete command \"%s\" ignored\n",
			cmd->cmd);
	    return;
	}
	p++;
	while (True) {
	    if (iscomment(p)) {
		while (p < ptr_end && *p != '\n') p++;
		if (p >= ptr_end) return;
		p++;
		continue;
	    }
	    if (!isspace(*p)) break;
	    p++;
 	}
    }

    while (ptr < p) fputc(*(ptr++), fout);

    if (*p != '{') {
	q = p + 1;
    }
    else {
	fputc(*(ptr++), fout);
	if ((q = MatchParenthesis(p++)) == NULL)
	    goto error;
    }
    if (*p == '\0')
	goto error;

    /* Now p ... q - 1 is the (list of) filename(s) */
    while (p < q) {	/* loop on filenames */
	/* skip leading spaces in a filename */
	while (p < q) {
	    if (iscomment(p)) {
		while (p < ptr_end && *p != '\n') p++;
		if (p >= ptr_end) return;
		p++;
		fprintf(fout, "%%\n");
		continue;
	    }
	    if (!isspace(*p)) break;
	    if (*p == '\n') fputc('\n', fout);
	    p++;
 	}
	ptr = p;
	i = 0;
	n = 0; /* n is used to count number of newlines behind '%' */
	while (p < q) {
	    if (iscomment(p)) {
		while (p < ptr_end && *p != '\n') p++;
		if (p >= ptr_end) break;
		p++;
		n++;
		while (isspace(*p) && *p != '\n') p++;
		continue;
	    }
	    if (*p == cmd->sep) break;
	    fn[i++] = *(p++);
	}
	/* Now filename is in `fn' */
	while (i > 0 && isspace(fn[i - 1])) i--;
	if (i > 0) {
	    fn[i] = '\0';
	    /* skip scanning file if filename contains TeX macro(s) ('\') */
	    if (strchr(fn, '\\') == NULL && check_file(fn, cmd, ctx_flag, fout))
		ptr = p;	/* filename processed by check_file */
	    else
		while (ptr < p) fputc(*(ptr++), fout);
	    if (*p == cmd->sep) {
		p++;
		fputc(*(ptr++), fout);
	    }
	}
	/* output newlines hidden behind '%' to preserve line numbering */
	while (n) {
	    fprintf(fout, "%%\n");
	    n--;
	}
    }
}

int
ctexscan(const char *infile, const char *lstfile, Boolean ctx_flag, FILE *fout)
/* Scan the file `infile', copy its contents to `fout' with the filenames
 * in the recognized commands (see cmdlist) transformed by check_file(),
 * and write the list of files found to `lstfile'.
 *
 * Text in comments, in \verb/\everb arguments and in verbatim/everbatim
 * environments (including the star-forms) is copied without being
 * scanned for commands.  \endinput and similar commands are deliberately
 * not detected: the scan could stop at, say, '\def\abc{\endinput}'.
 *
 * ctx_flag is passed on to check_file().
 *
 * Returns 0 on success, 1 if the input file cannot be opened, 2 if the
 * list file cannot be opened. */
{
    char *q, *r;
    int i;
    char *endv = NULL;	/* non-NULL: the \end{...} closing verbatim mode */
    static char endv_buf[] = "\\end{everbatim*}";
#ifdef USE_KPSE
    static Boolean initialized = False;

    if (!initialized) {
	kpse_set_program_name("ctex", "latex");
	kpse_init_prog("", 600, NULL, NULL);
	initialized = True;
    }
#endif

    if ((fin = fopen(infile, "rt")) == NULL) {
	fprintf(stderr, "Cannot open input file \"%s\"\n", infile);
	return 1;
    }
    setvbuf(fin, NULL, _IOFBF, 32*1024);

    if ((flst = fopen(lstfile, "w+t")) == NULL) {
	fprintf(stderr, "Cannot open list file \"%s\"\n", lstfile);
	/* don't leak the input file that is already open */
	fclose(fin);
	fin = NULL;
	return 2;
    }
    setvbuf(flst, NULL, _IOFBF, 32*1024);

    fprintf(fout, "\\def\\CTeXPreproc{Created by ctex v" CTEX_VERSION
		  ", don't edit!}");
    ptr = ptr_end = buffer;
    while (1) {
	/* refill the buffer when it is used up or half consumed */
	if ((ptr >= ptr_end || ptr - buffer >= BUFFERSIZE/2) && fin != NULL) {
	    size_t size;
	    assert ( BUFFERSIZE/2 > LEADINGSIZE );
	    if (ptr - buffer > LEADINGSIZE) {
		/* move the unread data (plus LEADINGSIZE chars before it)
		 * to the start of the buffer; source and destination may
		 * overlap, so memmove is required */
		memmove(buffer, ptr - LEADINGSIZE, ptr_end - ptr + LEADINGSIZE);
		ptr_end -= ptr - buffer - LEADINGSIZE;
		ptr -= ptr - buffer - LEADINGSIZE;
	    }
	    if (ptr_end - buffer < BUFFERSIZE) {
		size = fread(ptr_end, 1, BUFFERSIZE - (ptr_end - buffer), fin);
		if (!size) {
		    fclose(fin);
		    fin = NULL;
		}
		ptr_end += size;
	    }
	    *ptr_end = '\0';
	}

	if (ptr >= ptr_end) break;

	/* '%' starts a comment, except in verbatim mode where it is an
	 * ordinary character (skipping the line there could hide the
	 * \end{...} that terminates the environment) */
	if (endv == NULL && iscomment(ptr)) {
	    while (ptr < ptr_end && *ptr != '\n') fputc(*(ptr++), fout);
	    continue; 
	}

	if (*ptr != '\\') {
	    fputc(*(ptr++), fout);
	    continue;
	}

#define STRNCMP(p, v)  (strncmp(p, v, strlen(v)))
	if (endv != NULL) {
	    /* We are within verbatim mode */
	    if (!STRNCMP(ptr, endv)) {
		fputs(endv, fout);
		ptr += strlen(endv);
		endv = NULL;
		continue;
	    }
	    /* any other backslash is copied; the text following it must
	     * not be examined for commands */
	    fputc(*(ptr++), fout);
	    continue;
	}
	
	/* check and skip \verb*, \verb, \everb*, \everb: copy up to (not
	 * including) the closing delimiter */
	if (((i = 6) && !STRNCMP(ptr, "\\verb*")) || 
	    ((i = 7) && !STRNCMP(ptr, "\\everb*")) || 
	    ((((i = 5) && !STRNCMP(ptr, "\\verb")) ||
	      ((i = 6) && !STRNCMP(ptr, "\\everb"))) &&
	     !isalpha((unsigned char)ptr[i]))) {
	    /* ptr[i] is the delimiter; if the data ends right after the
	     * command there is none and nothing beyond the terminating
	     * NUL may be searched */
	    q = (ptr[i] != '\0') ? strchr(ptr + i + 1, ptr[i]) : NULL;
	    if (q == NULL) q = ptr_end;
	    while (ptr < q) fputc(*(ptr++), fout);
	    continue;
	}

	/* check for \begin{verbatim}, \begin{verbatim*}, \begin{everbatim}
	 * and \begin{everbatim*}; build the matching \end{...} in endv_buf
	 * from the environment name and enter verbatim mode */
	if (((i = 16) && !STRNCMP(ptr, "\\begin{verbatim}")) ||
	    ((i = 17) && !STRNCMP(ptr, "\\begin{verbatim*}")) ||
	    ((i = 17) && !STRNCMP(ptr, "\\begin{everbatim}")) ||
	    ((i = 18) && !STRNCMP(ptr, "\\begin{everbatim*}"))) {
	    fwrite(ptr, 1, i, fout);
	    /* copy "name}" (the text after "\begin{") behind "\end{" */
	    memcpy(endv_buf + 5, ptr + 7, i - 7);
	    endv_buf[i - 2] = '\0';
	    endv = endv_buf;
	    ptr += i;
	    continue;
	}

	/* output the backslash; ptr now points to the command name */
	fputc(*(ptr++), fout); 
	q = ptr;
	
	if (chkcmd(&q, "input")) {
	    /* \input filename */
	    while (isspace((unsigned char)*q)) q++;
	    if (*q != '{') {
		cmd_t cmd = {"input", 0, ".tex", False,	FT_INCLUDE, '\0'};
		/* longest name stored in `fn'; some room is kept for the
		 * extension check_file() may append */
		const size_t fn_max = sizeof(fn) - 1 - 8;
		size_t len = 0;
		Boolean too_long = False;

		/* special case: \input filename */
		while (ptr < q) fputc(*(ptr++), fout);
		r = ptr;
		while (!isspace((unsigned char)*r) && *r != '\n' &&
		       *r !='\0' && *r != '\\') {
		    if (len < fn_max)
			fn[len++] = *r;
		    else
			too_long = True;
		    r++;
		}
		fn[len] = '\0';
		/* an over-long name is copied through unprocessed */
		if (too_long || !check_file(fn, &cmd, ctx_flag, fout))
		    while (ptr < r) fputc(*(ptr++), fout);
		else
		    ptr = r;
		continue;
	    }
	}

	parse_cmd(ctx_flag, fout);
    }

    fclose(flst);

    return 0;
}

/* Find the extension of the filename `fn'.
 *
 * Returns a pointer to the last '.' in the last path component of `fn'
 * (so the result includes the dot), or a pointer to the terminating NUL
 * of `fn' if that component contains no '.'.  Path components are
 * separated by '/' (and, unless UNIX is defined, also by '\\' and ':').
 * The result always points into `fn'. */
char *get_file_ext(char *fn)
{
    size_t len = strlen(fn);
    size_t i;

    /* Scan backwards using an index, so that no pointer before the start
     * of `fn' is formed (e.g., for an empty string). */
    for (i = len; i > 0; i--) {
	char ch = fn[i - 1];

	if (ch == '.')
	    return fn + i - 1;
#ifdef UNIX
	if (ch == '/')
#else
	if (ch == '/' || ch == '\\' || ch == ':')
#endif
	    break;	/* start of the last path component reached */
    }

    return fn + len;
}
