1 zlb 1.1 // gbk2uni.cpp : Transform GBK characters in .out file to unicode codes.
2 // the initial code is from out2uni in dvipdfmx project of KTUG
3 // authors: cxterm and Linbo Zhang in 2003
4 // reach them at http://www.ctex.org
5 // enhancer: hooklee (Shujun Li) in 2003
6 // reach hooklee at http://www.hooklee.com or www.chinatex.org
7
8 /////////////////////////////////////////////////////////////////////////
9 //********************hyperref书签文件编码规则**************************
10 //每个书签以如下形式存放 :\BOOKMARK [1][-]{section.0.1}{书签正文}{}
11 //非unicode模式下使用hyperref宏包,bookmark中的部分特殊字符以\ooo的形式插入
//' ':\040, '#':\043, '$':\044, '%':\045, '&':\046, '\':\134, '^':\136, '_':\137, '{':\173, '}':\175, '~':\176
13 //比较特殊的是'('和')',是以'\('和'\)'的形式插入的,而不是\ooo形式
14 //\S:\247
15 //所有其他字符和汉字均不作任何处理,在bookmark中保留
16 //已经知道,这种保留会造成部分汉字在bookmark中无法显示
17 //当使用\CJKchar{"0081}{"040}方式直接以GBK代码的方法插入汉字,bookmark中会生成如下的书签代码:
18 //"0081"040,显然,pdflatex忽略了\CJKchar命令本身和前后的{}把参数当做普通文本做了转换
19 //'^^xx^^yy'形式的CJK汉字在.out中有两种可能的出现方式:'^^xx^^yy'和'^^xxL'
20 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
21 //unicode模式下使用hyperref宏包,bookmark特殊字符均编码为\ooo\ooo或者\000x或者\000x\80y形式的unicode代码
22 zlb 1.1 //书签内容均以\376\377开头作为前导标示符
23 //经过实验,相应的bookmarkunicode代码插入规则如下:
24 //*****A类:编码为\ooo\ooo的特殊字符部分*****
25 //' '(空格):\000\040,使用\textvisiblespace也得到同样的书签
26 //'#'(\#):\000\043, '$'(\$):\000\044, '%'(\%):\000\045, '&'(\&):\000\046
27 //'(':\000\050; ')':\000\051
28 //'\'(\textbackslash):\000\134;
29 //'^'(\textasciicircum):\000\136; '_'(\_):\000\137
30 //'{'(\{):\000\173; '}'(\}):\000\175
31 //'~'(\textasciitilde):\000\176;
32 //*****B类:编码为\000x的普通字符部分,其中x表示字符本身*****
33 //abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
34 //|:',./!?;"-+=[]`*@(直接用@即可,无需\@)<(或\textless)>(或\textgreater)
35 //*****C类:单个汉字*****
36 //假设其高位码为H,低位码为L,则一般的GB汉字其插入形式为:\000H\80L
37 //但是上述情况存在例外,当L为普通拉丁字符时,将会以\000HL的形式插入
38 //如果任何汉字出现在一个低位为拉丁字符的GBK汉字之后,第二个汉字会以\80H\000L的形式出现
39 //只有\80HL是不可能出现的汉字代码
40 //当书签中包含多个汉字的时候,重复按照上述规则插入,汉字中间的其他字符按照正常规则插入
41 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
42 //@注意:当汉字低位字节为字符'}{~\_^'时,tex文档编译会出现错误,强行编译可能出现不可预测的行为
43 zlb 1.1 //@插入out文件的内容变得很混乱,一般书签正文会在低位'}'出现之后终止,gbk2uni只尽可能地消除这种影响
44 //@这可能使得部分GBK汉字在书签中消失或者显示为其他字符
45 //@使用张林波老师随CCT新版发行的cctconv程序可以解决这个问题
46 //@cctconv把汉字低位字节为'\', '{', '}', '^', '_', '~'的汉字低位字节分别改为'012345'以方便处理
47 //@或者使用-f开关转换可以将所有高位为1的字符转换为^^xx的形式,这在一些老的不支持扩展字符的tex系统中有用
48 //@gbk2uni处理这样的汉字假设cctconv已经运行(cctconv与CJK兼容,无需cct.sty即可得到正确的dvi文件)
49 //@这样的汉字经cctconv处理后,插入out文件的对应内容在unicode模式下有两种可能:
50 //@sprintf("\\000%d\\%03o", H, L)和sprintf("%d\\%03o", H, L),这里L已经是被转换回来的'}{~\_^'
51 //现在还不清楚是否也有sprintf("\\80%d\\%03o", H, L)形式出现(根据'\80HL'不出现推测这种形式可能也不出现)
52 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
53 //*****D类:\CJKchar{"00ab}{"0cd}形式的CJK汉字*****
54 //\000"\0000\0000\000a\000b\000"\0000\000c\000d
55 //显然,unicode模式的hyperref是如下工作的:
56 //第一步先生成非unicode模式的out文件,接着对其中的扩展字符做了一个后处理,但是这个处理对汉字不正确
57 //*****E类:'^^xx^^yy'形式的CJK汉字*****
58 //在unicode模式下,.out文件的内容一般为'\000^^xx\80^^yy'或者'\000^^xx\80L'
59 //估计其他CJK汉字的样式也可能出现:'\80^^xx\000^^yy'、'\80^^xx\000L'、'\000^^xx^^yy'、'\000^^xx\ooo'
60 //以上情况说明,在处理过程中,我们可以像TeX那样将每一个'^^xx'码字当成普通的ASCII字符来处理即可兼容'^^xx'代码
61 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
62 //如果\CJKchar{}{}中的前后两个参数位数不同,单从.out文件无法判断第二个参数何时结束
//因此,gbk2uni要求在tex文档中统一采用三位十六进制法\CJKchar{"0xx}{"0xx}表示前后两个参数
64 zlb 1.1 //考虑到在实际中\CJKchar用的比较少,这个约定应该不会算大的限制。
65 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
66
67 #define VERSION "0.22"
68
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <ctype.h>
|
74 zlb 1.1 # include <io.h>
75 # define PATH_MAX _MAX_PATH+1
76 #else
77 # include <unistd.h>
78 # include <limits.h>
79 #endif
80
81 #include "gbk2uni.h"
82
83 #ifndef WIN32
84 # define _fileno(f) f
//Return the size in bytes of the open stream f, leaving the current
//read/write position unchanged.
//Returns (size_t)-1 when the position cannot be queried or restored.
static size_t _filelength(FILE *f)
{
    long saved = ftell(f);  //ftell reports failure as -1L, so keep it signed
    long length;

    if (saved < 0)
        return (size_t)-1;
    if (fseek(f, 0, SEEK_END) != 0)
        return (size_t)-1;
    length = ftell(f);
    //always try to restore the caller's position, even if ftell failed
    if (fseek(f, saved, SEEK_SET) != 0 || length < 0)
        return (size_t)-1;
    return (size_t)length;
}
95 zlb 1.1 #endif
96
97 #if !defined(WIN32) && !defined(GO32)
//Case-insensitive comparison of at most n characters of s0 and s1.
//Returns 0 when the compared prefixes are equal ignoring case, otherwise the
//difference of the first pair of upper-cased characters that differ
//(a string that ends early compares as smaller).
static int strnicmp (const char *s0, const char *s1, int n)
{
    while (n > 0) {
        //cast to unsigned char: passing a negative char to toupper() is undefined
        int c0 = toupper((unsigned char)*s0);
        int c1 = toupper((unsigned char)*s1);

        if (c0 != c1) return c0 - c1;
        if (*s0 == '\0') return 0;  //both strings ended together
        s0++;
        s1++;
        n--;
    }
    return 0;
}
107 #endif
108
#define BYTE unsigned char
#define DWORD unsigned int

//is a valid high byte of some GBK character (0x81..0xfe after masking to 8 bits)
//the argument is parenthesised so that expressions can be passed safely
#define GBK_HIGH(h) ((0x81<=((h)&0xff)) && (((h)&0xff)<=0xfe))

//is a valid low byte of some GBK character (0x40..0xfe after masking to 8 bits)
#define GBK_LOW(l) ((0x40<=((l)&0xff)) && (((l)&0xff)<=0xfe))

int bLock = 0;//set by '-u'/'-l': write the lock line into the new .out file
int bUnlockOnly = 0;//set by '-unlock': copy lines without parsing them
int bCJKchar = 1;//enable \CJKchar support by default, disable it with '-nocjk' option
//int bIgnoreCJK7 = 0;
int bParsingErrors = 1;//print parsing errors to stderr, cleared by '-npe'
//int bVerbose = 0;
int bSilent = 0;//set by '-s': suppress the banner and the final message
FILE *Fout;//output stream, opened in main()
FILE *Fin;//input stream, opened in main()
127
128 void version(void)
129 {
130 printf("gbk2uni, version "VERSION", initially implemented by cxterm and ZLB in Jan. 2003\n");
131 printf("\t enhanced by hooklee in Mar. 2003.\n");
132 printf("\t please visit www.ctex.org and www.chinatex.org for more information.\n");
133 }
134
//Print the version banner followed by the command-line synopsis and the
//list of supported options to standard output
void usage(void)
{
    //the '-i' (ignore "^^xx^^yy" CJK characters) option is disabled and
    //therefore not listed here
    static const char *const help[] = {
        "Usage : gbk2uni [options] filename[.out] [options]\n",
        "Options:\n",
        "\t-u(-l)\t lock .out file to avoid overwritten in the next (pdf)latex run\n",
        "\t\t (.out file will be unlocked if no '-u' and '-l' options)\n",
        "\t-unlock\t unlock .out file without parsing .out file\n",
        "\t-s\t run gbk2uni silently (but errors remain)\n",
        "\t-cjk\t parse \\CJKchar{\"0xx}{\"0xx} command (default)\n",
        "\t-nocjk\t disable parsing \\CJKchar{\"0xx}{\"0xx} command\n",
        "\t-npe\t disable display of all parsing errors\n"
    };
    unsigned int k;

    version();
    for (k = 0; k < sizeof help / sizeof help[0]; k++)
        fputs(help[k], stdout);
}
150
151 /*
152 //write unicode into the file Fout
153 void putucode(unsigned int u)
154 {
155 unsigned int h, l;
156
157 l = u & 0xff;
158 zlb 1.1 h = (u >> 8) & 0xff;
159
160 fprintf(Fout,"%c%03o%c%03o",'\\',h,'\\',l);
161 }*/
162
163 //put a GBK code
164 void putGBKcode(BYTE h, BYTE l,int nLine)
165 {
166 unsigned int u;
167 unsigned int hu, lu;
168
169 if (!GBK_HIGH(h) || !GBK_LOW(l)) {
170 //if current GBK character is not valid, it will be discarded
171 if (bParsingErrors)
172 fprintf (stderr, "An invalid GBK character is found:\n\tLine %d: ... 0x%x%x\n", nLine, h,l);
173 return;
174 }
175
176 u = gbk2uni[(h-0x81)*192 + (l-0x40)];
177 lu = u & 0xff;
178 hu = (u >> 8) & 0xff;
179 zlb 1.1 fprintf(Fout,"\\%03o\\%03o",hu,lu);
180 }
181
//is a character c an octal digit '0'...'7'? returns 1 if so, 0 otherwise
int is8digit(char c)
{
    if (c < '0' || c > '7')
        return 0;
    return 1;
}
187
188 //is a character c '0'...'9','a'...'f','A'...'F'?
189 int is16digit(char c)
190 {
|
304 zlb 1.1 //in unicode mode, '\ooo\ooo' and '\000x' and '\000H\80L' are all possible for different characters
305 //possibly, '\oo' should be taken into consideration to avoid possible collapse of gbk2uni
306 strCode[0]=*str++;
307 for (i=1; i<3; i++) {
308 if(isdigit(*str)) strCode[i]=*str++;
309 else break;
310 }
311 strCode[i]='\0';
312 if (i == 1) {
313 if (bParsingErrors)
314 fprintf (stderr, "An incomplete special unicode code is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-1);
315 break;
316 }
317 lh = atoi(strCode);//get the high byte of current unicode character
318 if(i ==3 && *str == '\\') {//'\ooo\ooo': normal unicode character
319 fprintf(Fout, "\\%s\\", strCode);//directly output leading '\ooo\'
320 str++;
321 for (i=0; i<3; i++) {
322 if(isdigit(*str)) strCode[i]=*str++;//directly output the left 'ooo'
323 else break;
324 }
325 zlb 1.1 strCode[i]='\0';
326 if (i < 3) {
327 if (bParsingErrors)
328 fprintf (stderr, "An incomplete unicode code is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-1);
329 }
330 else fprintf(Fout, "%s", strCode);//directly output the left 'ooo'
331 break;
332 }
333 if (lh == 0 && *str == '\"' && bCJKchar) {//\CJKchar{"0xx}{"0xx} command in unicode mode
334 rtn = getCJKchar (&str, nLine);
335 if(rtn == 0) return str;
336 if(rtn == -1) break;//skip the first '\0000'
337 rtn = getCJKchar (&str, nLine);
338 if(rtn == 0) return str;
339 if(rtn == -1) break;
340 strCode[0] = *str++;//get the first digit of high byte
341 rtn = getCJKchar (&str, nLine);
342 if(rtn == 0) return str;
343 if(rtn == -1) break;
344 strCode[1] = *str++;//get the second digit of high byte
345 h = xtoi (strCode);//get high byte
346 zlb 1.1
347 rtn = getCJKchar (&str, nLine);
348 if(rtn == 0) return str;
349 if(rtn == -1) break;
350 if (*str != '\"') {//is the third unicode code '"'?
351 if (bParsingErrors)
352 fprintf (stderr, "An incomplete \\CJKchar{}{} command is found:\n\tLine %d: ... \"%s\"\n", nLine, str-4);
353 break;
354 }
355 rtn = getCJKchar (&str, nLine);
356 if(rtn == 0) return str;
357 if(rtn == -1) break;//skip the second '\0000'
358 rtn = getCJKchar (&str, nLine);
359 if(rtn == 0) return str;
360 if(rtn == -1) break;
361 strCode[0] = *str++;//get the first digit of low byte
362 rtn = getCJKchar (&str, nLine);
363 if(rtn == 0) return str;
364 if(rtn == -1) break;
365 strCode[1] = *str++;//get the second digit of low byte
366 l = xtoi (strCode);//get low byte
367 zlb 1.1 putGBKcode(h, l, nLine);//put unicode code via GBK2UNICODE transformation
368 break;
369 }
370 //'\000x' or '\000H\80L' or '\80H\000L' or '\000H\ooo'
371 rtn = translateChar(&str,nLine);
372 if (rtn == -1) break;//break when encountering errors
373 else h = (BYTE) rtn;
374 if (lh == 0 && h != 0 && h != '}' && h < 0x80) {//'\000x' format remains
375 //translate '\000x' to '\000\ooo' to get more robust result
376 fprintf(Fout, "\\000\\%03o", h);
377 break;
378 }
379 if (lh == 0 && h > 0x80) {//'\000HL' or '\000H\80L' or '\000H\ooo'
380 //h = (BYTE) *str++;//set high GBK byte
381 if ( *str != '\\') {//'\000HL'
382 rtn = translateChar(&str,nLine);
383 if (rtn == -1) break;//break when encountering errors
384 else l = (BYTE) rtn;
385 //l = *str++;
386 putGBKcode(h, l, nLine);//put unicode code via GBK2UNICODE transformation
387 }
388 zlb 1.1 else {//'\000H\80L' or '\000H\ooo'
389 if (*(str+1) == '8' && *(str+2) == '0') {// is '80L' after '\'?
390 str += 3;
391 rtn = translateChar(&str,nLine);
392 if (rtn == -1) break;//break when encountering errors
393 else l = (BYTE) rtn;
394 //l = *str++;//set low GBK byte
395 putGBKcode(h, l, nLine);//put unicode code via GBK2UNICODE transformation
396 }
397 else if ( is8digit(*(str+1)) && is8digit(*(str+2)) && is8digit(*(str+3)) ) {
398 l = otoi(str+1);
399 putGBKcode(h, l, nLine);//put a GBK code
400 /* if (l == '{' || l == '}' || l == '\\' || l == '^' || l == '_' || l == '~' || l == 0x80)
401 putGBKcode(h, l, nLine);//put a GBK code
402 else if (bParsingErrors)
403 fprintf (stderr, "An invalid GBK character (in cctconv format) is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-2);
404 */ str += 4;
405 }
406 else if (bParsingErrors)
407 fprintf (stderr, "An incomplete GBK character is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-1);
408 }
409 zlb 1.1 break;
410 }
411 if (lh == 80 && h > 0x80) {//'\80H\000L'
412 //h = (BYTE) *str++;//set high GBK byte
413 if (*str == '\\' && *(str+1) == '0' && *(str+2) == '0' && *(str+3) == '0') {// is '\000L' after '\80H'?
414 str += 4;
415 rtn = translateChar(&str,nLine);
416 if (rtn == -1) break;//break when encountering errors
417 else l = (BYTE) rtn;
418 //l = *str++;//set low GBK byte
419 putGBKcode(h, l, nLine);//put unicode code via GBK2UNICODE transformation
420 }
421 else if (bParsingErrors)
422 fprintf (stderr, "An incomplete GBK character is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-1);
423 break;
424 }
425 if (h == 80 && *str > 0) {//is '\80x' possible?
426 if(*str != '}' && *str != '\0') str++;//goto the next code
427 }
428 break;
429 }
430 zlb 1.1 //remove '\par' from .out file
431 if(*str == 'p' && *(str+1) == 'a' && *(str+2) == 'r') str+=3;
432 break;
433 /* case '^'://GBK characters with CJK format '^^xx^^yy'
434 while(*str=='^') str++;//skip all '^' characters
435 if (is16digit(*str) && is16digit(*(str+1))) {
436 h = xtoi(str); str += 2;
437 while(*str=='^') str++;//skip all '^' characters
438 if (is16digit(*str) && is16digit(*(str+1))) {
439 l = xtoi(str); str += 2;
440 if (!bIgnoreCJK7) putGBKcode(h, l, nLine);//put a GBK code if not ignoring
441 }
442 else if (*str < 0)
443 else if (bParsingErrors)
444 fprintf (stderr, "An incomplete GBK character (in CJK format) is found:\n\tLine %d: ... \"^^%s\"\n", nLine, str-2);
445 }
446 else if (bParsingErrors)
447 fprintf (stderr, "An incomplete GBK character (in CJK format) is found:\n\tLine %d: ... \"^^%s\"\n", nLine, str);
448 break;
449 */ case '\"'://\CJKchar{"0xx}{"0xx} command in non-unicode mode?
450 if (!bCJKchar) fprintf(Fout, "\\000\\%03o", *str++);//normal '"' character in non-unicode mode
451 zlb 1.1 else {//\CJKchar{"0xx}{"0xx} command in non-unicode mode
452 while(*str=='\"') str++;//skip all '"' characters
453 if (*str == '0') str++;//skip the first '0'
454 if (is16digit(*str) && is16digit(*(str+1))) {
455 h = xtoi(str); str += 2;
456 while(*str=='\"') str++;//skip all '"' characters
457 if (*str == '0') str++;//skip the second '0'
458 if (is16digit(*str) && is16digit(*(str+1))) {
459 l = xtoi(str); str += 2;
460 putGBKcode(h, l, nLine);//put a GBK code
461 }
462 else if (bParsingErrors)
463 fprintf (stderr, "An incomplete GBK character (in \\CJKchar{}{} format) is found:\n\tLine %d: ... \"%s\"\n", nLine, str-2);
464 }
465 else if (bParsingErrors)
466 fprintf (stderr, "An incomplete GBK character (in \\CJKchar{}{} format) is found:\n\tLine %d: ... \"%s\"\n", nLine, str);
467 }
468 break;
469 default://normal characters in non-unicode mode or cctconv GBK characters in both mode
470 //or '^^xx^^yy'/'^^xxL' TeX characters
471 rtn = translateChar(&str,nLine);
472 zlb 1.1 if (rtn == -1) break;//break when encountering errors
473 else h = (BYTE) rtn;
474 if (h != 0 && h != '}' && h < 0x80) fprintf(Fout, "\\000\\%03o", h);//normal latin character
475 else {//GBK character
476 //h = *str++;//GBK high byte
477 if (*str == '\\') {//characters generated by cctconv
478 str++;
479 for(i = 0; i < 3; i++) {
480 if(is8digit(*str)) strCode[i] = *str++;
481 else break;
482 }
483 strCode[i] = '\0';
484 if ( i != 3) {
485 if (bParsingErrors)
486 fprintf (stderr, "An incomplete GBK character (in cctconv format) is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-2);
487 break;
488 }
489 l = otoi(strCode);//get low byte from '\ooo'
490 putGBKcode(h, l, nLine);//put a GBK code
491 /* if (l == '{' || l == '}' || l == '\\' || l == '^' || l == '_' || l == '~')
492 putGBKcode(h, l, nLine);//put a GBK code
493 zlb 1.1 else if (bParsingErrors)
494 fprintf (stderr, "An invalid GBK character (in cctconv format) is found:\n\tLine %d: ... \"%s\"\n", nLine, str-i-2);
495 */ break;
496 }
497 //low byte of a normal CJK character or '^^yy'
498 rtn = translateChar(&str,nLine);
499 if (rtn == -1) break;//break when encountering errors
500 else l = (BYTE) rtn;
501 switch(l) {
502 case '}':
503 case '\0':
504 if (bParsingErrors)
505 fprintf (stderr, "An incomplete GBK character (in '^^xx^^yy' format) is found:\n\tLine %d: ... \"%s\"\n", nLine, str);
506 return str;//SHOULD exit when reading '\0' or '}'
507 case '0': l = '\\'; break;//reserved for future CCT
508 case '1': l = '{'; break;//reserved for future CCT
509 case '2': l = '}'; break;//reserved for future CCT
510 case '3': l = '^'; break;//reserved for future CCT
511 case '4': l = '_'; break;//reserved for future CCT
512 case '5': l = '~'; break;//reserved for future CCT
513 case '6': l = 0x80; break;//reserved for future CCT
514 zlb 1.1 default: ;//normal GBK character or '^^yy'
515 }
516 putGBKcode(h, l, nLine);//put a GBK code
517 //str++;
518 }
519 }
520 }
521 }
522
523 int main(int argc, char* argv[])
524 {
525 char inname[PATH_MAX]="";
526 char outname[PATH_MAX]="";
527 char bakname[PATH_MAX]="";
528 char *p;
529 unsigned int nLength;
530 unsigned char *b_in,*b2_in,*b3_in;
531 BYTE bUnicode=0;
532 int nLine, i;
533
534 for(i=1; i < argc; i++) {
535 zlb 1.1 #if defined(WIN32) || defined(GO32)
536 strlwr(argv[i]);
537 #endif
538 if (!strcmp(argv[i], "-u") || !strcmp(argv[i], "-l")) {
539 bLock = 1; if (bUnlockOnly) bUnlockOnly = 0;
540 }
541 // else if (!strcmp(argv[i], "-i")) bIgnoreCJK7 = 1;
542 // else if (!strcmp(argv[i], "-v")) bVerbose = 1;
543 else if (!strcmp(argv[i], "-s")) bSilent = 1;
544 else if (!strcmp(argv[i], "-cjk")) bCJKchar = 1;
545 else if (!strcmp(argv[i], "-nocjk")) bCJKchar = 0;
546 else if (!strcmp(argv[i], "-unlock")) {
547 bUnlockOnly = 1; if (bLock) bLock = 0;
548 }
549 else if (!strcmp(argv[i], "-npe")) bParsingErrors = 0;
550 else strcpy(inname, argv[i]);
551 }
552
553 if(inname[0] == '\0') {
554 usage();
555 return 1;
556 zlb 1.1 }
557
558 if (!bSilent) version();//display version and developer information
559
560 p = strrchr(inname, '.');
561 #ifdef WIN32
562 if((p == NULL) || stricmp(p, ".out")) strcat(inname, ".out");
563 #else
564 if((p == NULL) || strcmp(p, ".out")) strcat(inname, ".out");
565 #endif
566
567 strcpy(outname, inname);
568 strcat(outname, ".tmp");
569
570 Fin = fopen(inname, "r");
571 if(!Fin) {
572 fprintf(stderr, "Cannot open %s to read!\n", inname);
573 exit(1);
574 }
575 nLength = _filelength (_fileno(Fin));
576 if (nLength == -1L) {
577 zlb 1.1 fprintf(stderr, "Cannot get the file size of %s!\n", inname);
578 fclose (Fin); exit(1);
579 }
580
581 Fout = fopen(outname, "wt");
582 if(!Fout) {
583 fprintf(stderr, "Cannot open %s to write!\n", outname);
584 fclose (Fin); exit(1);
585 }
586
587 b_in = (unsigned char *)malloc(nLength);
588 if(!b_in) {
589 fprintf(stderr, "Memory allocation error!\n");
590 fclose (Fin); fclose (Fout); exit (2);
591 }
592
593 if(bLock) {
594 fprintf(Fout,"\\let\\WriteBookmarks\\relax\n");
595 }
596
597 nLine = 0;
598 zlb 1.1 while(!feof(Fin))
599 {
600 if( fgets(b_in, nLength, Fin) == NULL) break;
601 nLine++;
602 //if '\let\WriteBookmarks\relax' is found, skip the current line
603 if (strstr(b_in,"\\let\\WriteBookmarks\\relax\n")) continue;
604 if (bUnlockOnly) {//only unlock .out file when '-unlock' option is set
605 fputs(b_in, Fout);
606 continue;
607 }
608 b2_in = b_in;
609 while( (*b2_in==' ' || *b2_in=='\n' || *b2_in=='\r' || *b2_in=='\t') && *b2_in!='\0' ) b2_in++;
610 if ( *b2_in == '\0') {
611 // if (bParsingErrors)
612 // fprintf(stderr, "Warning: No bookmark content is found:\n\tLine %d: \"%s\"\n", nLine, b_in);
613 // fputs(b_in,Fout);//simply copy the wong line into new .out file
614 continue;
615 }
616 if (strnicmp(b2_in, "\\BOOKMARK",9)) {//skip invalid line in .out file
617 if (bParsingErrors)
618 fprintf(stderr, "Invalid line is found:\n\tLine %d: \"%s\"\n", nLine, b_in);
619 zlb 1.1 // fputs(b_in,Fout);//simply copy the wong line into new .out file
620 continue;
621 }
622 //find the position of the second parentheses
623 //'{' and '}' are displayed as '\173' and '\175' in .out file
624 //so there is no nested parentheses
625 while(*b2_in!='}' && *b2_in!='\0' && *b2_in!='\n' && *b2_in!='\r' && *b2_in!='\t') b2_in++;
626 while(*b2_in!='{' && *b2_in!='\0' && *b2_in!='\n' && *b2_in!='\r' && *b2_in!='\t') b2_in++;
627 if ( *b2_in == '\0') {
628 if (bParsingErrors)
629 fprintf(stderr, "Warning: No bookmark content is found:\n\tLine %d: \"%s\"\n", nLine, b_in);
630 // fputs(b_in,Fout);//simply copy the wong line into new .out file
631 continue;
632 }
633 b2_in ++;
634 fwrite ( b_in, 1, b2_in-b_in, Fout);//copy the left part of the new line
635 //Set unicode flag and skip the leading characters if encountering '\376\377'
636 if( !strncmp(b2_in, "\\376\\377", 8) ) {
637 b2_in += 8; bUnicode = 1;
638 }
639 fputs("\\376\\377",Fout);//set .out file to unicode format in non-unicode mode
640 zlb 1.1 b3_in=doparse(b2_in, bUnicode, nLine);//parse the middle part and write the new unicode codes
641 fputs(b3_in,Fout);//copy the right part of the new line
642 }
643
644 free (b_in);
645 fclose(Fin);
646 fclose(Fout);
647
648 sprintf(bakname, "%s.bak",inname);
649 remove(bakname);
650 rename(inname,bakname);
651 rename(outname,inname);
652
653 if (!bSilent) fprintf(stdout, "gbk2uni %s is finished!\n",inname);
654
655 // getchar();
656 return 0;
657
658 }
659
|