File: | libs/tiff-4.0.2/contrib/iptcutil/iptcutil.c |
Location: | line 395, column 3 |
Description: | Value stored to 'length' is never read |
1 | /* $Id: iptcutil.c,v 1.8 2011-05-08 00:44:18 fwarmerdam Exp $ */ |
2 | |
#include "tif_config.h"

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

#ifdef WIN32
#define STRNICMP strnicmp
#else
#define STRNICMP strncasecmp
#endif
27 | |
/* Associates a numeric IPTC identifier with a human-readable label. */
typedef struct _tag_spec
{
  short
    id;      /* value compared against the second byte that follows 0x1c */

  char
    *name;   /* label written after the numeric prefix in the text form */
} tag_spec;

/* Identifiers that formatIPTC can label; the table is never modified. */
static const tag_spec tags[] = {
  { 5,"Image Name" },
  { 7,"Edit Status" },
  { 10,"Priority" },
  { 15,"Category" },
  { 20,"Supplemental Category" },
  { 22,"Fixture Identifier" },
  { 25,"Keyword" },
  { 30,"Release Date" },
  { 35,"Release Time" },
  { 40,"Special Instructions" },
  { 45,"Reference Service" },
  { 47,"Reference Date" },
  { 50,"Reference Number" },
  { 55,"Created Date" },
  { 60,"Created Time" },
  { 65,"Originating Program" },
  { 70,"Program Version" },
  { 75,"Object Cycle" },
  { 80,"Byline" },
  { 85,"Byline Title" },
  { 90,"City" },
  { 95,"Province State" },
  { 100,"Country Code" },
  { 101,"Country" },
  { 103,"Original Transmission Reference" },
  { 105,"Headline" },
  { 110,"Credit" },
  { 115,"Source" },
  { 116,"Copyright String" },
  { 120,"Caption" },
  { 121,"Local Caption" },
  { 122,"Caption Writer" },
  { 200,"Custom Field 1" },
  { 201,"Custom Field 2" },
  { 202,"Custom Field 3" },
  { 203,"Custom Field 4" },
  { 204,"Custom Field 5" },
  { 205,"Custom Field 6" },
  { 206,"Custom Field 7" },
  { 207,"Custom Field 8" },
  { 208,"Custom Field 9" },
  { 209,"Custom Field 10" },
  { 210,"Custom Field 11" },
  { 211,"Custom Field 12" },
  { 212,"Custom Field 13" },
  { 213,"Custom Field 14" },
  { 214,"Custom Field 15" },
  { 215,"Custom Field 16" },
  { 216,"Custom Field 17" },
  { 217,"Custom Field 18" },
  { 218,"Custom Field 19" },
  { 219,"Custom Field 20" }
};
91 | |
/*
 * Write the first len bytes of s to ofile as a double-quoted string
 * followed by a newline.  HTML conventions are used so that the text form
 * can carry arbitrary bytes:
 *   - '&' is written as "&amp;" and '"' as "&quot;"
 *     ('<' and '>' become "&lt;" / "&gt;" when HANDLE_GT_LT is defined);
 *   - control characters are written as decimal references "&#N;";
 *   - every other byte is copied unchanged.
 * Nothing is written between the quotes when len <= 0.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /* <ctype.h> classifiers require a value representable as unsigned char */
    int c = (unsigned char) *s;
    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}
126 | |
/*
 * Case-insensitive bounded compare used for entity names.  Only defined
 * here when the file preamble has not already provided it.
 */
#ifndef STRNICMP
# ifdef WIN32
#  define STRNICMP strnicmp
# else
#  define STRNICMP strncasecmp
# endif
#endif

/* One named HTML entity and the single character it stands for. */
typedef struct _html_code
{
  short
    len;     /* length of code, including the leading '&' and trailing ';' */
  const char
    *code,   /* entity text, e.g. "&amp;" */
    val;     /* replacement character */
} html_code;

static const html_code html_codes[] = {
#ifdef HANDLE_GT_LT
  { 4,"&lt;",'<' },
  { 4,"&gt;",'>' },
#endif
  { 5,"&amp;",'&' },
  { 6,"&quot;",'"' }
};

/*
 * Convert the HTML escape sequence that starts at s back to the single
 * character it represents, closing the gap in place.
 *
 *   s    - NUL-terminated, writable string positioned on the '&'.
 *   len  - number of characters available at s; nothing is done when
 *          it is <= 0.
 *
 * Two forms are recognised: decimal references "&#N;" with one to three
 * digits, and the named entities listed in html_codes (compared without
 * regard to case).
 *
 * Returns the number of characters dropped from the string, or 0 when s
 * does not start with a recognised, properly terminated sequence; in that
 * case the string is left untouched.
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <= 0 || s == (char *) NULL || *s == '\0')
    return 0;

  if (s[1] == '#')
    {
      int val, o;

      if (sscanf(s, "&#%d;", &val) == 1)
        {
          /*
           * Find the terminating ';'.  sscanf matched a number after "&#",
           * so s[2] is not the terminator and the scan can start at index
           * 3.  The NUL test keeps the scan inside the string.
           */
          o = 3;
          while (o <= 5 && s[o] != '\0' && s[o] != ';')
            o++;
          if (o > 5 || s[o] != ';')
            return 0;
          /* source and destination overlap, so strcpy must not be used */
          memmove(s+1, s+1+o, strlen(s+1+o)+1);
          *s = (char) val;
          return o;
        }
    }
  else
    {
      int
        i,
        codes = (int) (sizeof(html_codes) / sizeof(html_code));

      for (i=0; i < codes; i++)
      {
        if (html_codes[i].len <= len &&
            STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
          {
            memmove(s+1, s+html_codes[i].len,
                    strlen(s+html_codes[i].len)+1);
            *s = html_codes[i].val;
            return html_codes[i].len-1;
          }
      }
    }

  return 0;
}
194 | |
195 | int formatIPTC(FILE *ifile, FILE *ofile) |
196 | { |
197 | unsigned int |
198 | foundiptc, |
199 | tagsfound; |
200 | |
201 | unsigned char |
202 | recnum, |
203 | dataset; |
204 | |
205 | char |
206 | *readable, |
207 | *str; |
208 | |
209 | long |
210 | tagindx, |
211 | taglen; |
212 | |
213 | int |
214 | i, |
215 | tagcount = sizeof(tags) / sizeof(tag_spec); |
216 | |
217 | char |
218 | c; |
219 | |
220 | foundiptc = 0; /* found the IPTC-Header */ |
221 | tagsfound = 0; /* number of tags found */ |
222 | |
223 | c = getc(ifile)_IO_getc (ifile); |
224 | while (c != EOF(-1)) |
225 | { |
226 | if (c == 0x1c) |
227 | foundiptc = 1; |
228 | else |
229 | { |
230 | if (foundiptc) |
231 | return -1; |
232 | else |
233 | continue; |
234 | } |
235 | |
236 | /* we found the 0x1c tag and now grab the dataset and record number tags */ |
237 | dataset = getc(ifile)_IO_getc (ifile); |
238 | if ((char) dataset == EOF(-1)) |
239 | return -1; |
240 | recnum = getc(ifile)_IO_getc (ifile); |
241 | if ((char) recnum == EOF(-1)) |
242 | return -1; |
243 | /* try to match this record to one of the ones in our named table */ |
244 | for (i=0; i< tagcount; i++) |
245 | { |
246 | if (tags[i].id == recnum) |
247 | break; |
248 | } |
249 | if (i < tagcount) |
250 | readable = tags[i].name; |
251 | else |
252 | readable = ""; |
253 | |
254 | /* then we decode the length of the block that follows - long or short fmt */ |
255 | c = getc(ifile)_IO_getc (ifile); |
256 | if (c == EOF(-1)) |
257 | return 0; |
258 | if (c & (unsigned char) 0x80) |
259 | { |
260 | unsigned char |
261 | buffer[4]; |
262 | |
263 | for (i=0; i<4; i++) |
264 | { |
265 | c = buffer[i] = getc(ifile)_IO_getc (ifile); |
266 | if (c == EOF(-1)) |
267 | return -1; |
268 | } |
269 | taglen = (((long) buffer[ 0 ]) << 24) | |
270 | (((long) buffer[ 1 ]) << 16) | |
271 | (((long) buffer[ 2 ]) << 8) | |
272 | (((long) buffer[ 3 ])); |
273 | } |
274 | else |
275 | { |
276 | unsigned char |
277 | x = c; |
278 | |
279 | taglen = ((long) x) << 8; |
280 | x = getc(ifile)_IO_getc (ifile); |
281 | if ((char)x == EOF(-1)) |
282 | return -1; |
283 | taglen |= (long) x; |
284 | } |
285 | /* make a buffer to hold the tag data and snag it from the input stream */ |
286 | str = (char *) malloc((unsigned int) (taglen+1)); |
287 | if (str == (char *) NULL((void*)0)) |
288 | { |
289 | printf("Memory allocation failed"); |
290 | return 0; |
291 | } |
292 | for (tagindx=0; tagindx<taglen; tagindx++) |
293 | { |
294 | c = str[tagindx] = getc(ifile)_IO_getc (ifile); |
295 | if (c == EOF(-1)) |
296 | { |
297 | free(str); |
298 | return -1; |
299 | } |
300 | } |
301 | str[ taglen ] = 0; |
302 | |
303 | /* now finish up by formatting this binary data into ASCII equivalent */ |
304 | if (strlen(readable) > 0) |
305 | fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable); |
306 | else |
307 | fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum); |
308 | formatString( ofile, str, taglen ); |
309 | free(str); |
310 | |
311 | tagsfound++; |
312 | |
313 | c = getc(ifile)_IO_getc (ifile); |
314 | } |
315 | return tagsfound; |
316 | } |
317 | |
/* Forward declaration; tokenizer is defined further down in this file. */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
  int *next,char *quoted);
321 | |
/*
 * Read one line of any length from file into the heap buffer b, growing
 * the buffer with realloc as needed.
 *
 *   b     - buffer obtained from malloc/realloc; ownership passes to this
 *           function, and only the returned pointer may be used afterwards.
 *   blen  - in: number of bytes usable at b.
 *           out: length of the line plus one for the terminator, or 0 when
 *           NULL is returned.
 *   file  - stream to read from.
 *
 * The newline is consumed but not stored.  Returns the (possibly moved)
 * NUL-terminated buffer, or NULL when no characters were read before the
 * newline or end of file, or when memory could not be obtained.  The
 * buffer has already been released whenever NULL is returned.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len;

  size_t
    used = 0;   /* characters stored so far */

  len = *blen;
  *blen = 0;
  if (b == (char *) NULL)
    return (char *) NULL;

  for ( ; ; )
  {
    c = fgetc(file);
    if (c == EOF || c == '\n')
      break;
    /* keep room for this character and the terminator */
    if (len <= 0 || used + 1 >= (size_t) len)
      {
        char
          *grown;

        if (len > (INT_MAX - 2) / 2)
          {
            free(b);
            return (char *) NULL;
          }
        len = (len > 0) ? len * 2 : 2;
        /* go through a temporary so the old block is not lost on failure */
        grown = (char *) realloc(b, (size_t) len + 2);
        if (grown == (char *) NULL)
          {
            free(b);
            return (char *) NULL;
          }
        b = grown;
      }
    b[used++] = (char) c;
  }

  if (used == 0)
    {
      free(b);
      return (char *) NULL;
    }
  b[used] = '\0';
  *blen = (int) used + 1;
  return b;
}
365 | |
#define BUFFER_SZ 4096

/*
 * Convert the text form of IPTC data read from ifile into binary records
 * written to ofile.  Each input line is expected to look like
 *
 *     D#R[#Name]="value"
 *
 * where D and R are decimal numbers and value may contain the HTML escape
 * sequences understood by convertHTMLcodes.  Reading stops at the first
 * empty line or at end of input, which is when super_fgets returns NULL.
 *
 * NOTE(review): the short length form is used for every value below
 * 0x10000 bytes, so lengths from 0x8000 upwards set the top bit that
 * formatIPTC treats as the long-form flag -- confirm the intended format.
 */
static void textToIPTC(FILE *ifile, FILE *ofile)
{
  char
    *line;

  unsigned char
    recnum = 0,
    dataset = 0;

  int
    inputlen = BUFFER_SZ;

  line = (char *) malloc((size_t) inputlen);
  if (line == (char *) NULL)
    {
      fprintf(stderr, "Memory allocation failed\n");
      return;
    }

  while ((line = super_fgets(line, &inputlen, ifile)) != NULL)
  {
    char
      brkused,
      quoted,
      *token,
      *newstr;

    int
      state = 0,   /* 0: key part before '=', 1: value part after it */
      next = 0;

    token = (char *) malloc((size_t) inputlen);
    newstr = (char *) malloc((size_t) inputlen);
    if (token == (char *) NULL || newstr == (char *) NULL)
      {
        fprintf(stderr, "Memory allocation failed\n");
        free(token);
        free(newstr);
        break;
      }

    while (tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
           &brkused, &next, &quoted) == 0)
    {
      if (state == 0)
        {
          /* key: the first two '#'-separated fields are the numbers */
          int
            field = 0,
            keypos = 0;

          char
            keybrk,
            keyquoted;

          while (tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                 &keybrk, &keypos, &keyquoted) == 0)
          {
            if (field == 0)
              dataset = (unsigned char) atoi(newstr);
            else
              if (field == 1)
                recnum = (unsigned char) atoi(newstr);
            field++;
          }
        }
      else
        if (state == 1)
          {
            int
              valpos = 0;

            unsigned long
              k,
              len;

            char
              valbrk,
              valquoted;

            /* undo the HTML escapes in place, tracking the shrinking length */
            len = strlen(token);
            while (tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                   &valbrk, &valpos, &valquoted) == 0)
            {
              if (valbrk && valpos > 0)
                {
                  char
                    *s = &token[valpos-1];

                  len -= convertHTMLcodes(s, (int) strlen(s));
                }
            }

            fputc(0x1c, ofile);
            fputc(dataset, ofile);
            fputc(recnum, ofile);
            if (len < 0x10000)
              {
                fputc((int) ((len >> 8) & 255), ofile);
                fputc((int) (len & 255), ofile);
              }
            else
              {
                fputc((int) (((len >> 24) & 255) | 0x80), ofile);
                fputc((int) ((len >> 16) & 255), ofile);
                fputc((int) ((len >> 8) & 255), ofile);
                fputc((int) (len & 255), ofile);
              }
            for (k=0; k < len; k++)
              fputc(token[k], ofile);
          }
      state++;
    }
    free(token);
    free(newstr);
  }
  free(line);
}

/*
 * iptcutil -t | -b [-i file] [-o file]
 *
 *   -b  input is binary IPTC data, output is text (the default)
 *   -t  input is text, output is binary IPTC data
 *   -i  read from file instead of stdin
 *   -o  write to file instead of stdout
 *
 * The open mode used by -i and -o depends on the conversion mode in effect
 * when the option is seen, so -t or -b should come first.  Returns 1 on a
 * usage error or when a file cannot be opened, otherwise 0.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* iptc binary, or iptc text */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c;

  const char
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if( argc < 2 )
    {
      puts(usage);
      return 1;
    }

  mode = 0;

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c == '-' || c == '/')
      {
        c = argv[i][1];
        switch( c )
        {
          case 't':
            mode = 1;
#ifdef WIN32
            /* Set "stdout" to binary mode: */
            _setmode( _fileno( ofile ), _O_BINARY );
#endif
            break;
          case 'b':
            mode = 0;
#ifdef WIN32
            /* Set "stdin" to binary mode: */
            _setmode( _fileno( ifile ), _O_BINARY );
#endif
            break;
          case 'i':
            /* the option needs a file name after it */
            if (i+1 >= argc)
              {
                puts(usage);
                return 1;
              }
            if (mode == 0)
              ifile = fopen(argv[++i], "rb");
            else
              ifile = fopen(argv[++i], "rt");
            if (ifile == (FILE *)NULL)
              {
                printf("Unable to open: %s\n", argv[i]);
                return 1;
              }
            break;
          case 'o':
            if (i+1 >= argc)
              {
                puts(usage);
                return 1;
              }
            if (mode == 0)
              ofile = fopen(argv[++i], "wt");
            else
              ofile = fopen(argv[++i], "wb");
            if (ofile == (FILE *)NULL)
              {
                printf("Unable to open: %s\n", argv[i]);
                return 1;
              }
            break;
          default:
            printf("Unknown option: %s\n", argv[i]);
            return 1;
        }
      }
    else
      {
        puts(usage);
        return 1;
      }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);
  else           /* handle text form of iptc info */
    textToIPTC(ifile, ofile);

  fclose( ifile );
  fclose( ofile );

  return 0;
}
573 | |
/*
  tokenizer() is a generalized, finite state token parser.  Each call
  extracts the next token from a string of characters.  The characters
  used for white space, for break characters, and for quotes can be
  specified, as can a single escape character that removes any special
  meaning from the character following it.

  Terminology:

    token:            a single unit of information in the form of a group
                      of characters.

    white space:      characters that are ignored outside quotes and that
                      terminate a non-quoted token, like blanks and tabs.

    break character:  a character that separates non-quoted tokens; commas
                      are a common choice.  It ends a token the same way
                      white space does, except that two break characters
                      with nothing or only white space between them
                      produce an empty token.  With blank as white space
                      and comma as break character, the line

                          A, B, C , , DEF

                      consists of 5 tokens: "A", "B", "C", "" and "DEF".

    quote character:  a character that, when surrounding a group of other
                      characters, causes the group to be treated as a
                      single token no matter how many white space or break
                      characters it contains.  A token always terminates
                      after the closing quote, and the quotes themselves
                      are not part of the token.  With ' as the quote
                      character added to the example above,

                          A, ' B, CD'EF GHI

                      consists of 4 tokens: "A", " B, CD", "EF" and "GHI".

    escape character: a character which itself is dropped but which causes
                      the next character to be stored as is.  With ^ as
                      the escape character and blank as white space,
                      "K^ L" is the single token "K L".  An escape in the
                      last position of the string is stored like a normal
                      character.

  Calling sequence:

    result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
                     brkused,next,quoted)

    result:   1 if *next was already at the end of the string on entry (no
              token is produced), otherwise 0 (this is an "int").

    flag:     only the low order 2 bits are used.
                1 => convert non-quoted tokens to upper case
                2 => convert non-quoted tokens to lower case
                any other value => do not convert
              (this is an "unsigned").

    token:    receives the next token, always NUL terminated
              (this is a "char[]").

    maxtok:   the size of "token".  Characters that do not fit are
              dropped (this is an "int").

    string:   the string to be parsed (this is a "char[]").

    white:    a string of the valid white space characters, for example
              " \t" (this is a "char[]").

    break:    a string of the valid break characters, for example ";,"
              (this is a "char[]").

    quote:    a string of the valid quote characters, for example "'\"".
              A quoted token must be closed by the same quote character
              that opened it; any other quote character inside it is
              taken literally (this is a "char[]").

    escape:   the escape character (NOT a string ... only one allowed).
              Use zero if none is desired (this is a "char").

    brkused:  receives the break character that ended the token.  It
              receives a quote character instead when an opening quote
              directly follows the token, and zero in every other case
              (this is a pointer to a "char").

    next:     index of the first character not yet parsed.  Set it to 0
              before the first call and leave it alone after that; it is
              advanced on every call (this is a pointer to an "int").

    quoted:   set to 1 if the token was quoted and 0 if not
              (this is a pointer to a "char").

  The parser keeps its working state in the file-scope variables below, so
  it is not reentrant.
*/

/* states */

#define IN_WHITE 0   /* before the token: skipping white space */
#define IN_TOKEN 1   /* inside a non-quoted token */
#define IN_QUOTE 2   /* inside a quoted token */
#define IN_OZONE 3   /* after the token, before the next break or token */

int      _p_state;      /* current state */
unsigned _p_flag;       /* option flag */
char     _p_curquote;   /* current quote char */
int      _p_tokpos;     /* current token pos */

/*
 * Return the position of the first occurrence of ch in string, or -1 if
 * it does not occur ... used only by "tokenizer".
 */
int sindex(char ch,const char *string)
{
  const char *cp;

  for (cp=string; *cp; ++cp)
    if (ch == *cp)
      return (int)(cp-string);
  return -1;
}

/*
 * Append ch to the token being built in string (capacity max), applying
 * the case conversion selected by _p_flag unless the parser is inside a
 * quote.  The character is silently dropped when it would not leave room
 * for the terminator ... used only by "tokenizer".
 */
void chstore(char *string,int max,char ch)
{
  char c;

  if (_p_tokpos < 0 || _p_tokpos >= max-1)
    return;

  if (_p_state == IN_QUOTE)
    c=ch;
  else
    switch (_p_flag & 3)
    {
      /* <ctype.h> functions need a value representable as unsigned char */
      case 1:             /* convert to upper */
        c=(char) toupper((unsigned char) ch);
        break;

      case 2:             /* convert to lower */
        c=(char) tolower((unsigned char) ch);
        break;

      default:            /* use as is */
        c=ch;
        break;
    }
  string[_p_tokpos++]=c;
}

int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
  int *next,char *quoted)
{
  int qp;
  char c,nc;

  *brkused=0;           /* initialize to null */
  *quoted=0;            /* assume not quoted  */

  if (!line[*next])     /* if we're at end of line, indicate such */
    return 1;

  _p_state=IN_WHITE;    /* initialize state */
  _p_curquote=0;        /* initialize previous quote char */
  _p_flag=inflag;       /* set option flag */

  for (_p_tokpos=0; (c=line[*next]); ++(*next))   /* main loop */
  {
    if ((qp=sindex(c,brkchar)) >= 0)              /* break */
    {
      switch (_p_state)
      {
        case IN_WHITE:    /* these are the same here ...  */
        case IN_TOKEN:    /* ... just get out */
        case IN_OZONE:    /* ditto      */
          ++(*next);
          *brkused=brkchar[qp];
          goto byebye;

        case IN_QUOTE:    /* just keep going */
          chstore(token,tokmax,c);
          break;
      }
    }
    else if ((qp=sindex(c,quote)) >= 0)           /* quote */
    {
      switch (_p_state)
      {
        case IN_WHITE:
          _p_state=IN_QUOTE;      /* change states */
          _p_curquote=quote[qp];  /* save quote char */
          *quoted=1;              /* something is in quotes */
          break;

        case IN_QUOTE:
          if (quote[qp] == _p_curquote)   /* same as the beginning quote? */
          {
            _p_state=IN_OZONE;
            _p_curquote=0;
          }
          else
            chstore(token,tokmax,c);      /* treat as regular char */
          break;

        case IN_TOKEN:
        case IN_OZONE:
          /* the quote is not consumed: it opens the next token */
          *brkused=c;
          goto byebye;
      }
    }
    else if ((qp=sindex(c,white)) >= 0)           /* white */
    {
      switch (_p_state)
      {
        case IN_WHITE:
        case IN_OZONE:
          break;          /* keep going */

        case IN_TOKEN:
          _p_state=IN_OZONE;
          break;

        case IN_QUOTE:
          chstore(token,tokmax,c);        /* it's valid here */
          break;
      }
    }
    else if (c == eschar)                         /* escape */
    {
      nc=line[(*next)+1];
      if (nc == 0)        /* end of line */
      {
        *brkused=0;
        chstore(token,tokmax,c);
        ++(*next);
        goto byebye;
      }
      switch (_p_state)
      {
        case IN_WHITE:
          /* step back so the escape is seen again, now inside a token */
          --(*next);
          _p_state=IN_TOKEN;
          break;

        case IN_TOKEN:
        case IN_QUOTE:
          ++(*next);
          chstore(token,tokmax,nc);
          break;

        case IN_OZONE:
          goto byebye;
      }
    }
    else        /* anything else is just a real character */
    {
      switch (_p_state)
      {
        case IN_WHITE:
          _p_state=IN_TOKEN;      /* switch states */
          /* fallthrough */

        case IN_TOKEN:            /* these 2 are     */
        case IN_QUOTE:            /*  identical here */
          chstore(token,tokmax,c);
          break;

        case IN_OZONE:
          goto byebye;
      }
    }
  }             /* end of main loop */

byebye:
  if (tokmax > 0)
    token[_p_tokpos]=0;   /* make sure token ends with EOS */

  return 0;
}
937 | /* |
938 | * Local Variables: |
939 | * mode: c |
940 | * c-basic-offset: 8 |
941 | * fill-column: 78 |
942 | * End: |
943 | */ |