Bug Summary

File: src/mod/xml_int/mod_xml_rpc/../../../../libs/xmlrpc-c/lib/expat/xmltok/xmltok.c
Location: line 1248, column 17
Description: Assigned value is garbage or undefined

Annotated Source Code

1/*
2Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3See the file copying.txt for copying permission.
4*/
5
6#include "xmlrpc_config.h"
7#include "bool.h"
8#include "xmldef.h"
9#include "xmltok.h"
10#include "nametab.h"
11
12#define IGNORE_SECTION_TOK_VTABLE, PREFIX(ignoreSectionTok) , PREFIX(ignoreSectionTok)
13
14#define VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
\
15 { PREFIX(prologTok), PREFIX(contentTok), \
16 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE, PREFIX(ignoreSectionTok) }, \
17 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
18 PREFIX(sameName), \
19 PREFIX(nameMatchesAscii), \
20 PREFIX(nameLength), \
21 PREFIX(skipS), \
22 PREFIX(getAtts), \
23 PREFIX(charRefNumber), \
24 PREFIX(predefinedEntityName), \
25 PREFIX(updatePosition), \
26 PREFIX(isPublicId)
27
28#define VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, PREFIX(toUtf8), PREFIX(toUtf16)
29
30#define UCS2_GET_NAMING(pages, hi, lo)(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] &
(1 << ((lo) & 0x1F)))
\
31 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
32
33/* A 2 byte UTF-8 representation splits the characters 11 bits
34between the bottom 5 and 6 bits of the bytes.
35We need 8 bits to index into pages, 3 bits to add to that index and
365 bits to generate the mask. */
37#define UTF8_GET_NAMING2(pages, byte)(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] <<
3) + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >>
5) & 1)] & (1 << (((byte)[1]) & 0x1F)))
\
38 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
39 + ((((byte)[0]) & 3) << 1) \
40 + ((((byte)[1]) >> 5) & 1)] \
41 & (1 << (((byte)[1]) & 0x1F)))
42
43/* A 3 byte UTF-8 representation splits the characters 16 bits
44between the bottom 4, 6 and 6 bits of the bytes.
45We need 8 bits to index into pages, 3 bits to add to that index and
465 bits to generate the mask. */
47#define UTF8_GET_NAMING3(pages, byte)(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) +
((((byte)[1]) >> 2) & 0xF)] << 3) + ((((byte
)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1
)] & (1 << (((byte)[2]) & 0x1F)))
\
48 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
49 + ((((byte)[1]) >> 2) & 0xF)] \
50 << 3) \
51 + ((((byte)[1]) & 3) << 1) \
52 + ((((byte)[2]) >> 5) & 1)] \
53 & (1 << (((byte)[2]) & 0x1F)))
54
55#define UTF8_GET_NAMING(pages, p, n)((n) == 2 ? (namingBitmap[((pages)[((((const unsigned char *)
(p))[0]) >> 2) & 7] << 3) + (((((const unsigned
char *)(p))[0]) & 3) << 1) + (((((const unsigned char
*)(p))[1]) >> 5) & 1)] & (1 << ((((const
unsigned char *)(p))[1]) & 0x1F))) : ((n) == 3 ? (namingBitmap
[((pages)[(((((const unsigned char *)(p))[0]) & 0xF) <<
4) + (((((const unsigned char *)(p))[1]) >> 2) & 0xF
)] << 3) + (((((const unsigned char *)(p))[1]) & 3)
<< 1) + (((((const unsigned char *)(p))[2]) >> 5
) & 1)] & (1 << ((((const unsigned char *)(p))[
2]) & 0x1F))) : 0))
\
56 ((n) == 2 \
57 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p))(namingBitmap[((pages)[((((const unsigned char *)(p))[0]) >>
2) & 7] << 3) + (((((const unsigned char *)(p))[0]
) & 3) << 1) + (((((const unsigned char *)(p))[1]) >>
5) & 1)] & (1 << ((((const unsigned char *)(p)
)[1]) & 0x1F)))
\
58 : ((n) == 3 \
59 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p))(namingBitmap[((pages)[(((((const unsigned char *)(p))[0]) &
0xF) << 4) + (((((const unsigned char *)(p))[1]) >>
2) & 0xF)] << 3) + (((((const unsigned char *)(p))
[1]) & 3) << 1) + (((((const unsigned char *)(p))[2
]) >> 5) & 1)] & (1 << ((((const unsigned
char *)(p))[2]) & 0x1F)))
\
60 : 0))
61
62#define UTF8_INVALID3(p)((*p) == 0xED ? (((p)[1] & 0x20) != 0) : ((*p) == 0xEF ? (
(p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE))
: 0))
\
63 ((*p) == 0xED \
64 ? (((p)[1] & 0x20) != 0) \
65 : ((*p) == 0xEF \
66 ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
67 : 0))
68
69#define UTF8_INVALID4(p)((*p) == 0xF4 && ((p)[1] & 0x30) != 0) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
70
71static
72int isNever(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p ATTR_UNUSED__attribute__((__unused__)))
73{
74 return 0;
75}
76
77static
78int utf8_isName2(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p)
79{
80 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p)(namingBitmap[((namePages)[((((const unsigned char *)p)[0]) >>
2) & 7] << 3) + (((((const unsigned char *)p)[0]) &
3) << 1) + (((((const unsigned char *)p)[1]) >> 5
) & 1)] & (1 << ((((const unsigned char *)p)[1]
) & 0x1F)))
;
81}
82
83static
84int utf8_isName3(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p)
85{
86 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p)(namingBitmap[((namePages)[(((((const unsigned char *)p)[0]) &
0xF) << 4) + (((((const unsigned char *)p)[1]) >>
2) & 0xF)] << 3) + (((((const unsigned char *)p)[1
]) & 3) << 1) + (((((const unsigned char *)p)[2]) >>
5) & 1)] & (1 << ((((const unsigned char *)p)[
2]) & 0x1F)))
;
87}
88
89#define utf8_isName4isNever isNever
90
91static
92int utf8_isNmstrt2(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p)
93{
94 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p)(namingBitmap[((nmstrtPages)[((((const unsigned char *)p)[0])
>> 2) & 7] << 3) + (((((const unsigned char *
)p)[0]) & 3) << 1) + (((((const unsigned char *)p)[
1]) >> 5) & 1)] & (1 << ((((const unsigned
char *)p)[1]) & 0x1F)))
;
95}
96
97static
98int utf8_isNmstrt3(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p)
99{
100 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p)(namingBitmap[((nmstrtPages)[(((((const unsigned char *)p)[0]
) & 0xF) << 4) + (((((const unsigned char *)p)[1]) >>
2) & 0xF)] << 3) + (((((const unsigned char *)p)[1
]) & 3) << 1) + (((((const unsigned char *)p)[2]) >>
5) & 1)] & (1 << ((((const unsigned char *)p)[
2]) & 0x1F)))
;
101}
102
103#define utf8_isNmstrt4isNever isNever
104
105#define utf8_isInvalid2isNever isNever
106
107static
108int utf8_isInvalid3(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p)
109{
110 return UTF8_INVALID3((const unsigned char *)p)((*(const unsigned char *)p) == 0xED ? ((((const unsigned char
*)p)[1] & 0x20) != 0) : ((*(const unsigned char *)p) == 0xEF
? (((const unsigned char *)p)[1] == 0xBF && (((const
unsigned char *)p)[2] == 0xBF || ((const unsigned char *)p)[
2] == 0xBE)) : 0))
;
111}
112
113static
114int utf8_isInvalid4(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *p)
115{
116 return UTF8_INVALID4((const unsigned char *)p)((*(const unsigned char *)p) == 0xF4 && (((const unsigned
char *)p)[1] & 0x30) != 0)
;
117}
118
/* An ENCODING extended with a per-byte classification table and
   character-class callbacks.  Code in this file casts `const ENCODING *`
   to `struct normal_encoding *`, so `enc` has to stay the first member.
   Instances are filled by positional initializers (STANDARD_VTABLE then
   NORMAL_VTABLE / NULL_NORMAL_VTABLE), so member order must not change. */
119struct normal_encoding {
120 ENCODING enc;
/* Byte type for each possible byte value; indexed with (unsigned char)*p. */
121 unsigned char type[256];
122#ifdef XML_MIN_SIZE
/* Hooks reached through the BYTE_TYPE, IS_NAME_CHAR_MINBPC,
   IS_NMSTRT_CHAR_MINBPC, BYTE_TO_ASCII and CHAR_MATCHES macros when
   XML_MIN_SIZE is defined. */
123 int (*byteType)(const ENCODING *, const char *);
124 int (*isNameMin)(const ENCODING *, const char *);
125 int (*isNmstrtMin)(const ENCODING *, const char *);
126 int (*byteToAscii)(const ENCODING *, const char *);
127 int (*charMatches)(const ENCODING *, const char *, int);
128#endif /* XML_MIN_SIZE */
/* Predicates reached through IS_NAME_CHAR / IS_NMSTRT_CHAR /
   IS_INVALID_CHAR; the numeric suffix is the `n` argument pasted onto the
   member name by those macros. */
129 int (*isName2)(const ENCODING *, const char *);
130 int (*isName3)(const ENCODING *, const char *);
131 int (*isName4)(const ENCODING *, const char *);
132 int (*isNmstrt2)(const ENCODING *, const char *);
133 int (*isNmstrt3)(const ENCODING *, const char *);
134 int (*isNmstrt4)(const ENCODING *, const char *);
135 int (*isInvalid2)(const ENCODING *, const char *);
136 int (*isInvalid3)(const ENCODING *, const char *);
137 int (*isInvalid4)(const ENCODING *, const char *);
138};
139
140#ifdef XML_MIN_SIZE
141
142#define STANDARD_VTABLE(E) \
143 E ## byteType, \
144 E ## isNameMin, \
145 E ## isNmstrtMin, \
146 E ## byteToAscii, \
147 E ## charMatches,
148
149#else
150
151#define STANDARD_VTABLE(E) /* as nothing */
152
153#endif
154
155#define NORMAL_VTABLE(E)EisName2, EisName3, EisName4, EisNmstrt2, EisNmstrt3, EisNmstrt4
, EisInvalid2, EisInvalid3, EisInvalid4
\
156 E ## isName2, \
157 E ## isName3, \
158 E ## isName4, \
159 E ## isNmstrt2, \
160 E ## isNmstrt3, \
161 E ## isNmstrt4, \
162 E ## isInvalid2, \
163 E ## isInvalid3, \
164 E ## isInvalid4
165
166#define NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
\
167 NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)
168
169static int checkCharRefNumber(int);
170
171#include "xmltok_impl.h"
172#include "ascii.h"
173
174#ifdef XML_MIN_SIZE
175#define sb_isNameMin isNever
176#define sb_isNmstrtMin isNever
177#endif
178
179#ifdef XML_MIN_SIZE
180#define MINBPC(enc) ((enc)->minBytesPerChar)
181#else
182/* minimum bytes per character */
183#define MINBPC(enc) 1
184#endif
185
186#define SB_BYTE_TYPE(enc, p)(((struct normal_encoding *)(enc))->type[(unsigned char)*(
p)])
\
187 (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
188
189#ifdef XML_MIN_SIZE
190static
191int sb_byteType(const ENCODING *enc, const char *p)
192{
193 return SB_BYTE_TYPE(enc, p)(((struct normal_encoding *)(enc))->type[(unsigned char)*(
p)])
;
194}
195#define BYTE_TYPE(enc, p) \
196 (((const struct normal_encoding *)(enc))->byteType(enc, p))
197#else
198#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)(((struct normal_encoding *)(enc))->type[(unsigned char)*(
p)])
199#endif
200
201#ifdef XML_MIN_SIZE
202#define BYTE_TO_ASCII(enc, p) \
203 (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
204static
205int sb_byteToAscii(const ENCODING *enc, const char *p)
206{
207 return *p;
208}
209#else
210#define BYTE_TO_ASCII(enc, p) (*(p))
211#endif
212
213#define IS_NAME_CHAR(enc, p, n) \
214 (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
215#define IS_NMSTRT_CHAR(enc, p, n) \
216 (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
217#define IS_INVALID_CHAR(enc, p, n) \
218 (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
219
220#ifdef XML_MIN_SIZE
221#define IS_NAME_CHAR_MINBPC(enc, p) \
222 (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
223#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
224 (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
225#else
226#define IS_NAME_CHAR_MINBPC(enc, p) (0)
227#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
228#endif
229
230#ifdef XML_MIN_SIZE
231#define CHAR_MATCHES(enc, p, c) \
232 (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
233static
234int sb_charMatches(const ENCODING *enc, const char *p, int c)
235{
236 return *p == c;
237}
238#else
239/* c is an ASCII character */
240#define CHAR_MATCHES(enc, p, c) (*(p) == c)
241#endif
242
243#define PREFIX(ident) normal_ ## ident
244#include "xmltok_impl.c"
245
246#undef MINBPC
247#undef BYTE_TYPE
248#undef BYTE_TO_ASCII
249#undef CHAR_MATCHES
250#undef IS_NAME_CHAR
251#undef IS_NAME_CHAR_MINBPC
252#undef IS_NMSTRT_CHAR
253#undef IS_NMSTRT_CHAR_MINBPC
254#undef IS_INVALID_CHAR
255
/* Marker bits for the first byte of a UTF-8 sequence.  The converters in
   this file OR the payload bits into these values when emitting a lead
   byte (e.g. `(c >> 6) | UTF8_cval2`). */
256enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
257 UTF8_cval1 = 0x00,
258 UTF8_cval2 = 0xc0,
259 UTF8_cval3 = 0xe0,
260 UTF8_cval4 = 0xf0
261};
262
263static
264void utf8_toUtf8(const ENCODING * enc ATTR_UNUSED__attribute__((__unused__)),
265 const char **fromP, const char *fromLim,
266 char **toP, const char *toLim)
267{
268 char *to;
269 const char *from;
270 if (fromLim - *fromP > toLim - *toP) {
271 /* Avoid copying partial characters. */
272 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
273 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
274 break;
275 }
276 for (to = *toP, from = *fromP; from != fromLim; from++, to++)
277 *to = *from;
278 *fromP = from;
279 *toP = to;
280}
281
282static
283void utf8_toUtf16(const ENCODING *enc,
284 const char **fromP, const char *fromLim,
285 unsigned short **toP, const unsigned short *toLim)
286{
287 unsigned short *to = *toP;
288 const char *from = *fromP;
289 while (from != fromLim && to != toLim) {
290 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
291 case BT_LEAD2:
292 *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
293 from += 2;
294 break;
295 case BT_LEAD3:
296 *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
297 from += 3;
298 break;
299 case BT_LEAD4:
300 {
301 unsigned long n;
302 if (to + 1 == toLim)
303 break;
304 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
305 n -= 0x10000;
306 to[0] = (unsigned short)((n >> 10) | 0xD800);
307 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
308 to += 2;
309 from += 4;
310 }
311 break;
312 default:
313 *to++ = *from++;
314 break;
315 }
316 }
317 *fromP = from;
318 *toP = to;
319}
320
321static const struct normal_encoding utf8_encoding_ns = {
322 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
323 {
324#include "asciitab.h"
325#include "utf8tab.h"
326 },
327 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)utf8_isName2, utf8_isName3, isNever, utf8_isNmstrt2, utf8_isNmstrt3
, isNever, isNever, utf8_isInvalid3, utf8_isInvalid4
328};
329
330static const struct normal_encoding utf8_encoding = {
331 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
332 {
333#define BT_COLON BT_NMSTRT
334#include "asciitab.h"
335#undef BT_COLON
336#include "utf8tab.h"
337 },
338 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)utf8_isName2, utf8_isName3, isNever, utf8_isNmstrt2, utf8_isNmstrt3
, isNever, isNever, utf8_isInvalid3, utf8_isInvalid4
339};
340
341static const struct normal_encoding internal_utf8_encoding_ns = {
342 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
343 {
344#include "iasciitab.h"
345#include "utf8tab.h"
346 },
347 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)utf8_isName2, utf8_isName3, isNever, utf8_isNmstrt2, utf8_isNmstrt3
, isNever, isNever, utf8_isInvalid3, utf8_isInvalid4
348};
349
350static const struct normal_encoding internal_utf8_encoding = {
351 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
352 {
353#define BT_COLON BT_NMSTRT
354#include "iasciitab.h"
355#undef BT_COLON
356#include "utf8tab.h"
357 },
358 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)utf8_isName2, utf8_isName3, isNever, utf8_isNmstrt2, utf8_isNmstrt3
, isNever, isNever, utf8_isInvalid3, utf8_isInvalid4
359};
360
361
362
363static void
364latin1_toUtf8(const ENCODING * const enc ATTR_UNUSED__attribute__((__unused__)),
365 const char ** const fromP,
366 const char * const fromLim,
367 char ** const toP,
368 const char * const toLim) {
369/*----------------------------------------------------------------------------
370 Convert the Latin1 string that starts at *fromP and ends at 'fromLim'
371 to UTF8 in the buffer that starts at *toP and ends at 'toLim'.
372
373 Go from left to right and stop when the output buffer is full.
374
375 Note that the buffer can be full while still having a byte left in it
376 because a Latin1 character may require two bytes of the output buffer.
377
378 Leave *fromP and *toP pointing after the last character converted.
379-----------------------------------------------------------------------------*/
380 bool bufferIsFull;
381
382 for (bufferIsFull = false; *fromP != fromLim && !bufferIsFull;) {
383 unsigned char const c = (unsigned char)**fromP;
384 if (c & 0x80) {
385 if (toLim - *toP < 2)
386 bufferIsFull = true;
387 else {
388 *(*toP)++ = ((c >> 6) | UTF8_cval2);
389 *(*toP)++ = ((c & 0x3f) | 0x80);
390 ++(*fromP);
391 }
392 } else {
393 if (*toP == toLim)
394 bufferIsFull = true;
395 else
396 *(*toP)++ = *(*fromP)++;
397 }
398 }
399}
400
401
402
403static
404void latin1_toUtf16(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)),
405 const char **fromP, const char *fromLim,
406 unsigned short **toP, const unsigned short *toLim)
407{
408 while (*fromP != fromLim && *toP != toLim)
409 *(*toP)++ = (unsigned char)*(*fromP)++;
410}
411
412static const struct normal_encoding latin1_encoding_ns = {
413 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
414 {
415#include "asciitab.h"
416#include "latin1tab.h"
417 },
418 STANDARD_VTABLE(sb_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
419};
420
421static const struct normal_encoding latin1_encoding = {
422 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
423 {
424#define BT_COLON BT_NMSTRT
425#include "asciitab.h"
426#undef BT_COLON
427#include "latin1tab.h"
428 },
429 STANDARD_VTABLE(sb_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
430};
431
432static
433void ascii_toUtf8(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)),
434 const char **fromP, const char *fromLim,
435 char **toP, const char *toLim)
436{
437 while (*fromP != fromLim && *toP != toLim)
438 *(*toP)++ = *(*fromP)++;
439}
440
441static const struct normal_encoding ascii_encoding_ns = {
442 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
443 {
444#include "asciitab.h"
445/* BT_NONXML == 0 */
446 },
447 STANDARD_VTABLE(sb_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
448};
449
450static const struct normal_encoding ascii_encoding = {
451 { VTABLE1{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId)
, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
452 {
453#define BT_COLON BT_NMSTRT
454#include "asciitab.h"
455#undef BT_COLON
456/* BT_NONXML == 0 */
457 },
458 STANDARD_VTABLE(sb_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
459};
460
461static int unicode_byte_type(char hi, char lo)
462{
463 switch ((unsigned char)hi) {
464 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
465 return BT_LEAD4;
466 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
467 return BT_TRAIL;
468 case 0xFF:
469 switch ((unsigned char)lo) {
470 case 0xFF:
471 case 0xFE:
472 return BT_NONXML;
473 }
474 break;
475 }
476 return BT_NONASCII;
477}
478
479#define DEFINE_UTF16_TO_UTF8(E)static void EtoUtf8(const ENCODING *enc __attribute__((__unused__
)), const char **fromP, const char *fromLim, char **toP, const
char *toLim) { const char *from; for (from = *fromP; from !=
fromLim; from += 2) { int plane; unsigned char lo2; unsigned
char lo = GET_LO(from); unsigned char hi = GET_HI(from); switch
(hi) { case 0: if (lo < 0x80) { if (*toP == toLim) { *fromP
= from; return; } *(*toP)++ = lo; break; } case 0x1: case 0x2
: case 0x3: case 0x4: case 0x5: case 0x6: case 0x7: if (toLim
- *toP < 2) { *fromP = from; return; } *(*toP)++ = ((lo >>
6) | (hi << 2) | UTF8_cval2); *(*toP)++ = ((lo & 0x3f
) | 0x80); break; default: if (toLim - *toP < 3) { *fromP =
from; return; } *(*toP)++ = ((hi >> 4) | UTF8_cval3); *
(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80
); *(*toP)++ = ((lo & 0x3f) | 0x80); break; case 0xD8: case
0xD9: case 0xDA: case 0xDB: if (toLim - *toP < 4) { *fromP
= from; return; } plane = (((hi & 0x3) << 2) | ((lo
>> 6) & 0x3)) + 1; *(*toP)++ = ((plane >> 2)
| UTF8_cval4); *(*toP)++ = (((lo >> 2) & 0xF) | ((
plane & 0x3) << 4) | 0x80); from += 2; lo2 = GET_LO
(from); *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from
) & 0x3) << 2) | (lo2 >> 6) | 0x80); *(*toP)++
= ((lo2 & 0x3f) | 0x80); break; } } *fromP = from; }
\
480static \
481void E ## toUtf8(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), \
482 const char **fromP, const char *fromLim, \
483 char **toP, const char *toLim) \
484{ \
485 const char *from; \
486 for (from = *fromP; from != fromLim; from += 2) { \
487 int plane; \
488 unsigned char lo2; \
489 unsigned char lo = GET_LO(from); \
490 unsigned char hi = GET_HI(from); \
491 switch (hi) { \
492 case 0: \
493 if (lo < 0x80) { \
494 if (*toP == toLim) { \
495 *fromP = from; \
496 return; \
497 } \
498 *(*toP)++ = lo; \
499 break; \
500 } \
501 /* fall through */ \
502 case 0x1: case 0x2: case 0x3: \
503 case 0x4: case 0x5: case 0x6: case 0x7: \
504 if (toLim - *toP < 2) { \
505 *fromP = from; \
506 return; \
507 } \
508 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
509 *(*toP)++ = ((lo & 0x3f) | 0x80); \
510 break; \
511 default: \
512 if (toLim - *toP < 3) { \
513 *fromP = from; \
514 return; \
515 } \
516 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
517 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
518 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
519 *(*toP)++ = ((lo & 0x3f) | 0x80); \
520 break; \
521 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
522 if (toLim - *toP < 4) { \
523 *fromP = from; \
524 return; \
525 } \
526 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
527 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
528 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
529 from += 2; \
530 lo2 = GET_LO(from); \
531 *(*toP)++ = (((lo & 0x3) << 4) \
532 | ((GET_HI(from) & 0x3) << 2) \
533 | (lo2 >> 6) \
534 | 0x80); \
535 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
536 break; \
537 } \
538 } \
539 *fromP = from; \
540}
541
542#define DEFINE_UTF16_TO_UTF16(E)static void EtoUtf16(const ENCODING *enc __attribute__((__unused__
)), const char **fromP, const char *fromLim, unsigned short *
*toP, const unsigned short *toLim) { if (fromLim - *fromP >
((toLim - *toP) << 1) && (GET_HI(fromLim - 2) &
0xF8) == 0xD8) fromLim -= 2; for (; *fromP != fromLim &&
*toP != toLim; *fromP += 2) *(*toP)++ = (GET_HI(*fromP) <<
8) | GET_LO(*fromP); }
\
543static \
544void E ## toUtf16(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), \
545 const char **fromP, const char *fromLim, \
546 unsigned short **toP, const unsigned short *toLim) \
547{ \
548 /* Avoid copying first half only of surrogate */ \
549 if (fromLim - *fromP > ((toLim - *toP) << 1) \
550 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
551 fromLim -= 2; \
552 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
553 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
554}
555
556#define SET2(ptr, ch) \
557 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
558#define GET_LO(ptr) ((unsigned char)(ptr)[0])
559#define GET_HI(ptr) ((unsigned char)(ptr)[1])
560
561DEFINE_UTF16_TO_UTF8(little2_)static void little2_toUtf8(const ENCODING *enc __attribute__(
(__unused__)), const char **fromP, const char *fromLim, char *
*toP, const char *toLim) { const char *from; for (from = *fromP
; from != fromLim; from += 2) { int plane; unsigned char lo2;
unsigned char lo = GET_LO(from); unsigned char hi = GET_HI(from
); switch (hi) { case 0: if (lo < 0x80) { if (*toP == toLim
) { *fromP = from; return; } *(*toP)++ = lo; break; } case 0x1
: case 0x2: case 0x3: case 0x4: case 0x5: case 0x6: case 0x7:
if (toLim - *toP < 2) { *fromP = from; return; } *(*toP)++
= ((lo >> 6) | (hi << 2) | UTF8_cval2); *(*toP)++
= ((lo & 0x3f) | 0x80); break; default: if (toLim - *toP
< 3) { *fromP = from; return; } *(*toP)++ = ((hi >>
4) | UTF8_cval3); *(*toP)++ = (((hi & 0xf) << 2) |
(lo >> 6) | 0x80); *(*toP)++ = ((lo & 0x3f) | 0x80
); break; case 0xD8: case 0xD9: case 0xDA: case 0xDB: if (toLim
- *toP < 4) { *fromP = from; return; } plane = (((hi &
0x3) << 2) | ((lo >> 6) & 0x3)) + 1; *(*toP)
++ = ((plane >> 2) | UTF8_cval4); *(*toP)++ = (((lo >>
2) & 0xF) | ((plane & 0x3) << 4) | 0x80); from
+= 2; lo2 = GET_LO(from); *(*toP)++ = (((lo & 0x3) <<
4) | ((GET_HI(from) & 0x3) << 2) | (lo2 >> 6
) | 0x80); *(*toP)++ = ((lo2 & 0x3f) | 0x80); break; } } *
fromP = from; }
562DEFINE_UTF16_TO_UTF16(little2_)static void little2_toUtf16(const ENCODING *enc __attribute__
((__unused__)), const char **fromP, const char *fromLim, unsigned
short **toP, const unsigned short *toLim) { if (fromLim - *fromP
> ((toLim - *toP) << 1) && (GET_HI(fromLim -
2) & 0xF8) == 0xD8) fromLim -= 2; for (; *fromP != fromLim
&& *toP != toLim; *fromP += 2) *(*toP)++ = (GET_HI(*
fromP) << 8) | GET_LO(*fromP); }
563
564#undef SET2
565#undef GET_LO
566#undef GET_HI
567
568#define SET2(ptr, ch) \
569 (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
570#define GET_LO(ptr) ((unsigned char)(ptr)[1])
571#define GET_HI(ptr) ((unsigned char)(ptr)[0])
572
573DEFINE_UTF16_TO_UTF8(big2_)static void big2_toUtf8(const ENCODING *enc __attribute__((__unused__
)), const char **fromP, const char *fromLim, char **toP, const
char *toLim) { const char *from; for (from = *fromP; from !=
fromLim; from += 2) { int plane; unsigned char lo2; unsigned
char lo = GET_LO(from); unsigned char hi = GET_HI(from); switch
(hi) { case 0: if (lo < 0x80) { if (*toP == toLim) { *fromP
= from; return; } *(*toP)++ = lo; break; } case 0x1: case 0x2
: case 0x3: case 0x4: case 0x5: case 0x6: case 0x7: if (toLim
- *toP < 2) { *fromP = from; return; } *(*toP)++ = ((lo >>
6) | (hi << 2) | UTF8_cval2); *(*toP)++ = ((lo & 0x3f
) | 0x80); break; default: if (toLim - *toP < 3) { *fromP =
from; return; } *(*toP)++ = ((hi >> 4) | UTF8_cval3); *
(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80
); *(*toP)++ = ((lo & 0x3f) | 0x80); break; case 0xD8: case
0xD9: case 0xDA: case 0xDB: if (toLim - *toP < 4) { *fromP
= from; return; } plane = (((hi & 0x3) << 2) | ((lo
>> 6) & 0x3)) + 1; *(*toP)++ = ((plane >> 2)
| UTF8_cval4); *(*toP)++ = (((lo >> 2) & 0xF) | ((
plane & 0x3) << 4) | 0x80); from += 2; lo2 = GET_LO
(from); *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from
) & 0x3) << 2) | (lo2 >> 6) | 0x80); *(*toP)++
= ((lo2 & 0x3f) | 0x80); break; } } *fromP = from; }
574DEFINE_UTF16_TO_UTF16(big2_)static void big2_toUtf16(const ENCODING *enc __attribute__((__unused__
)), const char **fromP, const char *fromLim, unsigned short *
*toP, const unsigned short *toLim) { if (fromLim - *fromP >
((toLim - *toP) << 1) && (GET_HI(fromLim - 2) &
0xF8) == 0xD8) fromLim -= 2; for (; *fromP != fromLim &&
*toP != toLim; *fromP += 2) *(*toP)++ = (GET_HI(*fromP) <<
8) | GET_LO(*fromP); }
575
576#undef SET2
577#undef GET_LO
578#undef GET_HI
579
/* Character classification for little-endian two-byte input: p[0] is the low
   byte and p[1] is the high byte.  Characters whose high byte is zero are
   looked up in the encoding's own type table; all others go through
   unicode_byte_type(). */
#define LITTLE2_BYTE_TYPE(enc, p) \
 ((p)[1] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))
/* Low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
590
591#ifdef XML_MIN_SIZE
592
593static
594int little2_byteType(const ENCODING *enc, const char *p)
595{
596 return LITTLE2_BYTE_TYPE(enc, p)((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned
char)*(p)] : unicode_byte_type((p)[1], (p)[0]))
;
597}
598
599static
600int little2_byteToAscii(const ENCODING *enc, const char *p)
601{
602 return LITTLE2_BYTE_TO_ASCII(enc, p)((p)[1] == 0 ? (p)[0] : -1);
603}
604
605static
606int little2_charMatches(const ENCODING *enc, const char *p, int c)
607{
608 return LITTLE2_CHAR_MATCHES(enc, p, c)((p)[1] == 0 && (p)[0] == c);
609}
610
611static
612int little2_isNameMin(const ENCODING *enc, const char *p)
613{
614 return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)(namingBitmap[(namePages[(unsigned char)p[1]] << 3) + (
((unsigned char)p[0]) >> 5)] & (1 << (((unsigned
char)p[0]) & 0x1F)))
;
615}
616
617static
618int little2_isNmstrtMin(const ENCODING *enc, const char *p)
619{
620 return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)(namingBitmap[(nmstrtPages[(unsigned char)p[1]] << 3) +
(((unsigned char)p[0]) >> 5)] & (1 << (((unsigned
char)p[0]) & 0x1F)))
;
621}
622
/* In the XML_MIN_SIZE build the shared VTABLE1 entries are completed with the
   little-endian converters by name instead of through PREFIX(). */
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
625
626#else /* not XML_MIN_SIZE */
627
628#undef PREFIX
629#define PREFIX(ident) little2_ ## ident
630#define MINBPC(enc) 2
631/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
632#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned
char)*(p)] : unicode_byte_type((p)[1], (p)[0]))
633#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)((p)[1] == 0 ? (p)[0] : -1)
634#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)((p)[1] == 0 && (p)[0] == c)
635#define IS_NAME_CHAR(enc, p, n) 0
636#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)(namingBitmap[(namePages[(unsigned char)p[1]] << 3) + (
((unsigned char)p[0]) >> 5)] & (1 << (((unsigned
char)p[0]) & 0x1F)))
637#define IS_NMSTRT_CHAR(enc, p, n) (0)
638#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)(namingBitmap[(nmstrtPages[(unsigned char)p[1]] << 3) +
(((unsigned char)p[0]) >> 5)] & (1 << (((unsigned
char)p[0]) & 0x1F)))
639
640#include "xmltok_impl.c"
641
642#undef MINBPC
643#undef BYTE_TYPE
644#undef BYTE_TO_ASCII
645#undef CHAR_MATCHES
646#undef IS_NAME_CHAR
647#undef IS_NAME_CHAR_MINBPC
648#undef IS_NMSTRT_CHAR
649#undef IS_NMSTRT_CHAR_MINBPC
650#undef IS_INVALID_CHAR
651
652#endif /* not XML_MIN_SIZE */
653
654static const struct normal_encoding little2_encoding_ns = {
655 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0,
656#if XML_BYTE_ORDER == 12
657 1
658#else
659 0
660#endif
661 },
662 {
663#include "asciitab.h"
664#include "latin1tab.h"
665 },
666 STANDARD_VTABLE(little2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
667};
668
669static const struct normal_encoding little2_encoding = {
670 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0,
671#if XML_BYTE_ORDER == 12
672 1
673#else
674 0
675#endif
676 },
677 {
678#define BT_COLON BT_NMSTRT
679#include "asciitab.h"
680#undef BT_COLON
681#include "latin1tab.h"
682 },
683 STANDARD_VTABLE(little2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
684};
685
686#if XML_BYTE_ORDER != 21
687
688static const struct normal_encoding internal_little2_encoding_ns = {
689 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0, 1 },
690 {
691#include "iasciitab.h"
692#include "latin1tab.h"
693 },
694 STANDARD_VTABLE(little2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
695};
696
697static const struct normal_encoding internal_little2_encoding = {
698 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0, 1 },
699 {
700#define BT_COLON BT_NMSTRT
701#include "iasciitab.h"
702#undef BT_COLON
703#include "latin1tab.h"
704 },
705 STANDARD_VTABLE(little2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
706};
707
708#endif
709
710
/* Character classification for big-endian two-byte input: p[0] is the high
   byte and p[1] is the low byte.  Characters whose high byte is zero are
   looked up in the encoding's own type table; all others go through
   unicode_byte_type(). */
#define BIG2_BYTE_TYPE(enc, p) \
 ((p)[0] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
/* Low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
721
722#ifdef XML_MIN_SIZE
723
724static
725int big2_byteType(const ENCODING *enc, const char *p)
726{
727 return BIG2_BYTE_TYPE(enc, p)((p)[0] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned
char)(p)[1]] : unicode_byte_type((p)[0], (p)[1]))
;
728}
729
730static
731int big2_byteToAscii(const ENCODING *enc, const char *p)
732{
733 return BIG2_BYTE_TO_ASCII(enc, p)((p)[0] == 0 ? (p)[1] : -1);
734}
735
736static
737int big2_charMatches(const ENCODING *enc, const char *p, int c)
738{
739 return BIG2_CHAR_MATCHES(enc, p, c)((p)[0] == 0 && (p)[1] == c);
740}
741
742static
743int big2_isNameMin(const ENCODING *enc, const char *p)
744{
745 return BIG2_IS_NAME_CHAR_MINBPC(enc, p)(namingBitmap[(namePages[(unsigned char)p[0]] << 3) + (
((unsigned char)p[1]) >> 5)] & (1 << (((unsigned
char)p[1]) & 0x1F)))
;
746}
747
748static
749int big2_isNmstrtMin(const ENCODING *enc, const char *p)
750{
751 return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)(namingBitmap[(nmstrtPages[(unsigned char)p[0]] << 3) +
(((unsigned char)p[1]) >> 5)] & (1 << (((unsigned
char)p[1]) & 0x1F)))
;
752}
753
/* In the XML_MIN_SIZE build the shared VTABLE1 entries are completed with the
   big-endian converters by name instead of through PREFIX(). */
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
756
757#else /* not XML_MIN_SIZE */
758
759#undef PREFIX
760#define PREFIX(ident) big2_ ## ident
761#define MINBPC(enc) 2
762/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
763#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)((p)[0] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned
char)(p)[1]] : unicode_byte_type((p)[0], (p)[1]))
764#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)((p)[0] == 0 ? (p)[1] : -1)
765#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)((p)[0] == 0 && (p)[1] == c)
766#define IS_NAME_CHAR(enc, p, n) 0
767#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)(namingBitmap[(namePages[(unsigned char)p[0]] << 3) + (
((unsigned char)p[1]) >> 5)] & (1 << (((unsigned
char)p[1]) & 0x1F)))
768#define IS_NMSTRT_CHAR(enc, p, n) (0)
769#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)(namingBitmap[(nmstrtPages[(unsigned char)p[0]] << 3) +
(((unsigned char)p[1]) >> 5)] & (1 << (((unsigned
char)p[1]) & 0x1F)))
770
771#include "xmltok_impl.c"
772
773#undef MINBPC
774#undef BYTE_TYPE
775#undef BYTE_TO_ASCII
776#undef CHAR_MATCHES
777#undef IS_NAME_CHAR
778#undef IS_NAME_CHAR_MINBPC
779#undef IS_NMSTRT_CHAR
780#undef IS_NMSTRT_CHAR_MINBPC
781#undef IS_INVALID_CHAR
782
783#endif /* not XML_MIN_SIZE */
784
785static const struct normal_encoding big2_encoding_ns = {
786 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0,
787#if XML_BYTE_ORDER == 21
788 1
789#else
790 0
791#endif
792 },
793 {
794#include "asciitab.h"
795#include "latin1tab.h"
796 },
797 STANDARD_VTABLE(big2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
798};
799
800static const struct normal_encoding big2_encoding = {
801 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0,
802#if XML_BYTE_ORDER == 21
803 1
804#else
805 0
806#endif
807 },
808 {
809#define BT_COLON BT_NMSTRT
810#include "asciitab.h"
811#undef BT_COLON
812#include "latin1tab.h"
813 },
814 STANDARD_VTABLE(big2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
815};
816
817#if XML_BYTE_ORDER != 12
818
819static const struct normal_encoding internal_big2_encoding_ns = {
820 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0, 1 },
821 {
822#include "iasciitab.h"
823#include "latin1tab.h"
824 },
825 STANDARD_VTABLE(big2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
826};
827
828static const struct normal_encoding internal_big2_encoding = {
829 { VTABLE{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok
) , PREFIX(ignoreSectionTok) }, { PREFIX(attributeValueTok), PREFIX
(entityValueTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii
), PREFIX(nameLength), PREFIX(skipS), PREFIX(getAtts), PREFIX
(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition
), PREFIX(isPublicId), PREFIX(toUtf8), PREFIX(toUtf16)
, 2, 0, 1 },
830 {
831#define BT_COLON BT_NMSTRT
832#include "iasciitab.h"
833#undef BT_COLON
834#include "latin1tab.h"
835 },
836 STANDARD_VTABLE(big2_) NULL_NORMAL_VTABLE((void*)0), ((void*)0), ((void*)0), ((void*)0), ((void*)0), (
(void*)0), ((void*)0), ((void*)0), ((void*)0)
837};
838
839#endif
840
841#undef PREFIX
842
843static
844int streqci(const char *s1, const char *s2)
845{
846 for (;;) {
847 char c1 = *s1++;
848 char c2 = *s2++;
849 if (ASCII_a0x61 <= c1 && c1 <= ASCII_z0x7A)
850 c1 += ASCII_A0x41 - ASCII_a0x61;
851 if (ASCII_a0x61 <= c2 && c2 <= ASCII_z0x7A)
852 c2 += ASCII_A0x41 - ASCII_a0x61;
853 if (c1 != c2)
854 return 0;
855 if (!c1)
856 break;
857 }
858 return 1;
859}
860
861static
862void initUpdatePosition(const ENCODING *enc ATTR_UNUSED__attribute__((__unused__)), const char *ptr,
863 const char *end, POSITION *pos)
864{
865 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
866}
867
868static
869int toAscii(const ENCODING *enc, const char *ptr, const char *end)
870{
871 char buf[1];
872 char *p = buf;
873 XmlUtf8Convert(enc, &ptr, end, &p, p + 1)(((enc)->utf8Convert)(enc, &ptr, end, &p, p + 1));
874 if (p == buf)
875 return -1;
876 else
877 return buf[0];
878}
879
/* Return 1 when c is space (0x20), carriage return (0xD), line feed (0xA) or
   tab (0x9); return 0 for every other value, including -1. */
static
int isSpace(int c)
{
  switch (c) {
  case 0x20:
  case 0xD:
  case 0xA:
  case 0x9:
    return 1;
  default:
    return 0;
  }
}
892
893/* Return 1 if there's just optional white space
894or there's an S followed by name=val. */
895static
896int parsePseudoAttribute(const ENCODING *enc,
897 const char *ptr,
898 const char *end,
899 const char **namePtr,
900 const char **nameEndPtr,
901 const char **valPtr,
902 const char **nextTokPtr)
903{
904 int c;
905 char open;
906 if (ptr == end) {
907 *namePtr = 0;
908 return 1;
909 }
910 if (!isSpace(toAscii(enc, ptr, end))) {
911 *nextTokPtr = ptr;
912 return 0;
913 }
914 do {
915 ptr += enc->minBytesPerChar;
916 } while (isSpace(toAscii(enc, ptr, end)));
917 if (ptr == end) {
918 *namePtr = 0;
919 return 1;
920 }
921 *namePtr = ptr;
922 for (;;) {
923 c = toAscii(enc, ptr, end);
924 if (c == -1) {
925 *nextTokPtr = ptr;
926 return 0;
927 }
928 if (c == ASCII_EQUALS0x3D) {
929 *nameEndPtr = ptr;
930 break;
931 }
932 if (isSpace(c)) {
933 *nameEndPtr = ptr;
934 do {
935 ptr += enc->minBytesPerChar;
936 } while (isSpace(c = toAscii(enc, ptr, end)));
937 if (c != ASCII_EQUALS0x3D) {
938 *nextTokPtr = ptr;
939 return 0;
940 }
941 break;
942 }
943 ptr += enc->minBytesPerChar;
944 }
945 if (ptr == *namePtr) {
946 *nextTokPtr = ptr;
947 return 0;
948 }
949 ptr += enc->minBytesPerChar;
950 c = toAscii(enc, ptr, end);
951 while (isSpace(c)) {
952 ptr += enc->minBytesPerChar;
953 c = toAscii(enc, ptr, end);
954 }
955 if (c != ASCII_QUOT0x22 && c != ASCII_APOS0x27) {
956 *nextTokPtr = ptr;
957 return 0;
958 }
959 open = c;
960 ptr += enc->minBytesPerChar;
961 *valPtr = ptr;
962 for (;; ptr += enc->minBytesPerChar) {
963 c = toAscii(enc, ptr, end);
964 if (c == open)
965 break;
966 if (!(ASCII_a0x61 <= c && c <= ASCII_z0x7A)
967 && !(ASCII_A0x41 <= c && c <= ASCII_Z0x5A)
968 && !(ASCII_00x30 <= c && c <= ASCII_90x39)
969 && c != ASCII_PERIOD0x2E
970 && c != ASCII_MINUS0x2D
971 && c != ASCII_UNDERSCORE0x5F) {
972 *nextTokPtr = ptr;
973 return 0;
974 }
975 }
976 *nextTokPtr = ptr + enc->minBytesPerChar;
977 return 1;
978}
979
980static const char KW_version[] = {
981 ASCII_v0x76, ASCII_e0x65, ASCII_r0x72, ASCII_s0x73, ASCII_i0x69, ASCII_o0x6F, ASCII_n0x6E, '\0'
982};
983
984static const char KW_encoding[] = {
985 ASCII_e0x65, ASCII_n0x6E, ASCII_c0x63, ASCII_o0x6F, ASCII_d0x64, ASCII_i0x69, ASCII_n0x6E, ASCII_g0x67, '\0'
986};
987
988static const char KW_standalone[] = {
989 ASCII_s0x73, ASCII_t0x74, ASCII_a0x61, ASCII_n0x6E, ASCII_d0x64, ASCII_a0x61, ASCII_l0x6C, ASCII_o0x6F, ASCII_n0x6E, ASCII_e0x65, '\0'
990};
991
992static const char KW_yes[] = {
993 ASCII_y0x79, ASCII_e0x65, ASCII_s0x73, '\0'
994};
995
996static const char KW_no[] = {
997 ASCII_n0x6E, ASCII_o0x6F, '\0'
998};
999
1000static
1001int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
1002 const char *,
1003 const char *),
1004 int isGeneralTextEntity,
1005 const ENCODING *enc,
1006 const char *ptr,
1007 const char *end,
1008 const char **badPtr,
1009 const char **versionPtr,
1010 const char **encodingName,
1011 const ENCODING **encoding,
1012 int *standalone)
1013{
1014 const char *val = 0;
1015 const char *name = 0;
1016 const char *nameEnd = 0;
1017 ptr += 5 * enc->minBytesPerChar;
1018 end -= 2 * enc->minBytesPerChar;
1019 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) {
1020 *badPtr = ptr;
1021 return 0;
1022 }
1023 if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)(((enc)->nameMatchesAscii)(enc, name, nameEnd, KW_version)
)
) {
1024 if (!isGeneralTextEntity) {
1025 *badPtr = name;
1026 return 0;
1027 }
1028 }
1029 else {
1030 if (versionPtr)
1031 *versionPtr = val;
1032 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1033 *badPtr = ptr;
1034 return 0;
1035 }
1036 if (!name) {
1037 if (isGeneralTextEntity) {
1038 /* a TextDecl must have an EncodingDecl */
1039 *badPtr = ptr;
1040 return 0;
1041 }
1042 return 1;
1043 }
1044 }
1045 if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)(((enc)->nameMatchesAscii)(enc, name, nameEnd, KW_encoding
))
) {
1046 int c = toAscii(enc, val, end);
1047 if (!(ASCII_a0x61 <= c && c <= ASCII_z0x7A) && !(ASCII_A0x41 <= c && c <= ASCII_Z0x5A)) {
1048 *badPtr = val;
1049 return 0;
1050 }
1051 if (encodingName)
1052 *encodingName = val;
1053 if (encoding)
1054 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1055 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1056 *badPtr = ptr;
1057 return 0;
1058 }
1059 if (!name)
1060 return 1;
1061 }
1062 if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)(((enc)->nameMatchesAscii)(enc, name, nameEnd, KW_standalone
))
|| isGeneralTextEntity) {
1063 *badPtr = name;
1064 return 0;
1065 }
1066 if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)(((enc)->nameMatchesAscii)(enc, val, ptr - enc->minBytesPerChar
, KW_yes))
) {
1067 if (standalone)
1068 *standalone = 1;
1069 }
1070 else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)(((enc)->nameMatchesAscii)(enc, val, ptr - enc->minBytesPerChar
, KW_no))
) {
1071 if (standalone)
1072 *standalone = 0;
1073 }
1074 else {
1075 *badPtr = val;
1076 return 0;
1077 }
1078 while (isSpace(toAscii(enc, ptr, end)))
1079 ptr += enc->minBytesPerChar;
1080 if (ptr != end) {
1081 *badPtr = ptr;
1082 return 0;
1083 }
1084 return 1;
1085}
1086
1087static
1088int checkCharRefNumber(int result)
1089{
1090 switch (result >> 8) {
1091 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
1092 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
1093 return -1;
1094 case 0:
1095 if (latin1_encoding.type[result] == BT_NONXML)
1096 return -1;
1097 break;
1098 case 0xFF:
1099 if (result == 0xFFFE || result == 0xFFFF)
1100 return -1;
1101 break;
1102 }
1103 return result;
1104}
1105
1106
1107
1108int
1109xmlrpc_XmlUtf8Encode(int const c,
1110 char * const buf) {
1111
1112 enum {
1113 /* minN is minimum legal resulting value for N byte sequence */
1114 min2 = 0x80,
1115 min3 = 0x800,
1116 min4 = 0x10000
1117 };
1118
1119 if (c < 0)
1120 return 0;
1121 if (c < min2) {
1122 buf[0] = (c | UTF8_cval1);
1123 return 1;
1124 }
1125 if (c < min3) {
1126 buf[0] = ((c >> 6) | UTF8_cval2);
1127 buf[1] = ((c & 0x3f) | 0x80);
1128 return 2;
1129 }
1130 if (c < min4) {
1131 buf[0] = ((c >> 12) | UTF8_cval3);
1132 buf[1] = (((c >> 6) & 0x3f) | 0x80);
1133 buf[2] = ((c & 0x3f) | 0x80);
1134 return 3;
1135 }
1136 if (c < 0x110000) {
1137 buf[0] = ((c >> 18) | UTF8_cval4);
1138 buf[1] = (((c >> 12) & 0x3f) | 0x80);
1139 buf[2] = (((c >> 6) & 0x3f) | 0x80);
1140 buf[3] = ((c & 0x3f) | 0x80);
1141 return 4;
1142 }
1143 return 0;
1144}
1145
1146
1147
/* Encode character number charNumArg as UTF-16 into buf.

   Returns 1 when the character fits in a single unit (below 0x10000), 2 when
   it is written as a 0xD800-based / 0xDC00-based pair (below 0x110000), and 0
   when the number is negative or too large; in that case buf is left
   untouched.  buf must have room for two units. */
int
xmlrpc_XmlUtf16Encode(int              const charNumArg,
                      unsigned short * const buf) {

  int charNum;

  charNum = charNumArg;  /* initial value */

  if (charNum < 0)
    return 0;
  if (charNum < 0x10000) {
    buf[0] = charNum;
    return 1;
  }
  if (charNum < 0x110000) {
    /* Split the 20-bit offset into two 10-bit halves. */
    charNum -= 0x10000;
    buf[0] = (charNum >> 10) + 0xD800;
    buf[1] = (charNum & 0x3FF) + 0xDC00;
    return 2;
  }
  return 0;
}
1170
1171
1172
1173struct unknown_encoding {
1174 struct normal_encoding normal;
1175 int (*convert)(void *userData, const char *p);
1176 void *userData;
1177 unsigned short utf16[256];
1178 char utf8[256][4];
1179};
1180
1181
1182
1183int
1184xmlrpc_XmlSizeOfUnknownEncoding(void) {
1185
1186 return sizeof(struct unknown_encoding);
1187}
1188
1189
1190
1191static
1192int unknown_isName(const ENCODING *enc, const char *p)
1193{
1194 int c = ((const struct unknown_encoding *)enc)
1195 ->convert(((const struct unknown_encoding *)enc)->userData, p);
1196 if (c & ~0xFFFF)
1197 return 0;
1198 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF)(namingBitmap[(namePages[c >> 8] << 3) + ((c &
0xFF) >> 5)] & (1 << ((c & 0xFF) & 0x1F
)))
;
1199}
1200
1201static
1202int unknown_isNmstrt(const ENCODING *enc, const char *p)
1203{
1204 int c = ((const struct unknown_encoding *)enc)
1205 ->convert(((const struct unknown_encoding *)enc)->userData, p);
1206 if (c & ~0xFFFF)
1207 return 0;
1208 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF)(namingBitmap[(nmstrtPages[c >> 8] << 3) + ((c &
0xFF) >> 5)] & (1 << ((c & 0xFF) & 0x1F
)))
;
1209}
1210
1211static
1212int unknown_isInvalid(const ENCODING *enc, const char *p)
1213{
1214 int c = ((const struct unknown_encoding *)enc)
1215 ->convert(((const struct unknown_encoding *)enc)->userData, p);
1216 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1217}
1218
1219static
1220void unknown_toUtf8(const ENCODING *enc,
1221 const char **fromP, const char *fromLim,
1222 char **toP, const char *toLim)
1223{
1224 char buf[XML_UTF8_ENCODE_MAX4];
1225 for (;;) {
1
Loop condition is true. Entering loop body
1226 const char *utf8;
1227 int n;
1228 if (*fromP == fromLim)
2
Taking false branch
1229 break;
1230 utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
1231 n = *utf8++;
1232 if (n == 0) {
3
Assuming 'n' is equal to 0
4
Taking true branch
1233 int c = ((const struct unknown_encoding *)enc)
1234 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
1235 n = xmlrpc_XmlUtf8Encode(c, buf);
1236 if (n > toLim - *toP)
5
Taking false branch
1237 break;
1238 utf8 = buf;
1239 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
1240 - (BT_LEAD2 - 2);
1241 }
1242 else {
1243 if (n > toLim - *toP)
1244 break;
1245 (*fromP)++;
1246 }
1247 do {
1248 *(*toP)++ = *utf8++;
6
Assigned value is garbage or undefined
1249 } while (--n != 0);
1250 }
1251}
1252
1253static
1254void unknown_toUtf16(const ENCODING *enc,
1255 const char **fromP, const char *fromLim,
1256 unsigned short **toP, const unsigned short *toLim)
1257{
1258 while (*fromP != fromLim && *toP != toLim) {
1259 unsigned short c
1260 = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
1261 if (c == 0) {
1262 c = (unsigned short)((const struct unknown_encoding *)enc)
1263 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
1264 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
1265 - (BT_LEAD2 - 2);
1266 }
1267 else
1268 (*fromP)++;
1269 *(*toP)++ = c;
1270 }
1271}
1272
1273ENCODING *
1274xmlrpc_XmlInitUnknownEncoding(void * const mem,
1275 int * const table,
1276 int (*convert)(void *userData, const char *p),
1277 void * const userData) {
1278
1279 int i;
1280 struct unknown_encoding *e = mem;
1281 for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
1282 ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
1283 for (i = 0; i < 128; i++)
1284 if (latin1_encoding.type[i] != BT_OTHER
1285 && latin1_encoding.type[i] != BT_NONXML
1286 && table[i] != i)
1287 return 0;
1288 for (i = 0; i < 256; i++) {
1289 int c = table[i];
1290 if (c == -1) {
1291 e->normal.type[i] = BT_MALFORM;
1292 /* This shouldn't really get used. */
1293 e->utf16[i] = 0xFFFF;
1294 e->utf8[i][0] = 1;
1295 e->utf8[i][1] = 0;
1296 }
1297 else if (c < 0) {
1298 if (c < -4)
1299 return 0;
1300 e->normal.type[i] = BT_LEAD2 - (c + 2);
1301 e->utf8[i][0] = 0;
1302 e->utf16[i] = 0;
1303 }
1304 else if (c < 0x80) {
1305 if (latin1_encoding.type[c] != BT_OTHER
1306 && latin1_encoding.type[c] != BT_NONXML
1307 && c != i)
1308 return 0;
1309 e->normal.type[i] = latin1_encoding.type[c];
1310 e->utf8[i][0] = 1;
1311 e->utf8[i][1] = (char)c;
1312 e->utf16[i] = c == 0 ? 0xFFFF : c;
1313 }
1314 else if (checkCharRefNumber(c) < 0) {
1315 e->normal.type[i] = BT_NONXML;
1316 /* This shouldn't really get used. */
1317 e->utf16[i] = 0xFFFF;
1318 e->utf8[i][0] = 1;
1319 e->utf8[i][1] = 0;
1320 }
1321 else {
1322 if (c > 0xFFFF)
1323 return 0;
1324 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)(namingBitmap[(nmstrtPages[c >> 8] << 3) + ((c &
0xff) >> 5)] & (1 << ((c & 0xff) & 0x1F
)))
)
1325 e->normal.type[i] = BT_NMSTRT;
1326 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)(namingBitmap[(namePages[c >> 8] << 3) + ((c &
0xff) >> 5)] & (1 << ((c & 0xff) & 0x1F
)))
)
1327 e->normal.type[i] = BT_NAME;
1328 else
1329 e->normal.type[i] = BT_OTHER;
1330 e->utf8[i][0] = (char)xmlrpc_XmlUtf8Encode(c, e->utf8[i] + 1);
1331 e->utf16[i] = c;
1332 }
1333 }
1334 e->userData = userData;
1335 e->convert = convert;
1336 if (convert) {
1337 e->normal.isName2 = unknown_isName;
1338 e->normal.isName3 = unknown_isName;
1339 e->normal.isName4 = unknown_isName;
1340 e->normal.isNmstrt2 = unknown_isNmstrt;
1341 e->normal.isNmstrt3 = unknown_isNmstrt;
1342 e->normal.isNmstrt4 = unknown_isNmstrt;
1343 e->normal.isInvalid2 = unknown_isInvalid;
1344 e->normal.isInvalid3 = unknown_isInvalid;
1345 e->normal.isInvalid4 = unknown_isInvalid;
1346 }
1347 e->normal.enc.utf8Convert = unknown_toUtf8;
1348 e->normal.enc.utf16Convert = unknown_toUtf16;
1349 return &(e->normal.enc);
1350}
1351
/* Indices of the built-in encodings.  If this enumeration is changed,
   getEncodingIndex and encodings must also be changed. */
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC,
  UTF_8_ENC,
  UTF_16_ENC,
  UTF_16BE_ENC,
  UTF_16LE_ENC,
  /* must match encodingNames up to here */
  NO_ENC
};
1365
1366static const char KW_ISO_8859_1[] = {
1367 ASCII_I0x49, ASCII_S0x53, ASCII_O0x4F, ASCII_MINUS0x2D, ASCII_80x38, ASCII_80x38, ASCII_50x35, ASCII_90x39, ASCII_MINUS0x2D, ASCII_10x31, '\0'
1368};
1369static const char KW_US_ASCII[] = {
1370 ASCII_U0x55, ASCII_S0x53, ASCII_MINUS0x2D, ASCII_A0x41, ASCII_S0x53, ASCII_C0x43, ASCII_I0x49, ASCII_I0x49, '\0'
1371};
1372static const char KW_UTF_8[] = {
1373 ASCII_U0x55, ASCII_T0x54, ASCII_F0x46, ASCII_MINUS0x2D, ASCII_80x38, '\0'
1374};
1375static const char KW_UTF_16[] = {
1376 ASCII_U0x55, ASCII_T0x54, ASCII_F0x46, ASCII_MINUS0x2D, ASCII_10x31, ASCII_60x36, '\0'
1377};
1378static const char KW_UTF_16BE[] = {
1379 ASCII_U0x55, ASCII_T0x54, ASCII_F0x46, ASCII_MINUS0x2D, ASCII_10x31, ASCII_60x36, ASCII_B0x42, ASCII_E0x45, '\0'
1380};
1381static const char KW_UTF_16LE[] = {
1382 ASCII_U0x55, ASCII_T0x54, ASCII_F0x46, ASCII_MINUS0x2D, ASCII_10x31, ASCII_60x36, ASCII_L0x4C, ASCII_E0x45, '\0'
1383};
1384
1385static
1386int getEncodingIndex(const char *name)
1387{
1388 static const char *encodingNames[] = {
1389 KW_ISO_8859_1,
1390 KW_US_ASCII,
1391 KW_UTF_8,
1392 KW_UTF_16,
1393 KW_UTF_16BE,
1394 KW_UTF_16LE,
1395 };
1396 int i;
1397 if (name == 0)
1398 return NO_ENC;
1399 for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
1400 if (streqci(name, encodingNames[i]))
1401 return i;
1402 return UNKNOWN_ENC;
1403}
1404
/* For binary compatibility, we store the index of the encoding specified
at initialization in the isUtf16 member. */

/* Read back the encoding index stored in (enc)->initEnc.isUtf16. */
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)

/* Store encoding index i in (enc)->initEnc.isUtf16.  The argument is
parenthesized so the (char) cast applies to the whole expression rather
than only to its first operand. */
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1410
1411/* This is what detects the encoding.
1412encodingTable maps from encoding indices to encodings;
1413INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
1414state is XML_CONTENT_STATE if we're parsing an external text entity,
1415and XML_PROLOG_STATE otherwise.
1416*/
1417
1418
1419static
1420int initScan(const ENCODING **encodingTable,
1421 const INIT_ENCODING *enc,
1422 int state,
1423 const char *ptr,
1424 const char *end,
1425 const char **nextTokPtr)
1426{
1427 const ENCODING **encPtr;
1428
1429 if (ptr == end)
1430 return XML_TOK_NONE-4;
1431 encPtr = enc->encPtr;
1432 if (ptr + 1 == end) {
1433 /* only a single byte available for auto-detection */
1434 /* so we're parsing an external text entity... */
1435 /* if UTF-16 was externally specified, then we need at least 2 bytes */
1436 switch (INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16)) {
1437 case UTF_16_ENC:
1438 case UTF_16LE_ENC:
1439 case UTF_16BE_ENC:
1440 return XML_TOK_PARTIAL-1;
1441 }
1442 switch ((unsigned char)*ptr) {
1443 case 0xFE:
1444 case 0xFF:
1445 case 0xEF: /* possibly first byte of UTF-8 BOM */
1446 if (INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16) == ISO_8859_1_ENC
1447 && state == XML_CONTENT_STATE1)
1448 break;
1449 /* fall through */
1450 case 0x00:
1451 case 0x3C:
1452 return XML_TOK_PARTIAL-1;
1453 }
1454 }
1455 else {
1456 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1457 case 0xFEFF:
1458 if (INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16) == ISO_8859_1_ENC
1459 && state == XML_CONTENT_STATE1)
1460 break;
1461 *nextTokPtr = ptr + 2;
1462 *encPtr = encodingTable[UTF_16BE_ENC];
1463 return XML_TOK_BOM14;
1464 /* 00 3C is handled in the default case */
1465 case 0x3C00:
1466 if ((INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16) == UTF_16BE_ENC
1467 || INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16) == UTF_16_ENC)
1468 && state == XML_CONTENT_STATE1)
1469 break;
1470 *encPtr = encodingTable[UTF_16LE_ENC];
1471 return XmlTok(*encPtr, state, ptr, end, nextTokPtr)(((*encPtr)->scanners[state])(*encPtr, ptr, end, nextTokPtr
))
;
1472 case 0xFFFE:
1473 if (INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16) == ISO_8859_1_ENC
1474 && state == XML_CONTENT_STATE1)
1475 break;
1476 *nextTokPtr = ptr + 2;
1477 *encPtr = encodingTable[UTF_16LE_ENC];
1478 return XML_TOK_BOM14;
1479 case 0xEFBB:
1480 /* Maybe a UTF-8 BOM (EF BB BF) */
1481 /* If there's an explicitly specified (external) encoding
1482 of ISO-8859-1 or some flavour of UTF-16
1483 and this is an external text entity,
1484 don't look for the BOM,
1485 because it might be a legal data. */
1486 if (state == XML_CONTENT_STATE1) {
1487 int e = INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16);
1488 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
1489 break;
1490 }
1491 if (ptr + 2 == end)
1492 return XML_TOK_PARTIAL-1;
1493 if ((unsigned char)ptr[2] == 0xBF) {
1494 *encPtr = encodingTable[UTF_8_ENC];
1495 return XML_TOK_BOM14;
1496 }
1497 break;
1498 default:
1499 if (ptr[0] == '\0') {
1500 /* 0 isn't a legal data character. Furthermore a document entity can only
1501 start with ASCII characters. So the only way this can fail to be big-endian
1502 UTF-16 if it it's an external parsed general entity that's labelled as
1503 UTF-16LE. */
1504 if (state == XML_CONTENT_STATE1 && INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16) == UTF_16LE_ENC)
1505 break;
1506 *encPtr = encodingTable[UTF_16BE_ENC];
1507 return XmlTok(*encPtr, state, ptr, end, nextTokPtr)(((*encPtr)->scanners[state])(*encPtr, ptr, end, nextTokPtr
))
;
1508 }
1509 else if (ptr[1] == '\0') {
1510 /* We could recover here in the case:
1511 - parsing an external entity
1512 - second byte is 0
1513 - no externally specified encoding
1514 - no encoding declaration
1515 by assuming UTF-16LE. But we don't, because this would mean when
1516 presented just with a single byte, we couldn't reliably determine
1517 whether we needed further bytes. */
1518 if (state == XML_CONTENT_STATE1)
1519 break;
1520 *encPtr = encodingTable[UTF_16LE_ENC];
1521 return XmlTok(*encPtr, state, ptr, end, nextTokPtr)(((*encPtr)->scanners[state])(*encPtr, ptr, end, nextTokPtr
))
;
1522 }
1523 break;
1524 }
1525 }
1526 *encPtr = encodingTable[INIT_ENC_INDEX(enc)((int)(enc)->initEnc.isUtf16)];
1527 return XmlTok(*encPtr, state, ptr, end, nextTokPtr)(((*encPtr)->scanners[state])(*encPtr, ptr, end, nextTokPtr
))
;
1528}
1529
1530
1531#define NS(x) x
1532#define ns(x) x
1533#include "xmltok_ns.c"
1534#undef NS
1535#undef ns
1536
1537#define NS(x) x ## NS
1538#define ns(x) x ## _ns
1539
1540#include "xmltok_ns.c"
1541
1542#undef NS
1543#undef ns
1544
1545ENCODING *
1546xmlrpc_XmlInitUnknownEncodingNS(void * const mem,
1547 int * const table,
1548 int (*convert)(void *userData, const char *p),
1549 void * const userData) {
1550
1551 ENCODING * const enc =
1552 xmlrpc_XmlInitUnknownEncoding(mem, table, convert, userData);
1553 if (enc)
1554 ((struct normal_encoding *)enc)->type[ASCII_COLON0x3A] = BT_COLON;
1555 return enc;
1556}