1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2002      Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
17    Copyright (c) 2016      Don Lewis <truckman@apache.org>
18    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
19    Copyright (c) 2017      Alexander Bluhm <alexander.bluhm@gmx.net>
20    Copyright (c) 2017      Benbuck Nason <bnason@netflix.com>
21    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
22    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
23    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
24    Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
25    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
26    Copyright (c) 2023      Hanno Böck <hanno@gentoo.org>
27    Licensed under the MIT license:
28 
29    Permission is  hereby granted,  free of charge,  to any  person obtaining
30    a  copy  of  this  software   and  associated  documentation  files  (the
31    "Software"),  to  deal in  the  Software  without restriction,  including
32    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
33    distribute, sublicense, and/or sell copies of the Software, and to permit
34    persons  to whom  the Software  is  furnished to  do so,  subject to  the
35    following conditions:
36 
37    The above copyright  notice and this permission notice  shall be included
38    in all copies or substantial portions of the Software.
39 
40    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
41    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
42    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
43    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
44    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
45    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
46    USE OR OTHER DEALINGS IN THE SOFTWARE.
47 */
48 
49 #include "../../lv_conf_internal.h"
50 #if LV_USE_XML
51 
52 #include "expat_config.h"
53 
54 #include <stddef.h>
55 #include <string.h> /* memcpy */
56 #include <stdbool.h>
57 
58 #ifdef _WIN32
59 #  include "winconfig.h"
60 #endif
61 
62 #include "expat_external.h"
63 #include "internal.h"
64 #include "xmltok.h"
65 #include "nametab.h"
66 
67 #ifdef XML_DTD
68 #  define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
69 #else
70 #  define IGNORE_SECTION_TOK_VTABLE /* as nothing */
71 #endif
72 
73 #define VTABLE1                                                                \
74   {PREFIX(prologTok), PREFIX(contentTok),                                      \
75    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE},                         \
76       {PREFIX(attributeValueTok), PREFIX(entityValueTok)},                     \
77       PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS),             \
78       PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName),    \
79       PREFIX(updatePosition), PREFIX(isPublicId)
80 
81 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
82 
/* Nonzero when the code unit with high byte `hi` and low byte `lo` has its
   bit set in namingBitmap.  (pages)[hi] selects a group of eight bitmap
   words, the top 3 bits of `lo` pick the word inside that group and the
   low 5 bits of `lo` pick the bit inside the word.
   Every argument is parenthesized so that compound expressions such as
   `table + 1` expand with the intended precedence. */
#define UCS2_GET_NAMING(pages, hi, lo)                                         \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
85 
86 /* A 2 byte UTF-8 representation splits the characters 11 bits between
87    the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
88    pages, 3 bits to add to that index and 5 bits to generate the mask.
89 */
90 #define UTF8_GET_NAMING2(pages, byte)                                          \
91   (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3)                         \
92                 + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)]         \
93    & (1u << (((byte)[1]) & 0x1F)))
94 
95 /* A 3 byte UTF-8 representation splits the characters 16 bits between
96    the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
97    into pages, 3 bits to add to that index and 5 bits to generate the
98    mask.
99 */
100 #define UTF8_GET_NAMING3(pages, byte)                                          \
101   (namingBitmap                                                                \
102        [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)]      \
103          << 3)                                                                 \
104         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
105    & (1u << (((byte)[2]) & 0x1F)))
106 
107 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
108    of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/
109    with the additional restriction of not allowing the Unicode
110    code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
111    Implementation details:
112      (A & 0x80) == 0     means A < 0x80
113    and
114      (A & 0xC0) == 0xC0  means A > 0xBF
115 */
116 
/* Nonzero when the 2-byte sequence starting at p is malformed: the lead
   byte is below 0xC2, or the second byte is not a continuation byte
   (i.e. it is < 0x80 or > 0xBF).
   p must point to unsigned char; it is fully parenthesized so pointer
   expressions such as `buf + 1` are dereferenced as a whole. */
#define UTF8_INVALID2(p)                                                       \
  ((*(p)) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
119 
/* Nonzero when the 3-byte sequence starting at p is malformed:
     - the third byte is not a continuation byte, or the sequence is
       EF,BF,BE / EF,BF,BF (third byte > 0xBD after EF,BF);
     - lead 0xE0 with a second byte below 0xA0 or above 0xBF;
     - lead 0xED with a second byte above 0x9F;
     - any other lead with a second byte that is not a continuation byte.
   p must point to unsigned char; it is fully parenthesized so pointer
   expressions such as `buf + 1` are dereferenced as a whole. */
#define UTF8_INVALID3(p)                                                       \
  (((p)[2] & 0x80) == 0                                                        \
   || ((*(p)) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD                        \
                                        : ((p)[2] & 0xC0) == 0xC0)             \
   || ((*(p)) == 0xE0                                                          \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((*(p)) == 0xED ? (p)[1] > 0x9F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
128 
/* Nonzero when the 4-byte sequence starting at p is malformed:
     - the third or fourth byte is not a continuation byte;
     - lead 0xF0 with a second byte below 0x90 or above 0xBF;
     - lead 0xF4 with a second byte above 0x8F;
     - any other lead with a second byte that is not a continuation byte.
   p must point to unsigned char; it is fully parenthesized so pointer
   expressions such as `buf + 1` are dereferenced as a whole. */
#define UTF8_INVALID4(p)                                                       \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0     \
   || ((p)[2] & 0xC0) == 0xC0                                                  \
   || ((*(p)) == 0xF0                                                          \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((*(p)) == 0xF4 ? (p)[1] > 0x8F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
136 
137 static int PTRFASTCALL
isNever(const ENCODING * enc,const char * p)138 isNever(const ENCODING *enc, const char *p) {
139   UNUSED_P(enc);
140   UNUSED_P(p);
141   return 0;
142 }
143 
144 static int PTRFASTCALL
utf8_isName2(const ENCODING * enc,const char * p)145 utf8_isName2(const ENCODING *enc, const char *p) {
146   UNUSED_P(enc);
147   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
148 }
149 
150 static int PTRFASTCALL
utf8_isName3(const ENCODING * enc,const char * p)151 utf8_isName3(const ENCODING *enc, const char *p) {
152   UNUSED_P(enc);
153   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
154 }
155 
156 #define utf8_isName4 isNever
157 
158 static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING * enc,const char * p)159 utf8_isNmstrt2(const ENCODING *enc, const char *p) {
160   UNUSED_P(enc);
161   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
162 }
163 
164 static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING * enc,const char * p)165 utf8_isNmstrt3(const ENCODING *enc, const char *p) {
166   UNUSED_P(enc);
167   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
168 }
169 
170 #define utf8_isNmstrt4 isNever
171 
172 static int PTRFASTCALL
utf8_isInvalid2(const ENCODING * enc,const char * p)173 utf8_isInvalid2(const ENCODING *enc, const char *p) {
174   UNUSED_P(enc);
175   return UTF8_INVALID2((const unsigned char *)p);
176 }
177 
178 static int PTRFASTCALL
utf8_isInvalid3(const ENCODING * enc,const char * p)179 utf8_isInvalid3(const ENCODING *enc, const char *p) {
180   UNUSED_P(enc);
181   return UTF8_INVALID3((const unsigned char *)p);
182 }
183 
184 static int PTRFASTCALL
utf8_isInvalid4(const ENCODING * enc,const char * p)185 utf8_isInvalid4(const ENCODING *enc, const char *p) {
186   UNUSED_P(enc);
187   return UTF8_INVALID4((const unsigned char *)p);
188 }
189 
190 struct normal_encoding {
191   ENCODING enc;
192   unsigned char type[256];
193 #ifdef XML_MIN_SIZE
194   int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
195   int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
196   int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
197   int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
198   int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
199 #endif /* XML_MIN_SIZE */
200   int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
201   int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
202   int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
203   int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
204   int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
205   int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
206   int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
207   int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
208   int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
209 };
210 
211 #define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
212 
213 #ifdef XML_MIN_SIZE
214 
215 #  define STANDARD_VTABLE(E)                                                   \
216     E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
217 
218 #else
219 
220 #  define STANDARD_VTABLE(E) /* as nothing */
221 
222 #endif
223 
224 #define NORMAL_VTABLE(E)                                                       \
225   E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3,              \
226       E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
227 
228 #define NULL_VTABLE                                                            \
229   /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL,                  \
230       /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL,        \
231       /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
232 
233 static int FASTCALL checkCharRefNumber(int result);
234 
235 #include "xmltok_impl.h"
236 #include "ascii.h"
237 
238 #ifdef XML_MIN_SIZE
239 #  define sb_isNameMin isNever
240 #  define sb_isNmstrtMin isNever
241 #endif
242 
243 #ifdef XML_MIN_SIZE
244 #  define MINBPC(enc) ((enc)->minBytesPerChar)
245 #else
246 /* minimum bytes per character */
247 #  define MINBPC(enc) 1
248 #endif
249 
250 #define SB_BYTE_TYPE(enc, p)                                                   \
251   (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
252 
253 #ifdef XML_MIN_SIZE
254 static int PTRFASTCALL
sb_byteType(const ENCODING * enc,const char * p)255 sb_byteType(const ENCODING *enc, const char *p) {
256   return SB_BYTE_TYPE(enc, p);
257 }
258 #  define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
259 #else
260 #  define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
261 #endif
262 
263 #ifdef XML_MIN_SIZE
264 #  define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
265 static int PTRFASTCALL
sb_byteToAscii(const ENCODING * enc,const char * p)266 sb_byteToAscii(const ENCODING *enc, const char *p) {
267   UNUSED_P(enc);
268   return *p;
269 }
270 #else
271 #  define BYTE_TO_ASCII(enc, p) (*(p))
272 #endif
273 
274 #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
275 #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
276 #ifdef XML_MIN_SIZE
277 #  define IS_INVALID_CHAR(enc, p, n)                                           \
278     (AS_NORMAL_ENCODING(enc)->isInvalid##n                                     \
279      && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
280 #else
281 #  define IS_INVALID_CHAR(enc, p, n)                                           \
282     (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
283 #endif
284 
285 #ifdef XML_MIN_SIZE
286 #  define IS_NAME_CHAR_MINBPC(enc, p)                                          \
287     (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
288 #  define IS_NMSTRT_CHAR_MINBPC(enc, p)                                        \
289     (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
290 #else
291 #  define IS_NAME_CHAR_MINBPC(enc, p) (0)
292 #  define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
293 #endif
294 
295 #ifdef XML_MIN_SIZE
296 #  define CHAR_MATCHES(enc, p, c)                                              \
297     (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
298 static int PTRCALL
sb_charMatches(const ENCODING * enc,const char * p,int c)299 sb_charMatches(const ENCODING *enc, const char *p, int c) {
300   UNUSED_P(enc);
301   return *p == c;
302 }
303 #else
304 /* c is an ASCII character */
305 #  define CHAR_MATCHES(enc, p, c) (*(p) == (c))
306 #endif
307 
308 #define PREFIX(ident) normal_##ident
309 #define XML_TOK_IMPL_C
310 #include "xmltok_impl.c"
311 #undef XML_TOK_IMPL_C
312 
313 #undef MINBPC
314 #undef BYTE_TYPE
315 #undef BYTE_TO_ASCII
316 #undef CHAR_MATCHES
317 #undef IS_NAME_CHAR
318 #undef IS_NAME_CHAR_MINBPC
319 #undef IS_NMSTRT_CHAR
320 #undef IS_NMSTRT_CHAR_MINBPC
321 #undef IS_INVALID_CHAR
322 
323 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
324        UTF8_cval1 = 0x00,
325        UTF8_cval2 = 0xc0,
326        UTF8_cval3 = 0xe0,
327        UTF8_cval4 = 0xf0
328 };
329 
/* Moves *fromLimRef backwards so that the range [from, *fromLimRef) does
   not end in the middle of a multi-byte UTF-8 character.

   The range is scanned from its end towards `from`:
     - an ASCII byte ends the scan with the limit right after it;
     - a lead byte whose sequence length is covered by the bytes already
       walked over (including itself) ends the scan with the limit right
       after that complete character;
     - a lead byte that is not fully covered is dropped together with the
       bytes walked over so far, and the scan continues;
     - any other byte is stepped over as a trailing byte.
   If the scan reaches `from`, the limit ends up equal to `from`. */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *end = *fromLimRef;
  size_t trailing = 0;

  while (end > from) {
    const unsigned char byte = (unsigned char)end[-1];
    size_t seqLen = 0; /* stays 0 unless `byte` is a lead byte */

    if ((byte & 0x80u) == 0x00u) {
      break; /* 1-byte character, matching 0b0xxxxxxx */
    }

    if ((byte & 0xf8u) == 0xf0u) {
      seqLen = 4; /* lead byte 0b11110xxx */
    } else if ((byte & 0xf0u) == 0xe0u) {
      seqLen = 3; /* lead byte 0b1110xxxx */
    } else if ((byte & 0xe0u) == 0xc0u) {
      seqLen = 2; /* lead byte 0b110xxxxx */
    }

    if (seqLen != 0) {
      if (trailing + 1 >= seqLen) {
        /* complete character: put the limit right behind it */
        end += seqLen - 1;
        break;
      }
      /* incomplete character: forget the bytes walked so far */
      trailing = 0;
    }

    end--;
    trailing++;
  }

  *fromLimRef = end;
}
368 
369 static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)370 utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
371             char **toP, const char *toLim) {
372   bool input_incomplete = false;
373   bool output_exhausted = false;
374 
375   /* Avoid copying partial characters (due to limited space). */
376   const ptrdiff_t bytesAvailable = fromLim - *fromP;
377   const ptrdiff_t bytesStorable = toLim - *toP;
378   UNUSED_P(enc);
379   if (bytesAvailable > bytesStorable) {
380     fromLim = *fromP + bytesStorable;
381     output_exhausted = true;
382   }
383 
384   /* Avoid copying partial characters (from incomplete input). */
385   {
386     const char *const fromLimBefore = fromLim;
387     _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
388     if (fromLim < fromLimBefore) {
389       input_incomplete = true;
390     }
391   }
392 
393   {
394     const ptrdiff_t bytesToCopy = fromLim - *fromP;
395     memcpy(*toP, *fromP, bytesToCopy);
396     *fromP += bytesToCopy;
397     *toP += bytesToCopy;
398   }
399 
400   if (output_exhausted) /* needs to go first */
401     return XML_CONVERT_OUTPUT_EXHAUSTED;
402   else if (input_incomplete)
403     return XML_CONVERT_INPUT_INCOMPLETE;
404   else
405     return XML_CONVERT_COMPLETED;
406 }
407 
408 static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)409 utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
410              unsigned short **toP, const unsigned short *toLim) {
411   enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
412   unsigned short *to = *toP;
413   const char *from = *fromP;
414   while (from < fromLim && to < toLim) {
415     switch (SB_BYTE_TYPE(enc, from)) {
416     case BT_LEAD2:
417       if (fromLim - from < 2) {
418         res = XML_CONVERT_INPUT_INCOMPLETE;
419         goto after;
420       }
421       *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
422       from += 2;
423       break;
424     case BT_LEAD3:
425       if (fromLim - from < 3) {
426         res = XML_CONVERT_INPUT_INCOMPLETE;
427         goto after;
428       }
429       *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
430                                | (from[2] & 0x3f));
431       from += 3;
432       break;
433     case BT_LEAD4: {
434       unsigned long n;
435       if (toLim - to < 2) {
436         res = XML_CONVERT_OUTPUT_EXHAUSTED;
437         goto after;
438       }
439       if (fromLim - from < 4) {
440         res = XML_CONVERT_INPUT_INCOMPLETE;
441         goto after;
442       }
443       n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
444           | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
445       n -= 0x10000;
446       to[0] = (unsigned short)((n >> 10) | 0xD800);
447       to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
448       to += 2;
449       from += 4;
450     } break;
451     default:
452       *to++ = *from++;
453       break;
454     }
455   }
456   if (from < fromLim)
457     res = XML_CONVERT_OUTPUT_EXHAUSTED;
458 after:
459   *fromP = from;
460   *toP = to;
461   return res;
462 }
463 
464 #ifdef XML_NS
465 static const struct normal_encoding utf8_encoding_ns
466     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
467        {
468 #  include "asciitab.h"
469 #  include "utf8tab.h"
470        },
471        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
472 #endif
473 
474 static const struct normal_encoding utf8_encoding
475     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
476        {
477 #define BT_COLON BT_NMSTRT
478 #include "asciitab.h"
479 #undef BT_COLON
480 #include "utf8tab.h"
481        },
482        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
483 
484 #ifdef XML_NS
485 
486 static const struct normal_encoding internal_utf8_encoding_ns
487     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
488        {
489 #  include "iasciitab.h"
490 #  include "utf8tab.h"
491        },
492        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
493 
494 #endif
495 
496 static const struct normal_encoding internal_utf8_encoding
497     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
498        {
499 #define BT_COLON BT_NMSTRT
500 #include "iasciitab.h"
501 #undef BT_COLON
502 #include "utf8tab.h"
503        },
504        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
505 
506 static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)507 latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
508               char **toP, const char *toLim) {
509   UNUSED_P(enc);
510   for (;;) {
511     unsigned char c;
512     if (*fromP == fromLim)
513       return XML_CONVERT_COMPLETED;
514     c = (unsigned char)**fromP;
515     if (c & 0x80) {
516       if (toLim - *toP < 2)
517         return XML_CONVERT_OUTPUT_EXHAUSTED;
518       *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
519       *(*toP)++ = (char)((c & 0x3f) | 0x80);
520       (*fromP)++;
521     } else {
522       if (*toP == toLim)
523         return XML_CONVERT_OUTPUT_EXHAUSTED;
524       *(*toP)++ = *(*fromP)++;
525     }
526   }
527 }
528 
529 static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)530 latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
531                unsigned short **toP, const unsigned short *toLim) {
532   UNUSED_P(enc);
533   while (*fromP < fromLim && *toP < toLim)
534     *(*toP)++ = (unsigned char)*(*fromP)++;
535 
536   if ((*toP == toLim) && (*fromP < fromLim))
537     return XML_CONVERT_OUTPUT_EXHAUSTED;
538   else
539     return XML_CONVERT_COMPLETED;
540 }
541 
542 #ifdef XML_NS
543 
544 static const struct normal_encoding latin1_encoding_ns
545     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
546        {
547 #  include "asciitab.h"
548 #  include "latin1tab.h"
549        },
550        STANDARD_VTABLE(sb_) NULL_VTABLE};
551 
552 #endif
553 
554 static const struct normal_encoding latin1_encoding
555     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
556        {
557 #define BT_COLON BT_NMSTRT
558 #include "asciitab.h"
559 #undef BT_COLON
560 #include "latin1tab.h"
561        },
562        STANDARD_VTABLE(sb_) NULL_VTABLE};
563 
564 static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)565 ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
566              char **toP, const char *toLim) {
567   UNUSED_P(enc);
568   while (*fromP < fromLim && *toP < toLim)
569     *(*toP)++ = *(*fromP)++;
570 
571   if ((*toP == toLim) && (*fromP < fromLim))
572     return XML_CONVERT_OUTPUT_EXHAUSTED;
573   else
574     return XML_CONVERT_COMPLETED;
575 }
576 
577 #ifdef XML_NS
578 
579 static const struct normal_encoding ascii_encoding_ns
580     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
581        {
582 #  include "asciitab.h"
583            /* BT_NONXML == 0 */
584        },
585        STANDARD_VTABLE(sb_) NULL_VTABLE};
586 
587 #endif
588 
589 static const struct normal_encoding ascii_encoding
590     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
591        {
592 #define BT_COLON BT_NMSTRT
593 #include "asciitab.h"
594 #undef BT_COLON
595            /* BT_NONXML == 0 */
596        },
597        STANDARD_VTABLE(sb_) NULL_VTABLE};
598 
599 static int PTRFASTCALL
unicode_byte_type(char hi,char lo)600 unicode_byte_type(char hi, char lo) {
601   switch ((unsigned char)hi) {
602   /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
603   case 0xD8:
604   case 0xD9:
605   case 0xDA:
606   case 0xDB:
607     return BT_LEAD4;
608   /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
609   case 0xDC:
610   case 0xDD:
611   case 0xDE:
612   case 0xDF:
613     return BT_TRAIL;
614   case 0xFF:
615     switch ((unsigned char)lo) {
616     case 0xFF: /* noncharacter-FFFF */
617     case 0xFE: /* noncharacter-FFFE */
618       return BT_NONXML;
619     }
620     break;
621   }
622   return BT_NONASCII;
623 }
624 
/* Generates E##toUtf8(): converts 16-bit code units from [*fromP, fromLim)
   into UTF-8 at [*toP, toLim).  The bytes of each unit are read through
   the GET_LO()/GET_HI() macros in effect where the generator is expanded.

   A dangling odd input byte is ignored.  Output size per unit depends on
   the high byte: 1 byte (below 0x80), 2 bytes (up to 0x07FF), 4 bytes for
   a pair led by 0xD8..0xDB, and 3 bytes for everything else.  Before each
   character the free output space is checked; *fromP is updated on every
   return so that a character is either converted whole or not at all.

   Returns XML_CONVERT_OUTPUT_EXHAUSTED when the next character does not
   fit, XML_CONVERT_INPUT_INCOMPLETE when a leading surrogate has no
   second unit available, and XML_CONVERT_COMPLETED otherwise. */
#define DEFINE_UTF16_TO_UTF8(E)                                                \
  static enum XML_Convert_Result PTRCALL E##toUtf8(                            \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      char **toP, const char *toLim) {                                         \
    const char *from = *fromP;                                                 \
    UNUSED_P(enc);                                                             \
    /* shrink to even: only whole 16-bit units are converted */                \
    fromLim = from + (((fromLim - from) >> 1) << 1);                           \
    while (from < fromLim) {                                                   \
      int plane;                                                               \
      unsigned char nextLo;                                                    \
      unsigned char lo = GET_LO(from);                                         \
      unsigned char hi = GET_HI(from);                                         \
      switch (hi) {                                                            \
      case 0:                                                                  \
        if (lo < 0x80) {                                                       \
          /* single output byte */                                             \
          if (*toP == toLim) {                                                 \
            *fromP = from;                                                     \
            return XML_CONVERT_OUTPUT_EXHAUSTED;                               \
          }                                                                    \
          *(*toP)++ = lo;                                                      \
          break;                                                               \
        }                                                                      \
        /* fall through */                                                     \
      case 0x1:                                                                \
      case 0x2:                                                                \
      case 0x3:                                                                \
      case 0x4:                                                                \
      case 0x5:                                                                \
      case 0x6:                                                                \
      case 0x7:                                                                \
        /* 11 bits divided 5, 6 amongst 2 bytes */                             \
        if (toLim - *toP < 2) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2);                      \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
        break;                                                                 \
      case 0xD8:                                                               \
      case 0xD9:                                                               \
      case 0xDA:                                                               \
      case 0xDB:                                                               \
        /* leading surrogate: consumes two units, emits four bytes */          \
        if (toLim - *toP < 4) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        if (fromLim - from < 4) {                                              \
          *fromP = from;                                                       \
          return XML_CONVERT_INPUT_INCOMPLETE;                                 \
        }                                                                      \
        plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1;                   \
        *(*toP)++ = (char)((plane >> 2) | UTF8_cval4);                         \
        *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80);         \
        from += 2; /* step onto the second unit of the pair */                 \
        nextLo = GET_LO(from);                                                 \
        *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2)           \
                     | (nextLo >> 6) | 0x80);                                  \
        *(*toP)++ = ((nextLo & 0x3f) | 0x80);                                  \
        break;                                                                 \
      default:                                                                 \
        if (toLim - *toP < 3) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        /* 16 bits divided 4, 6, 6 amongst 3 bytes */                          \
        *(*toP)++ = ((hi >> 4) | UTF8_cval3);                                  \
        *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80);                    \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
        break;                                                                 \
      }                                                                        \
      from += 2;                                                               \
    }                                                                          \
    *fromP = from;                                                             \
    if (from < fromLim)                                                        \
      return XML_CONVERT_INPUT_INCOMPLETE;                                     \
    return XML_CONVERT_COMPLETED;                                              \
  }
701 
/* Generates E##toUtf16(): copies 16-bit code units from [*fromP, fromLim)
   to [*toP, toLim), assembling each unit from the GET_HI()/GET_LO() macros
   in effect where the generator is expanded.

   A dangling odd input byte is ignored.  When the input is longer than the
   output can hold and its last unit has a high byte in 0xD8..0xDF, that
   unit is left unconverted and the result is XML_CONVERT_INPUT_INCOMPLETE.
   XML_CONVERT_OUTPUT_EXHAUSTED is returned when the output is full while
   input remains; otherwise XML_CONVERT_COMPLETED (or the incomplete status
   noted above). */
#define DEFINE_UTF16_TO_UTF16(E)                                               \
  static enum XML_Convert_Result PTRCALL E##toUtf16(                           \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      unsigned short **toP, const unsigned short *toLim) {                     \
    enum XML_Convert_Result res = XML_CONVERT_COMPLETED;                       \
    UNUSED_P(enc);                                                             \
    /* shrink to even: only whole 16-bit units are copied */                   \
    fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1);                       \
    /* Avoid copying first half only of surrogate */                           \
    if (fromLim - *fromP > ((toLim - *toP) << 1)                               \
        && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) {                             \
      fromLim -= 2;                                                            \
      res = XML_CONVERT_INPUT_INCOMPLETE;                                      \
    }                                                                          \
    while (*fromP < fromLim && *toP < toLim) {                                 \
      *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP);                      \
      *fromP += 2;                                                             \
    }                                                                          \
    if ((*fromP < fromLim) && (*toP == toLim))                                 \
      return XML_CONVERT_OUTPUT_EXHAUSTED;                                     \
    return res;                                                                \
  }
722 
/* Byte accessors for little-endian 2-byte units: byte 0 is the low byte and
   byte 1 is the high byte.  DEFINE_UTF16_TO_UTF16 reads them (and presumably
   DEFINE_UTF16_TO_UTF8 does as well -- its body is not fully visible here).
*/
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])

/* Instantiate the little2_ converters. */
DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)

#undef GET_LO
#undef GET_HI

/* Same accessors with the byte positions swapped for big-endian units. */
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])

/* Instantiate the big2_ converters. */
DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)

#undef GET_LO
#undef GET_HI
740 
/* Character accessors for little-endian 2-byte encodings.  In every macro
   `p` points at a 2-byte unit whose low byte is p[0] and high byte is p[1].

   LITTLE2_BYTE_TYPE             byte type of the unit: looked up through
                                 SB_BYTE_TYPE when the high byte is zero,
                                 otherwise computed by unicode_byte_type().
   LITTLE2_BYTE_TO_ASCII         the low byte when the high byte is zero,
                                 otherwise -1.
   LITTLE2_CHAR_MATCHES          non-zero when the unit equals character c
                                 (high byte zero, low byte == c).
   LITTLE2_IS_NAME_CHAR_MINBPC   namePages lookup for the unit.
   LITTLE2_IS_NMSTRT_CHAR_MINBPC nmstrtPages lookup for the unit.

   Every use of the parameter is parenthesised so that an expression
   argument such as `ptr + 2` indexes the intended unit.
*/
#define LITTLE2_BYTE_TYPE(enc, p)                                              \
  ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)                                         \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)                                       \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
749 
#ifdef XML_MIN_SIZE

/* XML_MIN_SIZE build: the little-endian accessors are emitted as real
   functions that simply forward to the LITTLE2_* macros.
*/

/* Return the byte type of the 2-byte unit at p. */
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p) {
  return LITTLE2_BYTE_TYPE(enc, p);
}

/* Return the low byte of the unit at p when its high byte is zero,
   otherwise -1. */
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_BYTE_TO_ASCII(p);
}

/* Return non-zero when the unit at p has a zero high byte and a low byte
   equal to c. */
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return LITTLE2_CHAR_MATCHES(p, c);
}

/* Return the namePages lookup result for the unit at p. */
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_IS_NAME_CHAR_MINBPC(p);
}

/* Return the nmstrtPages lookup result for the unit at p. */
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
}

/* Rebuild VTABLE from VTABLE1 (defined elsewhere) plus the little2_
   UTF-8/UTF-16 converters; the encoding tables below expand it. */
#  undef VTABLE
#  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16

#else /* not XML_MIN_SIZE */

/* Regular build: configure the macros below and include xmltok_impl.c.
   PREFIX(ident) yields little2_<ident>; presumably xmltok_impl.c names its
   functions through PREFIX() -- confirm in that file. */
#  undef PREFIX
#  define PREFIX(ident) little2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
/* The n-byte variants are constant false for this encoding; only the
   *_MINBPC forms perform a lookup. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

/* Drop the per-encoding configuration so a later section can redefine it. */
#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */
813 
#ifdef XML_NS

/* Little-endian 2-byte encoding table, only built when XML_NS is defined.
   Unlike little2_encoding, BT_COLON is not remapped before asciitab.h is
   included. */
static const struct normal_encoding little2_encoding_ns
    = {{VTABLE, 2, 0,
        /* Last field is 1 only when BYTEORDER is 1234.
           NOTE(review): presumably the isUtf16 flag, i.e. input bytes can be
           used as native 16-bit units -- confirm against ENCODING. */
#  if BYTEORDER == 1234
        1
#  else
        0
#  endif
       },
       {
       /* Byte-type table contents come from the two included headers. */
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#endif
831 
/* Little-endian 2-byte encoding table.  BT_COLON is defined as BT_NMSTRT
   while asciitab.h is included, so any entry that header spells as BT_COLON
   ends up as BT_NMSTRT in this table. */
static const struct normal_encoding little2_encoding
    = {{VTABLE, 2, 0,
        /* Last field is 1 only when BYTEORDER is 1234.
           NOTE(review): presumably the isUtf16 flag -- confirm against
           ENCODING. */
#if BYTEORDER == 1234
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};
847 
/* "Internal" little-endian tables: compiled unless BYTEORDER is 4321.  They
   differ from the tables above in that the last field of the first member is
   always 1 and the ASCII part comes from iasciitab.h instead of asciitab.h. */
#if BYTEORDER != 4321

#  ifdef XML_NS

/* Variant built when XML_NS is defined; BT_COLON is not remapped. */
static const struct normal_encoding internal_little2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#  endif

/* Variant with BT_COLON defined as BT_NMSTRT while iasciitab.h is included. */
static const struct normal_encoding internal_little2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#endif
873 
/* Character accessors for big-endian 2-byte encodings.  In every macro `p`
   points at a 2-byte unit whose high byte is p[0] and low byte is p[1].

   BIG2_BYTE_TYPE             byte type of the unit: looked up through
                              SB_BYTE_TYPE on the low byte when the high
                              byte is zero, otherwise computed by
                              unicode_byte_type().
   BIG2_BYTE_TO_ASCII         the low byte when the high byte is zero,
                              otherwise -1.
   BIG2_CHAR_MATCHES          non-zero when the unit equals character c
                              (high byte zero, low byte == c).
   BIG2_IS_NAME_CHAR_MINBPC   namePages lookup for the unit.
   BIG2_IS_NMSTRT_CHAR_MINBPC nmstrtPages lookup for the unit.

   Every use of the parameter is parenthesised so that an expression
   argument (e.g. `ptr + 2` or a conditional expression) addresses the
   intended unit.
*/
#define BIG2_BYTE_TYPE(enc, p)                                                 \
  ((p)[0] == 0 ? SB_BYTE_TYPE(enc, (p) + 1)                                    \
               : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(p)                                            \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)                                          \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
882 
#ifdef XML_MIN_SIZE

/* XML_MIN_SIZE build: the big-endian accessors are emitted as real
   functions that simply forward to the BIG2_* macros.
*/

/* Return the byte type of the 2-byte unit at p. */
static int PTRFASTCALL
big2_byteType(const ENCODING *enc, const char *p) {
  return BIG2_BYTE_TYPE(enc, p);
}

/* Return the low byte of the unit at p when its high byte is zero,
   otherwise -1. */
static int PTRFASTCALL
big2_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_BYTE_TO_ASCII(p);
}

/* Return non-zero when the unit at p has a zero high byte and a low byte
   equal to c. */
static int PTRCALL
big2_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return BIG2_CHAR_MATCHES(p, c);
}

/* Return the namePages lookup result for the unit at p. */
static int PTRFASTCALL
big2_isNameMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_IS_NAME_CHAR_MINBPC(p);
}

/* Return the nmstrtPages lookup result for the unit at p. */
static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
}

/* Rebuild VTABLE from VTABLE1 (defined elsewhere) plus the big2_
   UTF-8/UTF-16 converters; the encoding tables below expand it. */
#  undef VTABLE
#  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16

#else /* not XML_MIN_SIZE */

/* Regular build: configure the macros below and include xmltok_impl.c.
   PREFIX(ident) yields big2_<ident>; presumably xmltok_impl.c names its
   functions through PREFIX() -- confirm in that file. */
#  undef PREFIX
#  define PREFIX(ident) big2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
/* The n-byte variants are constant false for this encoding; only the
   *_MINBPC forms perform a lookup. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

/* Drop the per-encoding configuration again. */
#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */
946 
#ifdef XML_NS

/* Big-endian 2-byte encoding table, only built when XML_NS is defined.
   Unlike big2_encoding, BT_COLON is not remapped before asciitab.h is
   included. */
static const struct normal_encoding big2_encoding_ns
    = {{VTABLE, 2, 0,
        /* Last field is 1 only when BYTEORDER is 4321.
           NOTE(review): presumably the isUtf16 flag, i.e. input bytes can be
           used as native 16-bit units -- confirm against ENCODING. */
#  if BYTEORDER == 4321
        1
#  else
        0
#  endif
       },
       {
       /* Byte-type table contents come from the two included headers. */
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#endif
964 
/* Big-endian 2-byte encoding table.  BT_COLON is defined as BT_NMSTRT while
   asciitab.h is included, so any entry that header spells as BT_COLON ends
   up as BT_NMSTRT in this table. */
static const struct normal_encoding big2_encoding
    = {{VTABLE, 2, 0,
        /* Last field is 1 only when BYTEORDER is 4321.
           NOTE(review): presumably the isUtf16 flag -- confirm against
           ENCODING. */
#if BYTEORDER == 4321
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};
980 
/* "Internal" big-endian tables: compiled unless BYTEORDER is 1234.  They
   differ from the tables above in that the last field of the first member is
   always 1 and the ASCII part comes from iasciitab.h instead of asciitab.h. */
#if BYTEORDER != 1234

#  ifdef XML_NS

/* Variant built when XML_NS is defined; BT_COLON is not remapped. */
static const struct normal_encoding internal_big2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#  endif

/* Variant with BT_COLON defined as BT_NMSTRT while iasciitab.h is included. */
static const struct normal_encoding internal_big2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#endif
1006 
1007 #undef PREFIX
1008 
1009 static int FASTCALL
streqci(const char * s1,const char * s2)1010 streqci(const char *s1, const char *s2) {
1011   for (;;) {
1012     char c1 = *s1++;
1013     char c2 = *s2++;
1014     if (ASCII_a <= c1 && c1 <= ASCII_z)
1015       c1 += ASCII_A - ASCII_a;
1016     if (ASCII_a <= c2 && c2 <= ASCII_z)
1017       /* The following line will never get executed.  streqci() is
1018        * only called from two places, both of which guarantee to put
1019        * upper-case strings into s2.
1020        */
1021       c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
1022     if (c1 != c2)
1023       return 0;
1024     if (! c1)
1025       break;
1026   }
1027   return 1;
1028 }
1029 
1030 static void PTRCALL
initUpdatePosition(const ENCODING * enc,const char * ptr,const char * end,POSITION * pos)1031 initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
1032                    POSITION *pos) {
1033   UNUSED_P(enc);
1034   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1035 }
1036 
1037 static int
toAscii(const ENCODING * enc,const char * ptr,const char * end)1038 toAscii(const ENCODING *enc, const char *ptr, const char *end) {
1039   char buf[1];
1040   char *p = buf;
1041   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1042   if (p == buf)
1043     return -1;
1044   else
1045     return buf[0];
1046 }
1047 
1048 static int FASTCALL
isSpace(int c)1049 isSpace(int c) {
1050   switch (c) {
1051   case 0x20:
1052   case 0xD:
1053   case 0xA:
1054   case 0x9:
1055     return 1;
1056   }
1057   return 0;
1058 }
1059 
1060 /* Return 1 if there's just optional white space or there's an S
1061    followed by name=val.
1062 */
1063 static int
parsePseudoAttribute(const ENCODING * enc,const char * ptr,const char * end,const char ** namePtr,const char ** nameEndPtr,const char ** valPtr,const char ** nextTokPtr)1064 parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
1065                      const char **namePtr, const char **nameEndPtr,
1066                      const char **valPtr, const char **nextTokPtr) {
1067   int c;
1068   char open;
1069   if (ptr == end) {
1070     *namePtr = NULL;
1071     return 1;
1072   }
1073   if (! isSpace(toAscii(enc, ptr, end))) {
1074     *nextTokPtr = ptr;
1075     return 0;
1076   }
1077   do {
1078     ptr += enc->minBytesPerChar;
1079   } while (isSpace(toAscii(enc, ptr, end)));
1080   if (ptr == end) {
1081     *namePtr = NULL;
1082     return 1;
1083   }
1084   *namePtr = ptr;
1085   for (;;) {
1086     c = toAscii(enc, ptr, end);
1087     if (c == -1) {
1088       *nextTokPtr = ptr;
1089       return 0;
1090     }
1091     if (c == ASCII_EQUALS) {
1092       *nameEndPtr = ptr;
1093       break;
1094     }
1095     if (isSpace(c)) {
1096       *nameEndPtr = ptr;
1097       do {
1098         ptr += enc->minBytesPerChar;
1099       } while (isSpace(c = toAscii(enc, ptr, end)));
1100       if (c != ASCII_EQUALS) {
1101         *nextTokPtr = ptr;
1102         return 0;
1103       }
1104       break;
1105     }
1106     ptr += enc->minBytesPerChar;
1107   }
1108   if (ptr == *namePtr) {
1109     *nextTokPtr = ptr;
1110     return 0;
1111   }
1112   ptr += enc->minBytesPerChar;
1113   c = toAscii(enc, ptr, end);
1114   while (isSpace(c)) {
1115     ptr += enc->minBytesPerChar;
1116     c = toAscii(enc, ptr, end);
1117   }
1118   if (c != ASCII_QUOT && c != ASCII_APOS) {
1119     *nextTokPtr = ptr;
1120     return 0;
1121   }
1122   open = (char)c;
1123   ptr += enc->minBytesPerChar;
1124   *valPtr = ptr;
1125   for (;; ptr += enc->minBytesPerChar) {
1126     c = toAscii(enc, ptr, end);
1127     if (c == open)
1128       break;
1129     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
1130         && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
1131         && c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
1132       *nextTokPtr = ptr;
1133       return 0;
1134     }
1135   }
1136   *nextTokPtr = ptr + enc->minBytesPerChar;
1137   return 1;
1138 }
1139 
/* Pseudo-attribute names and values that doParseXmlDecl() compares against
   with XmlNameMatchesAscii().  They are spelled with the ASCII_* constants
   rather than string literals and are NUL-terminated. */

/* "version" */
static const char KW_version[]
    = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};

/* "encoding" */
static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
                                   ASCII_i, ASCII_n, ASCII_g, '\0'};

/* "standalone" */
static const char KW_standalone[]
    = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
       ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};

/* "yes" */
static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};

/* "no" */
static const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
1153 
/* Parse the pseudo-attributes of an XML or text declaration lying in
   [ptr, end).

   encodingFinder       maps the encoding name (start, end) to an ENCODING;
                        only called when `encoding` is non-NULL.
   isGeneralTextEntity  non-zero for a text declaration: "version" becomes
                        optional, "encoding" mandatory and "standalone" is
                        rejected.
   badPtr               receives the error position when 0 is returned.
   versionPtr, versionEndPtr, encodingName, encoding, standalone
                        optional outputs; each is written only when non-NULL
                        and the matching attribute is present.

   Returns 1 on success and 0 on a malformed declaration.
*/
static int
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
                                                 const char *),
               int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
               const char *end, const char **badPtr, const char **versionPtr,
               const char **versionEndPtr, const char **encodingName,
               const ENCODING **encoding, int *standalone) {
  const char *val = NULL;
  const char *name = NULL;
  const char *nameEnd = NULL;
  /* Skip 5 characters at the front and 2 at the back -- presumably the
     "<?xml" and "?>" delimiters; confirm against the caller. */
  ptr += 5 * enc->minBytesPerChar;
  end -= 2 * enc->minBytesPerChar;
  /* At least one pseudo-attribute is required. */
  if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
      || ! name) {
    *badPtr = ptr;
    return 0;
  }
  if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
    /* Only a text declaration may start with something other than
       "version"; that first attribute is then examined further below. */
    if (! isGeneralTextEntity) {
      *badPtr = name;
      return 0;
    }
  } else {
    if (versionPtr)
      *versionPtr = val;
    if (versionEndPtr)
      *versionEndPtr = ptr;
    /* Fetch the attribute following "version", if any. */
    if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
      *badPtr = ptr;
      return 0;
    }
    if (! name) {
      if (isGeneralTextEntity) {
        /* a TextDecl must have an EncodingDecl */
        *badPtr = ptr;
        return 0;
      }
      return 1;
    }
  }
  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
    /* The encoding name has to start with an ASCII letter. */
    int c = toAscii(enc, val, end);
    if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
      *badPtr = val;
      return 0;
    }
    if (encodingName)
      *encodingName = val;
    /* ptr is just past the closing quote, so ptr - minBytesPerChar is the
       end of the value. */
    if (encoding)
      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
    if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
      *badPtr = ptr;
      return 0;
    }
    if (! name)
      return 1;
  }
  /* Whatever remains must be "standalone", which a text declaration does
     not allow. */
  if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
      || isGeneralTextEntity) {
    *badPtr = name;
    return 0;
  }
  /* The standalone value must be exactly "yes" or "no". */
  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
    if (standalone)
      *standalone = 1;
  } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
    if (standalone)
      *standalone = 0;
  } else {
    *badPtr = val;
    return 0;
  }
  /* Only white space may follow the last attribute. */
  while (isSpace(toAscii(enc, ptr, end)))
    ptr += enc->minBytesPerChar;
  if (ptr != end) {
    *badPtr = ptr;
    return 0;
  }
  return 1;
}
1234 
1235 static int FASTCALL
checkCharRefNumber(int result)1236 checkCharRefNumber(int result) {
1237   switch (result >> 8) {
1238   case 0xD8:
1239   case 0xD9:
1240   case 0xDA:
1241   case 0xDB:
1242   case 0xDC:
1243   case 0xDD:
1244   case 0xDE:
1245   case 0xDF:
1246     return -1;
1247   case 0:
1248     if (latin1_encoding.type[result] == BT_NONXML)
1249       return -1;
1250     break;
1251   case 0xFF:
1252     if (result == 0xFFFE || result == 0xFFFF)
1253       return -1;
1254     break;
1255   }
1256   return result;
1257 }
1258 
1259 int FASTCALL
XmlUtf8Encode(int c,char * buf)1260 XmlUtf8Encode(int c, char *buf) {
1261   enum {
1262     /* minN is minimum legal resulting value for N byte sequence */
1263     min2 = 0x80,
1264     min3 = 0x800,
1265     min4 = 0x10000
1266   };
1267 
1268   if (c < 0)
1269     return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
1270   if (c < min2) {
1271     buf[0] = (char)(c | UTF8_cval1);
1272     return 1;
1273   }
1274   if (c < min3) {
1275     buf[0] = (char)((c >> 6) | UTF8_cval2);
1276     buf[1] = (char)((c & 0x3f) | 0x80);
1277     return 2;
1278   }
1279   if (c < min4) {
1280     buf[0] = (char)((c >> 12) | UTF8_cval3);
1281     buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1282     buf[2] = (char)((c & 0x3f) | 0x80);
1283     return 3;
1284   }
1285   if (c < 0x110000) {
1286     buf[0] = (char)((c >> 18) | UTF8_cval4);
1287     buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1288     buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1289     buf[3] = (char)((c & 0x3f) | 0x80);
1290     return 4;
1291   }
1292   return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
1293 }
1294 
1295 int FASTCALL
XmlUtf16Encode(int charNum,unsigned short * buf)1296 XmlUtf16Encode(int charNum, unsigned short *buf) {
1297   if (charNum < 0)
1298     return 0;
1299   if (charNum < 0x10000) {
1300     buf[0] = (unsigned short)charNum;
1301     return 1;
1302   }
1303   if (charNum < 0x110000) {
1304     charNum -= 0x10000;
1305     buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1306     buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1307     return 2;
1308   }
1309   return 0;
1310 }
1311 
/* State of a user-described encoding, filled in by XmlInitUnknownEncoding(). */
struct unknown_encoding {
  /* Starts as a copy of latin1_encoding; byte types and some function
     pointers are then overridden. */
  struct normal_encoding normal;
  /* Callback that decodes a multi-byte sequence; invoked as
     convert(userData, p). */
  CONVERTER convert;
  /* Opaque value handed to convert. */
  void *userData;
  /* Per lead byte: the UTF-16 unit to emit, or 0 when convert must be
     called. */
  unsigned short utf16[256];
  /* Per lead byte: [0] is the number of UTF-8 bytes that follow in [1..],
     or 0 when convert must be called. */
  char utf8[256][4];
};

/* Reinterpret an ENCODING pointer as a const struct unknown_encoding pointer. */
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
1321 
1322 int
XmlSizeOfUnknownEncoding(void)1323 XmlSizeOfUnknownEncoding(void) {
1324   return sizeof(struct unknown_encoding);
1325 }
1326 
1327 static int PTRFASTCALL
unknown_isName(const ENCODING * enc,const char * p)1328 unknown_isName(const ENCODING *enc, const char *p) {
1329   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1330   int c = uenc->convert(uenc->userData, p);
1331   if (c & ~0xFFFF)
1332     return 0;
1333   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1334 }
1335 
1336 static int PTRFASTCALL
unknown_isNmstrt(const ENCODING * enc,const char * p)1337 unknown_isNmstrt(const ENCODING *enc, const char *p) {
1338   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1339   int c = uenc->convert(uenc->userData, p);
1340   if (c & ~0xFFFF)
1341     return 0;
1342   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1343 }
1344 
1345 static int PTRFASTCALL
unknown_isInvalid(const ENCODING * enc,const char * p)1346 unknown_isInvalid(const ENCODING *enc, const char *p) {
1347   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1348   int c = uenc->convert(uenc->userData, p);
1349   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1350 }
1351 
1352 static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)1353 unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
1354                char **toP, const char *toLim) {
1355   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1356   char buf[XML_UTF8_ENCODE_MAX];
1357   for (;;) {
1358     const char *utf8;
1359     int n;
1360     if (*fromP == fromLim)
1361       return XML_CONVERT_COMPLETED;
1362     utf8 = uenc->utf8[(unsigned char)**fromP];
1363     n = *utf8++;
1364     if (n == 0) {
1365       int c = uenc->convert(uenc->userData, *fromP);
1366       n = XmlUtf8Encode(c, buf);
1367       if (n > toLim - *toP)
1368         return XML_CONVERT_OUTPUT_EXHAUSTED;
1369       utf8 = buf;
1370       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1371                  - (BT_LEAD2 - 2));
1372     } else {
1373       if (n > toLim - *toP)
1374         return XML_CONVERT_OUTPUT_EXHAUSTED;
1375       (*fromP)++;
1376     }
1377     memcpy(*toP, utf8, n);
1378     *toP += n;
1379   }
1380 }
1381 
1382 static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)1383 unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
1384                 unsigned short **toP, const unsigned short *toLim) {
1385   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1386   while (*fromP < fromLim && *toP < toLim) {
1387     unsigned short c = uenc->utf16[(unsigned char)**fromP];
1388     if (c == 0) {
1389       c = (unsigned short)uenc->convert(uenc->userData, *fromP);
1390       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1391                  - (BT_LEAD2 - 2));
1392     } else
1393       (*fromP)++;
1394     *(*toP)++ = c;
1395   }
1396 
1397   if ((*toP == toLim) && (*fromP < fromLim))
1398     return XML_CONVERT_OUTPUT_EXHAUSTED;
1399   else
1400     return XML_CONVERT_COMPLETED;
1401 }
1402 
1403 ENCODING *
XmlInitUnknownEncoding(void * mem,int * table,CONVERTER convert,void * userData)1404 XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
1405                        void *userData) {
1406   int i;
1407   struct unknown_encoding *e = (struct unknown_encoding *)mem;
1408   memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
1409   for (i = 0; i < 128; i++)
1410     if (latin1_encoding.type[i] != BT_OTHER
1411         && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
1412       return 0;
1413   for (i = 0; i < 256; i++) {
1414     int c = table[i];
1415     if (c == -1) {
1416       e->normal.type[i] = BT_MALFORM;
1417       /* This shouldn't really get used. */
1418       e->utf16[i] = 0xFFFF;
1419       e->utf8[i][0] = 1;
1420       e->utf8[i][1] = 0;
1421     } else if (c < 0) {
1422       if (c < -4)
1423         return 0;
1424       /* Multi-byte sequences need a converter function */
1425       if (! convert)
1426         return 0;
1427       e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
1428       e->utf8[i][0] = 0;
1429       e->utf16[i] = 0;
1430     } else if (c < 0x80) {
1431       if (latin1_encoding.type[c] != BT_OTHER
1432           && latin1_encoding.type[c] != BT_NONXML && c != i)
1433         return 0;
1434       e->normal.type[i] = latin1_encoding.type[c];
1435       e->utf8[i][0] = 1;
1436       e->utf8[i][1] = (char)c;
1437       e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
1438     } else if (checkCharRefNumber(c) < 0) {
1439       e->normal.type[i] = BT_NONXML;
1440       /* This shouldn't really get used. */
1441       e->utf16[i] = 0xFFFF;
1442       e->utf8[i][0] = 1;
1443       e->utf8[i][1] = 0;
1444     } else {
1445       if (c > 0xFFFF)
1446         return 0;
1447       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1448         e->normal.type[i] = BT_NMSTRT;
1449       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1450         e->normal.type[i] = BT_NAME;
1451       else
1452         e->normal.type[i] = BT_OTHER;
1453       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1454       e->utf16[i] = (unsigned short)c;
1455     }
1456   }
1457   e->userData = userData;
1458   e->convert = convert;
1459   if (convert) {
1460     e->normal.isName2 = unknown_isName;
1461     e->normal.isName3 = unknown_isName;
1462     e->normal.isName4 = unknown_isName;
1463     e->normal.isNmstrt2 = unknown_isNmstrt;
1464     e->normal.isNmstrt3 = unknown_isNmstrt;
1465     e->normal.isNmstrt4 = unknown_isNmstrt;
1466     e->normal.isInvalid2 = unknown_isInvalid;
1467     e->normal.isInvalid3 = unknown_isInvalid;
1468     e->normal.isInvalid4 = unknown_isInvalid;
1469   }
1470   e->normal.enc.utf8Convert = unknown_toUtf8;
1471   e->normal.enc.utf16Convert = unknown_toUtf16;
1472   return &(e->normal.enc);
1473 }
1474 
/* Indices of the built-in encodings.  If this enumeration is changed,
   getEncodingIndex and encodings must also be changed.

   getEncodingIndex() returns the position of a name inside its
   encodingNames array, so the values from ISO_8859_1_ENC to UTF_16LE_ENC
   have to follow the order of that array.
*/
enum {
  UNKNOWN_ENC = -1, /* name not recognised by getEncodingIndex() */
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC,
  UTF_8_ENC,
  UTF_16_ENC,
  UTF_16BE_ENC,
  UTF_16LE_ENC,
  /* must match encodingNames up to here */
  NO_ENC /* returned by getEncodingIndex() for a NULL name */
};
1488 
/* Upper-case, NUL-terminated names of the built-in encodings, spelled with
   the ASCII_* constants.  getEncodingIndex() compares against them without
   regard to case. */

/* "ISO-8859-1" */
static const char KW_ISO_8859_1[]
    = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
       ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
/* "US-ASCII" */
static const char KW_US_ASCII[]
    = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
       ASCII_C, ASCII_I, ASCII_I,     '\0'};
/* "UTF-8" */
static const char KW_UTF_8[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
/* "UTF-16" */
static const char KW_UTF_16[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
/* "UTF-16BE" */
static const char KW_UTF_16BE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_B, ASCII_E, '\0'};
/* "UTF-16LE" */
static const char KW_UTF_16LE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_L, ASCII_E, '\0'};
1505 
1506 static int FASTCALL
getEncodingIndex(const char * name)1507 getEncodingIndex(const char *name) {
1508   static const char *const encodingNames[] = {
1509       KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
1510   };
1511   int i;
1512   if (name == NULL)
1513     return NO_ENC;
1514   for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
1515     if (streqci(name, encodingNames[i]))
1516       return i;
1517   return UNKNOWN_ENC;
1518 }
1519 
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX(enc) reads that index back as an int.
   SET_INIT_ENC_INDEX(enc, i) stores i, narrowed to char.  The argument is
   parenthesised so that the whole expression is cast, not just its first
   operand.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1526 
/* This is what detects the encoding.  encodingTable maps from
   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
   the external (protocol) specified encoding; state is
   XML_CONTENT_STATE if we're parsing an external text entity, and
   XML_PROLOG_STATE otherwise.

   Returns XML_TOK_NONE when there is no input, XML_TOK_PARTIAL when more
   bytes are needed to decide, and XML_TOK_BOM after recognising a byte order
   mark (with *nextTokPtr set just past it).  In every other case the chosen
   encoding is stored through enc->encPtr and the result of XmlTok() with
   that encoding is returned.
*/

static int
initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
         int state, const char *ptr, const char *end, const char **nextTokPtr) {
  const ENCODING **encPtr;

  if (ptr >= end)
    return XML_TOK_NONE;
  encPtr = enc->encPtr;
  if (ptr + 1 == end) {
    /* only a single byte available for auto-detection */
#ifndef XML_DTD /* FIXME */
    /* a well-formed document entity must have more than one byte */
    if (state != XML_CONTENT_STATE)
      return XML_TOK_PARTIAL;
#endif
    /* so we're parsing an external text entity... */
    /* if UTF-16 was externally specified, then we need at least 2 bytes */
    switch (INIT_ENC_INDEX(enc)) {
    case UTF_16_ENC:
    case UTF_16LE_ENC:
    case UTF_16BE_ENC:
      return XML_TOK_PARTIAL;
    }
    switch ((unsigned char)*ptr) {
    case 0xFE:
    case 0xFF:
    case 0xEF: /* possibly first byte of UTF-8 BOM */
      /* With an external ISO-8859-1 label these bytes may be plain data. */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      /* fall through */
    case 0x00:
    case 0x3C:
      return XML_TOK_PARTIAL;
    }
  } else {
    /* At least two bytes: dispatch on the first two as a big-endian pair. */
    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
    case 0xFEFF:
      /* Big-endian UTF-16 byte order mark. */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16BE_ENC];
      return XML_TOK_BOM;
    /* 00 3C is handled in the default case */
    case 0x3C00:
      /* '<' followed by a zero byte: little-endian UTF-16 without a BOM,
         unless an external entity is labelled big-endian or plain UTF-16. */
      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
          && state == XML_CONTENT_STATE)
        break;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    case 0xFFFE:
      /* Little-endian UTF-16 byte order mark. */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XML_TOK_BOM;
    case 0xEFBB:
      /* Maybe a UTF-8 BOM (EF BB BF) */
      /* If there's an explicitly specified (external) encoding
         of ISO-8859-1 or some flavour of UTF-16
         and this is an external text entity,
         don't look for the BOM,
         because it might be legal data.
      */
      if (state == XML_CONTENT_STATE) {
        int e = INIT_ENC_INDEX(enc);
        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
            || e == UTF_16_ENC)
          break;
      }
      /* The third byte is needed to confirm the BOM. */
      if (ptr + 2 == end)
        return XML_TOK_PARTIAL;
      if ((unsigned char)ptr[2] == 0xBF) {
        *nextTokPtr = ptr + 3;
        *encPtr = encodingTable[UTF_8_ENC];
        return XML_TOK_BOM;
      }
      break;
    default:
      if (ptr[0] == '\0') {
        /* 0 isn't a legal data character. Furthermore a document
           entity can only start with ASCII characters.  So the only
           way this can fail to be big-endian UTF-16 is if it's an
           external parsed general entity that's labelled as
           UTF-16LE.
        */
        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
          break;
        *encPtr = encodingTable[UTF_16BE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      } else if (ptr[1] == '\0') {
        /* We could recover here in the case:
            - parsing an external entity
            - second byte is 0
            - no externally specified encoding
            - no encoding declaration
           by assuming UTF-16LE.  But we don't, because this would mean when
           presented just with a single byte, we couldn't reliably determine
           whether we needed further bytes.
        */
        if (state == XML_CONTENT_STATE)
          break;
        *encPtr = encodingTable[UTF_16LE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      }
      break;
    }
  }
  /* Nothing was auto-detected: use the externally specified encoding. */
  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
1645 
/* First inclusion of xmltok_ns.c: NS() and ns() leave their argument
   unchanged, so identifiers that the included file builds with them carry no
   suffix.  NOTE(review): how xmltok_ns.c uses NS()/ns() is not visible
   here -- confirm in that file. */
#define NS(x) x
#define ns(x) x
#define XML_TOK_NS_C
#include "xmltok_ns.c"
#undef XML_TOK_NS_C
#undef NS
#undef ns
1653 
1654 #ifdef XML_NS
1655 
/* Second inclusion of xmltok_ns.c, namespace-aware flavour: NS(x) appends
   "NS" and ns(x) appends "_ns" to the identifier passed in. */
#  define NS(x) x##NS
#  define ns(x) x##_ns

#  define XML_TOK_NS_C
#  include "xmltok_ns.c"
#  undef XML_TOK_NS_C

#  undef NS
#  undef ns
1665 
1666 ENCODING *
XmlInitUnknownEncodingNS(void * mem,int * table,CONVERTER convert,void * userData)1667 XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
1668                          void *userData) {
1669   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1670   if (enc)
1671     ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1672   return enc;
1673 }
1674 
1675 #endif /* XML_NS */
1676 
1677 #endif /* LV_USE_XML */
1678 
1679