1 /* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Licensed under the MIT license:
44
45 Permission is hereby granted, free of charge, to any person obtaining
46 a copy of this software and associated documentation files (the
47 "Software"), to deal in the Software without restriction, including
48 without limitation the rights to use, copy, modify, merge, publish,
49 distribute, sublicense, and/or sell copies of the Software, and to permit
50 persons to whom the Software is furnished to do so, subject to the
51 following conditions:
52
53 The above copyright notice and this permission notice shall be included
54 in all copies or substantial portions of the Software.
55
56 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
57 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
58 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
59 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
60 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
61 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
62 USE OR OTHER DEALINGS IN THE SOFTWARE.
63 */
64
65 #include "../../lv_conf_internal.h"
66 #if LV_USE_XML
67
68 #define XML_BUILDING_EXPAT 1
69
70 #include "expat_config.h"
71
72 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
73 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
74 #endif
75
76 #if defined(XML_DTD) && XML_GE == 0
77 # error Either undefine XML_DTD or define XML_GE to 1.
78 #endif
79
80 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
81 || (XML_CONTEXT_BYTES + 0 < 0)
82 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
83 #endif
84
85 #if defined(HAVE_SYSCALL_GETRANDOM)
86 # if ! defined(_GNU_SOURCE)
87 # define _GNU_SOURCE 1 /* syscall prototype */
88 # endif
89 #endif
90
91 #ifdef _WIN32
92 /* force stdlib to define rand_s() */
93 # if ! defined(_CRT_RAND_S)
94 # define _CRT_RAND_S
95 # endif
96 #endif
97
98 #include <stdbool.h>
99 #include <stddef.h>
100 #include <string.h> /* memset(), memcpy() */
101 #include <assert.h>
102 #include <limits.h> /* UINT_MAX */
103 #include <stdio.h> /* fprintf */
104 #include <stdlib.h> /* getenv, rand_s */
105 #include <stdint.h> /* uintptr_t */
106 #include <math.h> /* isnan */
107
108 #ifdef _WIN32
109 # define getpid GetCurrentProcessId
110 #else
111 # include <sys/time.h> /* gettimeofday() */
112 # include <sys/types.h> /* getpid() */
113 # include <unistd.h> /* getpid() */
114 # include <fcntl.h> /* O_RDONLY */
115 # include <errno.h>
116 #endif
117
118 #ifdef _WIN32
119 # include "winconfig.h"
120 #endif
121
122 #include "ascii.h"
123 #include "expat.h"
124 #include "siphash.h"
125
126 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
127 # if defined(HAVE_GETRANDOM)
128 # include <sys/random.h> /* getrandom */
129 # else
130 # include <unistd.h> /* syscall */
131 # include <sys/syscall.h> /* SYS_getrandom */
132 # endif
133 # if ! defined(GRND_NONBLOCK)
134 # define GRND_NONBLOCK 0x0001
#  endif /* ! defined(GRND_NONBLOCK) */
136 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
137
138 #if defined(HAVE_LIBBSD) \
139 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
140 # include <bsd/stdlib.h>
141 #endif
142
143 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
144 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
145 #endif
146
147 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
148 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
149 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
150 && ! defined(XML_POOR_ENTROPY)
151 # error You do not have support for any sources of high quality entropy \
152 enabled. For end user security, that is probably not what you want. \
153 \
154 Your options include: \
155 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
156 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
157 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
158 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
159 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
160 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
161 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
162 * Windows >=Vista (rand_s): _WIN32. \
163 \
164 If insist on not using any of these, bypass this error by defining \
165 XML_POOR_ENTROPY; you have been warned. \
166 \
167 If you have reasons to patch this detection code away or need changes \
168 to the build system, please open a bug. Thank you!
169 #endif
170
171 #ifdef XML_UNICODE
172 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
173 # define XmlConvert XmlUtf16Convert
174 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
175 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
176 # define XmlEncode XmlUtf16Encode
177 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
178 typedef unsigned short ICHAR;
179 #else
180 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
181 # define XmlConvert XmlUtf8Convert
182 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
183 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
184 # define XmlEncode XmlUtf8Encode
185 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
186 typedef char ICHAR;
187 #endif
188
189 #ifndef XML_NS
190
191 # define XmlInitEncodingNS XmlInitEncoding
192 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
193 # undef XmlGetInternalEncodingNS
194 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
195 # define XmlParseXmlDeclNS XmlParseXmlDecl
196
197 #endif
198
199 #ifdef XML_UNICODE
200
201 # ifdef XML_UNICODE_WCHAR_T
202 # define XML_T(x) (const wchar_t) x
203 # define XML_L(x) L##x
204 # else
205 # define XML_T(x) (const unsigned short)x
206 # define XML_L(x) x
207 # endif
208
209 #else
210
211 # define XML_T(x) x
212 # define XML_L(x) x
213
214 #endif
215
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   Only valid when sz is a power of 2 (relies on bit masking). */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* EXPAT_SAFE_PTR_DIFF(p, q): NULL-aware pointer subtraction; yields p - q
   when both pointers are non-NULL and 0 otherwise. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* EXPAT_MIN(a, b): the smaller of a and b.  Each argument may be evaluated
   more than once, so avoid side effects in the arguments. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
223
224 #include "internal.h"
225 #include "xmltok.h"
226 #include "xmlrole.h"
227
228 typedef const XML_Char *KEY;
229
230 typedef struct {
231 KEY name;
232 } NAMED;
233
234 typedef struct {
235 NAMED **v;
236 unsigned char power;
237 size_t size;
238 size_t used;
239 const XML_Memory_Handling_Suite *mem;
240 } HASH_TABLE;
241
242 static size_t keylen(KEY s);
243
244 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
245
/* Collision probing uses double hashing.  The step size must be relatively
   prime to the table size, which is a power of 2.  A second hash value is
   obtained cheaply from the bits of the first hash that were discarded
   (masked out) when the table index was computed:
       index = hash & mask, where mask = table->size - 1.
   The step is capped at table->size / 4 (mask >> 2) and forced to be odd,
   because every odd number is relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
258
259 typedef struct {
260 NAMED **p;
261 NAMED **end;
262 } HASH_TABLE_ITER;
263
264 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
265 #define INIT_DATA_BUF_SIZE 1024
266 #define INIT_ATTS_SIZE 16
267 #define INIT_ATTS_VERSION 0xFFFFFFFF
268 #define INIT_BLOCK_SIZE 1024
269 #define INIT_BUFFER_SIZE 1024
270
271 #define EXPAND_SPARE 24
272
273 typedef struct binding {
274 struct prefix *prefix;
275 struct binding *nextTagBinding;
276 struct binding *prevPrefixBinding;
277 const struct attribute_id *attId;
278 XML_Char *uri;
279 int uriLen;
280 int uriAlloc;
281 } BINDING;
282
283 typedef struct prefix {
284 const XML_Char *name;
285 BINDING *binding;
286 } PREFIX;
287
288 typedef struct {
289 const XML_Char *str;
290 const XML_Char *localPart;
291 const XML_Char *prefix;
292 int strLen;
293 int uriLen;
294 int prefixLen;
295 } TAG_NAME;
296
297 /* TAG represents an open element.
298 The name of the element is stored in both the document and API
299 encodings. The memory buffer 'buf' is a separately-allocated
300 memory area which stores the name. During the XML_Parse()/
301 XML_ParseBuffer() when the element is open, the memory for the 'raw'
302 version of the name (in the document encoding) is shared with the
303 document buffer. If the element is open across calls to
304 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
305 contain the 'raw' name as well.
306
307 A parser reuses these structures, maintaining a list of allocated
308 TAG objects in a free list.
309 */
310 typedef struct tag {
311 struct tag *parent; /* parent of this element */
312 const char *rawName; /* tagName in the original encoding */
313 int rawNameLength;
314 TAG_NAME name; /* tagName in the API encoding */
315 char *buf; /* buffer for name components */
316 char *bufEnd; /* end of the buffer */
317 BINDING *bindings;
318 } TAG;
319
320 typedef struct {
321 const XML_Char *name;
322 const XML_Char *textPtr;
323 int textLen; /* length in XML_Chars */
324 int processed; /* # of processed bytes - when suspended */
325 const XML_Char *systemId;
326 const XML_Char *base;
327 const XML_Char *publicId;
328 const XML_Char *notation;
329 XML_Bool open;
330 XML_Bool is_param;
331 XML_Bool is_internal; /* true if declared in internal subset outside PE */
332 } ENTITY;
333
334 typedef struct {
335 enum XML_Content_Type type;
336 enum XML_Content_Quant quant;
337 const XML_Char *name;
338 int firstchild;
339 int lastchild;
340 int childcnt;
341 int nextsib;
342 } CONTENT_SCAFFOLD;
343
344 #define INIT_SCAFFOLD_ELEMENTS 32
345
346 typedef struct block {
347 struct block *next;
348 int size;
349 XML_Char s[1];
350 } BLOCK;
351
352 typedef struct {
353 BLOCK *blocks;
354 BLOCK *freeBlocks;
355 const XML_Char *end;
356 XML_Char *ptr;
357 XML_Char *start;
358 const XML_Memory_Handling_Suite *mem;
359 } STRING_POOL;
360
361 /* The XML_Char before the name is used to determine whether
362 an attribute has been specified. */
363 typedef struct attribute_id {
364 XML_Char *name;
365 PREFIX *prefix;
366 XML_Bool maybeTokenized;
367 XML_Bool xmlns;
368 } ATTRIBUTE_ID;
369
370 typedef struct {
371 const ATTRIBUTE_ID *id;
372 XML_Bool isCdata;
373 const XML_Char *value;
374 } DEFAULT_ATTRIBUTE;
375
376 typedef struct {
377 unsigned long version;
378 unsigned long hash;
379 const XML_Char *uriName;
380 } NS_ATT;
381
382 typedef struct {
383 const XML_Char *name;
384 PREFIX *prefix;
385 const ATTRIBUTE_ID *idAtt;
386 int nDefaultAtts;
387 int allocDefaultAtts;
388 DEFAULT_ATTRIBUTE *defaultAtts;
389 } ELEMENT_TYPE;
390
391 typedef struct {
392 HASH_TABLE generalEntities;
393 HASH_TABLE elementTypes;
394 HASH_TABLE attributeIds;
395 HASH_TABLE prefixes;
396 STRING_POOL pool;
397 STRING_POOL entityValuePool;
398 /* false once a parameter entity reference has been skipped */
399 XML_Bool keepProcessing;
400 /* true once an internal or external PE reference has been encountered;
401 this includes the reference to an external subset */
402 XML_Bool hasParamEntityRefs;
403 XML_Bool standalone;
404 #ifdef XML_DTD
405 /* indicates if external PE has been read */
406 XML_Bool paramEntityRead;
407 HASH_TABLE paramEntities;
408 #endif /* XML_DTD */
409 PREFIX defaultPrefix;
410 /* === scaffolding for building content model === */
411 XML_Bool in_eldecl;
412 CONTENT_SCAFFOLD *scaffold;
413 unsigned contentStringLen;
414 unsigned scaffSize;
415 unsigned scaffCount;
416 int scaffLevel;
417 int *scaffIndex;
418 } DTD;
419
420 typedef struct open_internal_entity {
421 const char *internalEventPtr;
422 const char *internalEventEndPtr;
423 struct open_internal_entity *next;
424 ENTITY *entity;
425 int startTagLevel;
426 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
427 } OPEN_INTERNAL_ENTITY;
428
/* Tells the accounting code how a span of bytes is to be counted. */
enum XML_Account {
  /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_DIRECT,
  /* intermediate bytes produced during entity expansion */
  XML_ACCOUNT_ENTITY_EXPANSION,
  /* i.e. do not account, was accounted already */
  XML_ACCOUNT_NONE
};
435
#if XML_GE == 1
/* Wide counter type used for byte accounting. */
typedef unsigned long long XmlBigCount;

/* Byte counters and limits used by the accounting* functions. */
typedef struct accounting {
  XmlBigCount countBytesDirect;   /* direct byte count */
  XmlBigCount countBytesIndirect; /* indirect byte count */
  unsigned long debugLevel;       /* debug level */
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes; /* activation threshold */
} ACCOUNTING;

/* Counters used by the entityTracking* functions. */
typedef struct entity_stats {
  unsigned int countEverOpened;  /* entities opened so far */
  unsigned int currentDepth;     /* current depth */
  unsigned int maximumDepthSeen; /* largest depth seen */
  unsigned long debugLevel;      /* debug level */
} ENTITY_STATS;
#endif /* XML_GE == 1 */
453
454 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
455 const char *end, const char **endPtr);
456
457 static Processor prologProcessor;
458 static Processor prologInitProcessor;
459 static Processor contentProcessor;
460 static Processor cdataSectionProcessor;
461 #ifdef XML_DTD
462 static Processor ignoreSectionProcessor;
463 static Processor externalParEntProcessor;
464 static Processor externalParEntInitProcessor;
465 static Processor entityValueProcessor;
466 static Processor entityValueInitProcessor;
467 #endif /* XML_DTD */
468 static Processor epilogProcessor;
469 static Processor errorProcessor;
470 static Processor externalEntityInitProcessor;
471 static Processor externalEntityInitProcessor2;
472 static Processor externalEntityInitProcessor3;
473 static Processor externalEntityContentProcessor;
474 static Processor internalEntityProcessor;
475
476 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
477 const XML_Char *encodingName);
478 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
479 const char *s, const char *next);
480 static enum XML_Error initializeEncoding(XML_Parser parser);
481 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
482 const char *s, const char *end, int tok,
483 const char *next, const char **nextPtr,
484 XML_Bool haveMore, XML_Bool allowClosingDoctype,
485 enum XML_Account account);
486 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
487 XML_Bool betweenDecl);
488 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
489 const ENCODING *enc, const char *start,
490 const char *end, const char **endPtr,
491 XML_Bool haveMore, enum XML_Account account);
492 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
493 const char **startPtr, const char *end,
494 const char **nextPtr, XML_Bool haveMore,
495 enum XML_Account account);
496 #ifdef XML_DTD
497 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
498 const char **startPtr, const char *end,
499 const char **nextPtr, XML_Bool haveMore);
500 #endif /* XML_DTD */
501
502 static void freeBindings(XML_Parser parser, BINDING *bindings);
503 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
504 const char *attStr, TAG_NAME *tagNamePtr,
505 BINDING **bindingsPtr,
506 enum XML_Account account);
507 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
508 const ATTRIBUTE_ID *attId, const XML_Char *uri,
509 BINDING **bindingsPtr);
510 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
511 XML_Bool isCdata, XML_Bool isId,
512 const XML_Char *value, XML_Parser parser);
513 static enum XML_Error storeAttributeValue(XML_Parser parser,
514 const ENCODING *enc, XML_Bool isCdata,
515 const char *ptr, const char *end,
516 STRING_POOL *pool,
517 enum XML_Account account);
518 static enum XML_Error appendAttributeValue(XML_Parser parser,
519 const ENCODING *enc,
520 XML_Bool isCdata, const char *ptr,
521 const char *end, STRING_POOL *pool,
522 enum XML_Account account);
523 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
524 const char *start, const char *end);
525 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
526 #if XML_GE == 1
527 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
528 const char *start, const char *end,
529 enum XML_Account account);
530 #else
531 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
532 #endif
533 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
534 const char *start, const char *end);
535 static int reportComment(XML_Parser parser, const ENCODING *enc,
536 const char *start, const char *end);
537 static void reportDefault(XML_Parser parser, const ENCODING *enc,
538 const char *start, const char *end);
539
540 static const XML_Char *getContext(XML_Parser parser);
541 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
542
543 static void FASTCALL normalizePublicId(XML_Char *s);
544
545 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
546 /* do not call if m_parentParser != NULL */
547 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
548 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
549 const XML_Memory_Handling_Suite *ms);
550 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
551 const XML_Memory_Handling_Suite *ms);
552 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
553 STRING_POOL *newPool, const HASH_TABLE *oldTable);
554 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
555 size_t createSize);
556 static void FASTCALL hashTableInit(HASH_TABLE *table,
557 const XML_Memory_Handling_Suite *ms);
558 static void FASTCALL hashTableClear(HASH_TABLE *table);
559 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
560 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
561 const HASH_TABLE *table);
562 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
563
564 static void FASTCALL poolInit(STRING_POOL *pool,
565 const XML_Memory_Handling_Suite *ms);
566 static void FASTCALL poolClear(STRING_POOL *pool);
567 static void FASTCALL poolDestroy(STRING_POOL *pool);
568 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
569 const char *ptr, const char *end);
570 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
571 const char *ptr, const char *end);
572 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
573 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
574 const XML_Char *s);
575 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
576 int n);
577 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
578 const XML_Char *s);
579
580 static int FASTCALL nextScaffoldPart(XML_Parser parser);
581 static XML_Content *build_model(XML_Parser parser);
582 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
583 const char *ptr, const char *end);
584
585 static XML_Char *copyString(const XML_Char *s,
586 const XML_Memory_Handling_Suite *memsuite);
587
588 static unsigned long generate_hash_secret_salt(XML_Parser parser);
589 static XML_Bool startParsing(XML_Parser parser);
590
591 static XML_Parser parserCreate(const XML_Char *encodingName,
592 const XML_Memory_Handling_Suite *memsuite,
593 const XML_Char *nameSep, DTD *dtd);
594
595 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
596
597 #if XML_GE == 1
598 static float accountingGetCurrentAmplification(XML_Parser rootParser);
599 static void accountingReportStats(XML_Parser originParser, const char *epilog);
600 static void accountingOnAbort(XML_Parser originParser);
601 static void accountingReportDiff(XML_Parser rootParser,
602 unsigned int levelsAwayFromRootParser,
603 const char *before, const char *after,
604 ptrdiff_t bytesMore, int source_line,
605 enum XML_Account account);
606 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
607 const char *before, const char *after,
608 int source_line,
609 enum XML_Account account);
610
611 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
612 const char *action, int sourceLine);
613 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
614 int sourceLine);
615 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
616 int sourceLine);
617
618 static XML_Parser getRootParserOf(XML_Parser parser,
619 unsigned int *outLevelDiff);
620 #endif /* XML_GE == 1 */
621
622 static unsigned long getDebugLevel(const char *variableName,
623 unsigned long defaultDebugLevel);
624
/* Accessors for a STRING_POOL given by pointer.  The string currently under
   construction occupies [start, ptr).  Every macro parenthesizes its
   arguments so that expressions such as &somePool or a conditional value
   can be passed safely.

   poolStart(pool)         - start of the string under construction
   poolLength(pool)        - number of XML_Chars appended so far
   poolChop(pool)          - drop the last appended XML_Char
   poolLastChar(pool)      - the last appended XML_Char (lvalue)
   poolDiscard(pool)       - throw away the string under construction
   poolFinish(pool)        - commit the string; the next one starts at ptr
   poolAppendChar(pool, c) - append c, calling poolGrow() first when the
                             storage is full; evaluates to 1 on success and
                             0 if poolGrow() failed
*/
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
635
636 #if ! defined(XML_TESTING)
637 const
638 #endif
639 XML_Bool g_reparseDeferralEnabledDefault
640 = XML_TRUE; // write ONLY in runtests.c
641 #if defined(XML_TESTING)
642 unsigned int g_bytesScanned = 0; // used for testing only
643 #endif
644
645 struct XML_ParserStruct {
646 /* The first member must be m_userData so that the XML_GetUserData
647 macro works. */
648 void *m_userData;
649 void *m_handlerArg;
650
651 // How the four parse buffer pointers below relate in time and space:
652 //
653 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
654 // | | | |
655 // <--parsed-->| | |
656 // <---parsing--->| |
657 // <--unoccupied-->|
658 // <---------total-malloced/realloced-------->|
659
660 char *m_buffer; // malloc/realloc base pointer of parse buffer
661 const XML_Memory_Handling_Suite m_mem;
662 const char *m_bufferPtr; // first character to be parsed
663 char *m_bufferEnd; // past last character to be parsed
664 const char *m_bufferLim; // allocated end of m_buffer
665
666 XML_Index m_parseEndByteIndex;
667 const char *m_parseEndPtr;
668 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
669 XML_Bool m_reparseDeferralEnabled;
670 int m_lastBufferRequestSize;
671 XML_Char *m_dataBuf;
672 XML_Char *m_dataBufEnd;
673 XML_StartElementHandler m_startElementHandler;
674 XML_EndElementHandler m_endElementHandler;
675 XML_CharacterDataHandler m_characterDataHandler;
676 XML_ProcessingInstructionHandler m_processingInstructionHandler;
677 XML_CommentHandler m_commentHandler;
678 XML_StartCdataSectionHandler m_startCdataSectionHandler;
679 XML_EndCdataSectionHandler m_endCdataSectionHandler;
680 XML_DefaultHandler m_defaultHandler;
681 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
682 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
683 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
684 XML_NotationDeclHandler m_notationDeclHandler;
685 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
686 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
687 XML_NotStandaloneHandler m_notStandaloneHandler;
688 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
689 XML_Parser m_externalEntityRefHandlerArg;
690 XML_SkippedEntityHandler m_skippedEntityHandler;
691 XML_UnknownEncodingHandler m_unknownEncodingHandler;
692 XML_ElementDeclHandler m_elementDeclHandler;
693 XML_AttlistDeclHandler m_attlistDeclHandler;
694 XML_EntityDeclHandler m_entityDeclHandler;
695 XML_XmlDeclHandler m_xmlDeclHandler;
696 const ENCODING *m_encoding;
697 INIT_ENCODING m_initEncoding;
698 const ENCODING *m_internalEncoding;
699 const XML_Char *m_protocolEncodingName;
700 XML_Bool m_ns;
701 XML_Bool m_ns_triplets;
702 void *m_unknownEncodingMem;
703 void *m_unknownEncodingData;
704 void *m_unknownEncodingHandlerData;
705 void(XMLCALL *m_unknownEncodingRelease)(void *);
706 PROLOG_STATE m_prologState;
707 Processor *m_processor;
708 enum XML_Error m_errorCode;
709 const char *m_eventPtr;
710 const char *m_eventEndPtr;
711 const char *m_positionPtr;
712 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
713 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
714 XML_Bool m_defaultExpandInternalEntities;
715 int m_tagLevel;
716 ENTITY *m_declEntity;
717 const XML_Char *m_doctypeName;
718 const XML_Char *m_doctypeSysid;
719 const XML_Char *m_doctypePubid;
720 const XML_Char *m_declAttributeType;
721 const XML_Char *m_declNotationName;
722 const XML_Char *m_declNotationPublicId;
723 ELEMENT_TYPE *m_declElementType;
724 ATTRIBUTE_ID *m_declAttributeId;
725 XML_Bool m_declAttributeIsCdata;
726 XML_Bool m_declAttributeIsId;
727 DTD *m_dtd;
728 const XML_Char *m_curBase;
729 TAG *m_tagStack;
730 TAG *m_freeTagList;
731 BINDING *m_inheritedBindings;
732 BINDING *m_freeBindingList;
733 int m_attsSize;
734 int m_nSpecifiedAtts;
735 int m_idAttIndex;
736 ATTRIBUTE *m_atts;
737 NS_ATT *m_nsAtts;
738 unsigned long m_nsAttsVersion;
739 unsigned char m_nsAttsPower;
740 #ifdef XML_ATTR_INFO
741 XML_AttrInfo *m_attInfo;
742 #endif
743 POSITION m_position;
744 STRING_POOL m_tempPool;
745 STRING_POOL m_temp2Pool;
746 char *m_groupConnector;
747 unsigned int m_groupSize;
748 XML_Char m_namespaceSeparator;
749 XML_Parser m_parentParser;
750 XML_ParsingStatus m_parsingStatus;
751 #ifdef XML_DTD
752 XML_Bool m_isParamEntity;
753 XML_Bool m_useForeignDTD;
754 enum XML_ParamEntityParsing m_paramEntityParsing;
755 #endif
756 unsigned long m_hash_secret_salt;
757 #if XML_GE == 1
758 ACCOUNTING m_accounting;
759 ENTITY_STATS m_entity_stats;
760 #endif
761 };
762
/* Allocation helpers that route through the parser's own memory suite
   (parser->m_mem).  'parser' is parenthesized so that any pointer
   expression (e.g. &obj) may be passed.

   MALLOC(parser, s)     - allocate s bytes
   REALLOC(parser, p, s) - resize allocation p to s bytes
   FREE(parser, p)       - release allocation p
*/
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
766
767 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)768 XML_ParserCreate(const XML_Char *encodingName) {
769 return XML_ParserCreate_MM(encodingName, NULL, NULL);
770 }
771
772 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)773 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
774 XML_Char tmp[2] = {nsSep, 0};
775 return XML_ParserCreate_MM(encodingName, NULL, tmp);
776 }
777
778 // "xml=http://www.w3.org/XML/1998/namespace"
779 static const XML_Char implicitContext[]
780 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
781 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
782 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
783 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
784 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
785 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
786 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
787 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
788 '\0'};
789
790 /* To avoid warnings about unused functions: */
791 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
792
793 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
794
/* Obtain entropy on Linux 3.17+ through getrandom (library call or raw
   syscall, depending on HAVE_GETRANDOM), always with GRND_NONBLOCK.

   target: buffer that receives the random bytes
   count:  number of bytes requested

   Returns 1 when all count bytes were written, 0 otherwise.

   A short result is followed by another request for the remaining bytes.
   errno is consulted only when the call itself reported an error, so a
   stale errno value can neither abort nor prolong the loop.  A call that
   makes no progress without reporting an error ends the loop as a failure.
*/
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds both the ssize_t of getrandom() and the long of syscall() */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1; /* everything requested has been written */
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      return 0; /* no progress, or a real error (e.g. EAGAIN) */
    }
    /* short result or EINTR: ask again for the remainder */
  }
}
822
823 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
824
825 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
826
/* Extract entropy from /dev/urandom.

   target: buffer that receives the random bytes
   count:  number of bytes requested

   Returns 1 when all count bytes were read, 0 otherwise (including when
   the device cannot be opened).

   Short reads are followed by another read for the remaining bytes.  errno
   is consulted only when read() itself reported an error, so a stale errno
   value can neither abort nor prolong the loop.  A read() that returns 0
   ends the loop as a failure.
*/
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      break; /* end of input, or a real error */
    }
    /* short read or EINTR: read again for the remainder */
  }

  close(fd);
  return success;
}
854
855 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
856
857 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
858
859 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
860
/* Fill target with count random bytes taken from arc4random().

   Each arc4random() call supplies one 32-bit word, which is emitted least
   significant byte first; a final partial word is cut off at count bytes. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (written < count);
         byteIndex++) {
      out[written] = (uint8_t)(word >> (byteIndex * 8));
      written++;
    }
  }
}
876
877 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
878
879 #ifdef _WIN32
880
881 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
882 as it didn't declare it in its header prior to version 5.3.0 of its
883 runtime package (mingwrt, containing stdlib.h). The upstream fix
884 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
885 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
886 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
887 __declspec(dllimport) int rand_s(unsigned int *);
888 # endif
889
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills `target` with `count` bytes, consuming each rand_s() result least
 * significant byte first.  Returns 1 on success and 0 as soon as rand_s()
 * reports a failure (bytes already written stay in place).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int random32 = 0;
    size_t shift;

    if (rand_s(&random32) != 0) {
      return 0; /* failure */
    }

    for (shift = 0; (shift < sizeof(random32) * 8) && (written < count);
         shift += 8) {
      out[written++] = (uint8_t)(random32 >> shift);
    }
  }
  return 1; /* success */
}
913
914 #endif /* _WIN32 */
915
916 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
917
/* Derive a low-quality entropy value from the current time.
   On Windows the two halves of the system FILETIME are XORed together;
   elsewhere the microseconds field of gettimeofday() is returned. */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME fileTime;
  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of
     unused-variable warnings. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
940
941 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
942
/* Pass `entropy` through unchanged.  When the EXPAT_ENTROPY_DEBUG debug
   level is at least 1, first report the value and the name of the provider
   (`label`) on stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  {
    /* two hex digits per byte, zero-padded */
    const int hexDigits = (int)sizeof(entropy) * 2;
    const unsigned long byteCount = (unsigned long)sizeof(entropy);

    fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
            hexDigits, entropy, byteCount);
  }
  return entropy;
}
951
952 static unsigned long
generate_hash_secret_salt(XML_Parser parser)953 generate_hash_secret_salt(XML_Parser parser) {
954 unsigned long entropy;
955 (void)parser;
956
957 /* "Failproof" high quality providers: */
958 #if defined(HAVE_ARC4RANDOM_BUF)
959 arc4random_buf(&entropy, sizeof(entropy));
960 return ENTROPY_DEBUG("arc4random_buf", entropy);
961 #elif defined(HAVE_ARC4RANDOM)
962 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
963 return ENTROPY_DEBUG("arc4random", entropy);
964 #else
965 /* Try high quality providers first .. */
966 # ifdef _WIN32
967 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
968 return ENTROPY_DEBUG("rand_s", entropy);
969 }
970 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
971 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
972 return ENTROPY_DEBUG("getrandom", entropy);
973 }
974 # endif
975 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
976 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
977 return ENTROPY_DEBUG("/dev/urandom", entropy);
978 }
979 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
980 /* .. and self-made low quality for backup: */
981
982 /* Process ID is 0 bits entropy if attacker has local access */
983 entropy = gather_time_entropy() ^ getpid();
984
985 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
986 if (sizeof(unsigned long) == 4) {
987 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
988 } else {
989 return ENTROPY_DEBUG("fallback(8)",
990 entropy * (unsigned long)2305843009213693951ULL);
991 }
992 #endif
993 }
994
995 static unsigned long
get_hash_secret_salt(XML_Parser parser)996 get_hash_secret_salt(XML_Parser parser) {
997 if (parser->m_parentParser != NULL)
998 return get_hash_secret_salt(parser->m_parentParser);
999 return parser->m_hash_secret_salt;
1000 }
1001
1002 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1003 callProcessor(XML_Parser parser, const char *start, const char *end,
1004 const char **endPtr) {
1005 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1006
1007 if (parser->m_reparseDeferralEnabled
1008 && ! parser->m_parsingStatus.finalBuffer) {
1009 // Heuristic: don't try to parse a partial token again until the amount of
1010 // available data has increased significantly.
1011 const size_t had_before = parser->m_partialTokenBytesBefore;
1012 // ...but *do* try anyway if we're close to causing a reallocation.
1013 size_t available_buffer
1014 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1015 #if XML_CONTEXT_BYTES > 0
1016 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1017 #endif
1018 available_buffer
1019 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1020 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1021 const bool enough
1022 = (have_now >= 2 * had_before)
1023 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1024
1025 if (! enough) {
1026 *endPtr = start; // callers may expect this to be set
1027 return XML_ERROR_NONE;
1028 }
1029 }
1030 #if defined(XML_TESTING)
1031 g_bytesScanned += (unsigned)have_now;
1032 #endif
1033 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1034 if (ret == XML_ERROR_NONE) {
1035 // if we consumed nothing, remember what we had on this parse attempt.
1036 if (*endPtr == start) {
1037 parser->m_partialTokenBytesBefore = have_now;
1038 } else {
1039 parser->m_partialTokenBytesBefore = 0;
1040 }
1041 }
1042 return ret;
1043 }
1044
1045 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1046 startParsing(XML_Parser parser) {
1047 /* hash functions must be initialized before setContext() is called */
1048 if (parser->m_hash_secret_salt == 0)
1049 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1050 if (parser->m_ns) {
1051 /* implicit context only set for root parser, since child
1052 parsers (i.e. external entity parsers) will inherit it
1053 */
1054 return setContext(parser, implicitContext);
1055 }
1056 return XML_TRUE;
1057 }
1058
1059 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1060 XML_ParserCreate_MM(const XML_Char *encodingName,
1061 const XML_Memory_Handling_Suite *memsuite,
1062 const XML_Char *nameSep) {
1063 return parserCreate(encodingName, memsuite, nameSep, NULL);
1064 }
1065
1066 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1067 parserCreate(const XML_Char *encodingName,
1068 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1069 DTD *dtd) {
1070 XML_Parser parser;
1071
1072 if (memsuite) {
1073 XML_Memory_Handling_Suite *mtemp;
1074 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1075 if (parser != NULL) {
1076 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1077 mtemp->malloc_fcn = memsuite->malloc_fcn;
1078 mtemp->realloc_fcn = memsuite->realloc_fcn;
1079 mtemp->free_fcn = memsuite->free_fcn;
1080 }
1081 } else {
1082 XML_Memory_Handling_Suite *mtemp;
1083 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1084 if (parser != NULL) {
1085 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1086 mtemp->malloc_fcn = malloc;
1087 mtemp->realloc_fcn = realloc;
1088 mtemp->free_fcn = free;
1089 }
1090 }
1091
1092 if (! parser)
1093 return parser;
1094
1095 parser->m_buffer = NULL;
1096 parser->m_bufferLim = NULL;
1097
1098 parser->m_attsSize = INIT_ATTS_SIZE;
1099 parser->m_atts
1100 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1101 if (parser->m_atts == NULL) {
1102 FREE(parser, parser);
1103 return NULL;
1104 }
1105 #ifdef XML_ATTR_INFO
1106 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1107 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1108 if (parser->m_attInfo == NULL) {
1109 FREE(parser, parser->m_atts);
1110 FREE(parser, parser);
1111 return NULL;
1112 }
1113 #endif
1114 parser->m_dataBuf
1115 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1116 if (parser->m_dataBuf == NULL) {
1117 FREE(parser, parser->m_atts);
1118 #ifdef XML_ATTR_INFO
1119 FREE(parser, parser->m_attInfo);
1120 #endif
1121 FREE(parser, parser);
1122 return NULL;
1123 }
1124 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1125
1126 if (dtd)
1127 parser->m_dtd = dtd;
1128 else {
1129 parser->m_dtd = dtdCreate(&parser->m_mem);
1130 if (parser->m_dtd == NULL) {
1131 FREE(parser, parser->m_dataBuf);
1132 FREE(parser, parser->m_atts);
1133 #ifdef XML_ATTR_INFO
1134 FREE(parser, parser->m_attInfo);
1135 #endif
1136 FREE(parser, parser);
1137 return NULL;
1138 }
1139 }
1140
1141 parser->m_freeBindingList = NULL;
1142 parser->m_freeTagList = NULL;
1143 parser->m_freeInternalEntities = NULL;
1144
1145 parser->m_groupSize = 0;
1146 parser->m_groupConnector = NULL;
1147
1148 parser->m_unknownEncodingHandler = NULL;
1149 parser->m_unknownEncodingHandlerData = NULL;
1150
1151 parser->m_namespaceSeparator = ASCII_EXCL;
1152 parser->m_ns = XML_FALSE;
1153 parser->m_ns_triplets = XML_FALSE;
1154
1155 parser->m_nsAtts = NULL;
1156 parser->m_nsAttsVersion = 0;
1157 parser->m_nsAttsPower = 0;
1158
1159 parser->m_protocolEncodingName = NULL;
1160
1161 poolInit(&parser->m_tempPool, &(parser->m_mem));
1162 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1163 parserInit(parser, encodingName);
1164
1165 if (encodingName && ! parser->m_protocolEncodingName) {
1166 if (dtd) {
1167 // We need to stop the upcoming call to XML_ParserFree from happily
1168 // destroying parser->m_dtd because the DTD is shared with the parent
1169 // parser and the only guard that keeps XML_ParserFree from destroying
1170 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1171 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1172 parser->m_dtd = NULL;
1173 }
1174 XML_ParserFree(parser);
1175 return NULL;
1176 }
1177
1178 if (nameSep) {
1179 parser->m_ns = XML_TRUE;
1180 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1181 parser->m_namespaceSeparator = *nameSep;
1182 } else {
1183 parser->m_internalEncoding = XmlGetInternalEncoding();
1184 }
1185
1186 return parser;
1187 }
1188
1189 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1190 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1191 parser->m_processor = prologInitProcessor;
1192 XmlPrologStateInit(&parser->m_prologState);
1193 if (encodingName != NULL) {
1194 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1195 }
1196 parser->m_curBase = NULL;
1197 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1198 parser->m_userData = NULL;
1199 parser->m_handlerArg = NULL;
1200 parser->m_startElementHandler = NULL;
1201 parser->m_endElementHandler = NULL;
1202 parser->m_characterDataHandler = NULL;
1203 parser->m_processingInstructionHandler = NULL;
1204 parser->m_commentHandler = NULL;
1205 parser->m_startCdataSectionHandler = NULL;
1206 parser->m_endCdataSectionHandler = NULL;
1207 parser->m_defaultHandler = NULL;
1208 parser->m_startDoctypeDeclHandler = NULL;
1209 parser->m_endDoctypeDeclHandler = NULL;
1210 parser->m_unparsedEntityDeclHandler = NULL;
1211 parser->m_notationDeclHandler = NULL;
1212 parser->m_startNamespaceDeclHandler = NULL;
1213 parser->m_endNamespaceDeclHandler = NULL;
1214 parser->m_notStandaloneHandler = NULL;
1215 parser->m_externalEntityRefHandler = NULL;
1216 parser->m_externalEntityRefHandlerArg = parser;
1217 parser->m_skippedEntityHandler = NULL;
1218 parser->m_elementDeclHandler = NULL;
1219 parser->m_attlistDeclHandler = NULL;
1220 parser->m_entityDeclHandler = NULL;
1221 parser->m_xmlDeclHandler = NULL;
1222 parser->m_bufferPtr = parser->m_buffer;
1223 parser->m_bufferEnd = parser->m_buffer;
1224 parser->m_parseEndByteIndex = 0;
1225 parser->m_parseEndPtr = NULL;
1226 parser->m_partialTokenBytesBefore = 0;
1227 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1228 parser->m_lastBufferRequestSize = 0;
1229 parser->m_declElementType = NULL;
1230 parser->m_declAttributeId = NULL;
1231 parser->m_declEntity = NULL;
1232 parser->m_doctypeName = NULL;
1233 parser->m_doctypeSysid = NULL;
1234 parser->m_doctypePubid = NULL;
1235 parser->m_declAttributeType = NULL;
1236 parser->m_declNotationName = NULL;
1237 parser->m_declNotationPublicId = NULL;
1238 parser->m_declAttributeIsCdata = XML_FALSE;
1239 parser->m_declAttributeIsId = XML_FALSE;
1240 memset(&parser->m_position, 0, sizeof(POSITION));
1241 parser->m_errorCode = XML_ERROR_NONE;
1242 parser->m_eventPtr = NULL;
1243 parser->m_eventEndPtr = NULL;
1244 parser->m_positionPtr = NULL;
1245 parser->m_openInternalEntities = NULL;
1246 parser->m_defaultExpandInternalEntities = XML_TRUE;
1247 parser->m_tagLevel = 0;
1248 parser->m_tagStack = NULL;
1249 parser->m_inheritedBindings = NULL;
1250 parser->m_nSpecifiedAtts = 0;
1251 parser->m_unknownEncodingMem = NULL;
1252 parser->m_unknownEncodingRelease = NULL;
1253 parser->m_unknownEncodingData = NULL;
1254 parser->m_parentParser = NULL;
1255 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1256 #ifdef XML_DTD
1257 parser->m_isParamEntity = XML_FALSE;
1258 parser->m_useForeignDTD = XML_FALSE;
1259 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1260 #endif
1261 parser->m_hash_secret_salt = 0;
1262
1263 #if XML_GE == 1
1264 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1265 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1266 parser->m_accounting.maximumAmplificationFactor
1267 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1268 parser->m_accounting.activationThresholdBytes
1269 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1270
1271 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1272 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1273 #endif
1274 }
1275
1276 /* moves list of bindings to m_freeBindingList */
1277 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1278 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1279 while (bindings) {
1280 BINDING *b = bindings;
1281 bindings = bindings->nextTagBinding;
1282 b->nextTagBinding = parser->m_freeBindingList;
1283 parser->m_freeBindingList = b;
1284 }
1285 }
1286
1287 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1288 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1289 TAG *tStk;
1290 OPEN_INTERNAL_ENTITY *openEntityList;
1291
1292 if (parser == NULL)
1293 return XML_FALSE;
1294
1295 if (parser->m_parentParser)
1296 return XML_FALSE;
1297 /* move m_tagStack to m_freeTagList */
1298 tStk = parser->m_tagStack;
1299 while (tStk) {
1300 TAG *tag = tStk;
1301 tStk = tStk->parent;
1302 tag->parent = parser->m_freeTagList;
1303 moveToFreeBindingList(parser, tag->bindings);
1304 tag->bindings = NULL;
1305 parser->m_freeTagList = tag;
1306 }
1307 /* move m_openInternalEntities to m_freeInternalEntities */
1308 openEntityList = parser->m_openInternalEntities;
1309 while (openEntityList) {
1310 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1311 openEntityList = openEntity->next;
1312 openEntity->next = parser->m_freeInternalEntities;
1313 parser->m_freeInternalEntities = openEntity;
1314 }
1315 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1316 FREE(parser, parser->m_unknownEncodingMem);
1317 if (parser->m_unknownEncodingRelease)
1318 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1319 poolClear(&parser->m_tempPool);
1320 poolClear(&parser->m_temp2Pool);
1321 FREE(parser, (void *)parser->m_protocolEncodingName);
1322 parser->m_protocolEncodingName = NULL;
1323 parserInit(parser, encodingName);
1324 dtdReset(parser->m_dtd, &parser->m_mem);
1325 return XML_TRUE;
1326 }
1327
1328 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1329 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1330 if (parser == NULL)
1331 return XML_STATUS_ERROR;
1332 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1333 XXX There's no way for the caller to determine which of the
1334 XXX possible error cases caused the XML_STATUS_ERROR return.
1335 */
1336 if (parser->m_parsingStatus.parsing == XML_PARSING
1337 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1338 return XML_STATUS_ERROR;
1339
1340 /* Get rid of any previous encoding name */
1341 FREE(parser, (void *)parser->m_protocolEncodingName);
1342
1343 if (encodingName == NULL)
1344 /* No new encoding name */
1345 parser->m_protocolEncodingName = NULL;
1346 else {
1347 /* Copy the new encoding name into allocated memory */
1348 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1349 if (! parser->m_protocolEncodingName)
1350 return XML_STATUS_ERROR;
1351 }
1352 return XML_STATUS_OK;
1353 }
1354
1355 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1356 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1357 const XML_Char *encodingName) {
1358 XML_Parser parser = oldParser;
1359 DTD *newDtd = NULL;
1360 DTD *oldDtd;
1361 XML_StartElementHandler oldStartElementHandler;
1362 XML_EndElementHandler oldEndElementHandler;
1363 XML_CharacterDataHandler oldCharacterDataHandler;
1364 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1365 XML_CommentHandler oldCommentHandler;
1366 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1367 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1368 XML_DefaultHandler oldDefaultHandler;
1369 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1370 XML_NotationDeclHandler oldNotationDeclHandler;
1371 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1372 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1373 XML_NotStandaloneHandler oldNotStandaloneHandler;
1374 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1375 XML_SkippedEntityHandler oldSkippedEntityHandler;
1376 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1377 XML_ElementDeclHandler oldElementDeclHandler;
1378 XML_AttlistDeclHandler oldAttlistDeclHandler;
1379 XML_EntityDeclHandler oldEntityDeclHandler;
1380 XML_XmlDeclHandler oldXmlDeclHandler;
1381 ELEMENT_TYPE *oldDeclElementType;
1382
1383 void *oldUserData;
1384 void *oldHandlerArg;
1385 XML_Bool oldDefaultExpandInternalEntities;
1386 XML_Parser oldExternalEntityRefHandlerArg;
1387 #ifdef XML_DTD
1388 enum XML_ParamEntityParsing oldParamEntityParsing;
1389 int oldInEntityValue;
1390 #endif
1391 XML_Bool oldns_triplets;
1392 /* Note that the new parser shares the same hash secret as the old
1393 parser, so that dtdCopy and copyEntityTable can lookup values
1394 from hash tables associated with either parser without us having
1395 to worry which hash secrets each table has.
1396 */
1397 unsigned long oldhash_secret_salt;
1398 XML_Bool oldReparseDeferralEnabled;
1399
1400 /* Validate the oldParser parameter before we pull everything out of it */
1401 if (oldParser == NULL)
1402 return NULL;
1403
1404 /* Stash the original parser contents on the stack */
1405 oldDtd = parser->m_dtd;
1406 oldStartElementHandler = parser->m_startElementHandler;
1407 oldEndElementHandler = parser->m_endElementHandler;
1408 oldCharacterDataHandler = parser->m_characterDataHandler;
1409 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1410 oldCommentHandler = parser->m_commentHandler;
1411 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1412 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1413 oldDefaultHandler = parser->m_defaultHandler;
1414 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1415 oldNotationDeclHandler = parser->m_notationDeclHandler;
1416 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1417 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1418 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1419 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1420 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1421 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1422 oldElementDeclHandler = parser->m_elementDeclHandler;
1423 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1424 oldEntityDeclHandler = parser->m_entityDeclHandler;
1425 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1426 oldDeclElementType = parser->m_declElementType;
1427
1428 oldUserData = parser->m_userData;
1429 oldHandlerArg = parser->m_handlerArg;
1430 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1431 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1432 #ifdef XML_DTD
1433 oldParamEntityParsing = parser->m_paramEntityParsing;
1434 oldInEntityValue = parser->m_prologState.inEntityValue;
1435 #endif
1436 oldns_triplets = parser->m_ns_triplets;
1437 /* Note that the new parser shares the same hash secret as the old
1438 parser, so that dtdCopy and copyEntityTable can lookup values
1439 from hash tables associated with either parser without us having
1440 to worry which hash secrets each table has.
1441 */
1442 oldhash_secret_salt = parser->m_hash_secret_salt;
1443 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1444
1445 #ifdef XML_DTD
1446 if (! context)
1447 newDtd = oldDtd;
1448 #endif /* XML_DTD */
1449
1450 /* Note that the magical uses of the pre-processor to make field
1451 access look more like C++ require that `parser' be overwritten
1452 here. This makes this function more painful to follow than it
1453 would be otherwise.
1454 */
1455 if (parser->m_ns) {
1456 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1457 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1458 } else {
1459 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1460 }
1461
1462 if (! parser)
1463 return NULL;
1464
1465 parser->m_startElementHandler = oldStartElementHandler;
1466 parser->m_endElementHandler = oldEndElementHandler;
1467 parser->m_characterDataHandler = oldCharacterDataHandler;
1468 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1469 parser->m_commentHandler = oldCommentHandler;
1470 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1471 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1472 parser->m_defaultHandler = oldDefaultHandler;
1473 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1474 parser->m_notationDeclHandler = oldNotationDeclHandler;
1475 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1476 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1477 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1478 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1479 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1480 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1481 parser->m_elementDeclHandler = oldElementDeclHandler;
1482 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1483 parser->m_entityDeclHandler = oldEntityDeclHandler;
1484 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1485 parser->m_declElementType = oldDeclElementType;
1486 parser->m_userData = oldUserData;
1487 if (oldUserData == oldHandlerArg)
1488 parser->m_handlerArg = parser->m_userData;
1489 else
1490 parser->m_handlerArg = parser;
1491 if (oldExternalEntityRefHandlerArg != oldParser)
1492 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1493 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1494 parser->m_ns_triplets = oldns_triplets;
1495 parser->m_hash_secret_salt = oldhash_secret_salt;
1496 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1497 parser->m_parentParser = oldParser;
1498 #ifdef XML_DTD
1499 parser->m_paramEntityParsing = oldParamEntityParsing;
1500 parser->m_prologState.inEntityValue = oldInEntityValue;
1501 if (context) {
1502 #endif /* XML_DTD */
1503 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1504 || ! setContext(parser, context)) {
1505 XML_ParserFree(parser);
1506 return NULL;
1507 }
1508 parser->m_processor = externalEntityInitProcessor;
1509 #ifdef XML_DTD
1510 } else {
1511 /* The DTD instance referenced by parser->m_dtd is shared between the
1512 document's root parser and external PE parsers, therefore one does not
1513 need to call setContext. In addition, one also *must* not call
1514 setContext, because this would overwrite existing prefix->binding
1515 pointers in parser->m_dtd with ones that get destroyed with the external
1516 PE parser. This would leave those prefixes with dangling pointers.
1517 */
1518 parser->m_isParamEntity = XML_TRUE;
1519 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1520 parser->m_processor = externalParEntInitProcessor;
1521 }
1522 #endif /* XML_DTD */
1523 return parser;
1524 }
1525
1526 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1527 destroyBindings(BINDING *bindings, XML_Parser parser) {
1528 for (;;) {
1529 BINDING *b = bindings;
1530 if (! b)
1531 break;
1532 bindings = b->nextTagBinding;
1533 FREE(parser, b->uri);
1534 FREE(parser, b);
1535 }
1536 }
1537
1538 void XMLCALL
XML_ParserFree(XML_Parser parser)1539 XML_ParserFree(XML_Parser parser) {
1540 TAG *tagList;
1541 OPEN_INTERNAL_ENTITY *entityList;
1542 if (parser == NULL)
1543 return;
1544 /* free m_tagStack and m_freeTagList */
1545 tagList = parser->m_tagStack;
1546 for (;;) {
1547 TAG *p;
1548 if (tagList == NULL) {
1549 if (parser->m_freeTagList == NULL)
1550 break;
1551 tagList = parser->m_freeTagList;
1552 parser->m_freeTagList = NULL;
1553 }
1554 p = tagList;
1555 tagList = tagList->parent;
1556 FREE(parser, p->buf);
1557 destroyBindings(p->bindings, parser);
1558 FREE(parser, p);
1559 }
1560 /* free m_openInternalEntities and m_freeInternalEntities */
1561 entityList = parser->m_openInternalEntities;
1562 for (;;) {
1563 OPEN_INTERNAL_ENTITY *openEntity;
1564 if (entityList == NULL) {
1565 if (parser->m_freeInternalEntities == NULL)
1566 break;
1567 entityList = parser->m_freeInternalEntities;
1568 parser->m_freeInternalEntities = NULL;
1569 }
1570 openEntity = entityList;
1571 entityList = entityList->next;
1572 FREE(parser, openEntity);
1573 }
1574
1575 destroyBindings(parser->m_freeBindingList, parser);
1576 destroyBindings(parser->m_inheritedBindings, parser);
1577 poolDestroy(&parser->m_tempPool);
1578 poolDestroy(&parser->m_temp2Pool);
1579 FREE(parser, (void *)parser->m_protocolEncodingName);
1580 #ifdef XML_DTD
1581 /* external parameter entity parsers share the DTD structure
1582 parser->m_dtd with the root parser, so we must not destroy it
1583 */
1584 if (! parser->m_isParamEntity && parser->m_dtd)
1585 #else
1586 if (parser->m_dtd)
1587 #endif /* XML_DTD */
1588 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1589 &parser->m_mem);
1590 FREE(parser, (void *)parser->m_atts);
1591 #ifdef XML_ATTR_INFO
1592 FREE(parser, (void *)parser->m_attInfo);
1593 #endif
1594 FREE(parser, parser->m_groupConnector);
1595 FREE(parser, parser->m_buffer);
1596 FREE(parser, parser->m_dataBuf);
1597 FREE(parser, parser->m_nsAtts);
1598 FREE(parser, parser->m_unknownEncodingMem);
1599 if (parser->m_unknownEncodingRelease)
1600 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1601 FREE(parser, parser);
1602 }
1603
1604 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1605 XML_UseParserAsHandlerArg(XML_Parser parser) {
1606 if (parser != NULL)
1607 parser->m_handlerArg = parser;
1608 }
1609
1610 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1611 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1612 if (parser == NULL)
1613 return XML_ERROR_INVALID_ARGUMENT;
1614 #ifdef XML_DTD
1615 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1616 if (parser->m_parsingStatus.parsing == XML_PARSING
1617 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1618 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1619 parser->m_useForeignDTD = useDTD;
1620 return XML_ERROR_NONE;
1621 #else
1622 UNUSED_P(useDTD);
1623 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1624 #endif
1625 }
1626
1627 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1628 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1629 if (parser == NULL)
1630 return;
1631 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1632 if (parser->m_parsingStatus.parsing == XML_PARSING
1633 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1634 return;
1635 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1636 }
1637
1638 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1639 XML_SetUserData(XML_Parser parser, void *p) {
1640 if (parser == NULL)
1641 return;
1642 if (parser->m_handlerArg == parser->m_userData)
1643 parser->m_handlerArg = parser->m_userData = p;
1644 else
1645 parser->m_userData = p;
1646 }
1647
1648 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1649 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1650 if (parser == NULL)
1651 return XML_STATUS_ERROR;
1652 if (p) {
1653 p = poolCopyString(&parser->m_dtd->pool, p);
1654 if (! p)
1655 return XML_STATUS_ERROR;
1656 parser->m_curBase = p;
1657 } else
1658 parser->m_curBase = NULL;
1659 return XML_STATUS_OK;
1660 }
1661
1662 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1663 XML_GetBase(XML_Parser parser) {
1664 if (parser == NULL)
1665 return NULL;
1666 return parser->m_curBase;
1667 }
1668
1669 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1670 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1671 if (parser == NULL)
1672 return -1;
1673 return parser->m_nSpecifiedAtts;
1674 }
1675
1676 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1677 XML_GetIdAttributeIndex(XML_Parser parser) {
1678 if (parser == NULL)
1679 return -1;
1680 return parser->m_idAttIndex;
1681 }
1682
1683 #ifdef XML_ATTR_INFO
1684 const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser)1685 XML_GetAttributeInfo(XML_Parser parser) {
1686 if (parser == NULL)
1687 return NULL;
1688 return parser->m_attInfo;
1689 }
1690 #endif
1691
1692 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1693 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1694 XML_EndElementHandler end) {
1695 if (parser == NULL)
1696 return;
1697 parser->m_startElementHandler = start;
1698 parser->m_endElementHandler = end;
1699 }
1700
1701 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1702 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1703 if (parser != NULL)
1704 parser->m_startElementHandler = start;
1705 }
1706
1707 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1708 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1709 if (parser != NULL)
1710 parser->m_endElementHandler = end;
1711 }
1712
1713 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1714 XML_SetCharacterDataHandler(XML_Parser parser,
1715 XML_CharacterDataHandler handler) {
1716 if (parser != NULL)
1717 parser->m_characterDataHandler = handler;
1718 }
1719
1720 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1721 XML_SetProcessingInstructionHandler(XML_Parser parser,
1722 XML_ProcessingInstructionHandler handler) {
1723 if (parser != NULL)
1724 parser->m_processingInstructionHandler = handler;
1725 }
1726
1727 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1728 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1729 if (parser != NULL)
1730 parser->m_commentHandler = handler;
1731 }
1732
1733 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1734 XML_SetCdataSectionHandler(XML_Parser parser,
1735 XML_StartCdataSectionHandler start,
1736 XML_EndCdataSectionHandler end) {
1737 if (parser == NULL)
1738 return;
1739 parser->m_startCdataSectionHandler = start;
1740 parser->m_endCdataSectionHandler = end;
1741 }
1742
1743 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1744 XML_SetStartCdataSectionHandler(XML_Parser parser,
1745 XML_StartCdataSectionHandler start) {
1746 if (parser != NULL)
1747 parser->m_startCdataSectionHandler = start;
1748 }
1749
1750 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1751 XML_SetEndCdataSectionHandler(XML_Parser parser,
1752 XML_EndCdataSectionHandler end) {
1753 if (parser != NULL)
1754 parser->m_endCdataSectionHandler = end;
1755 }
1756
1757 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1758 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1759 if (parser == NULL)
1760 return;
1761 parser->m_defaultHandler = handler;
1762 parser->m_defaultExpandInternalEntities = XML_FALSE;
1763 }
1764
1765 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1766 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1767 if (parser == NULL)
1768 return;
1769 parser->m_defaultHandler = handler;
1770 parser->m_defaultExpandInternalEntities = XML_TRUE;
1771 }
1772
1773 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1774 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1775 XML_EndDoctypeDeclHandler end) {
1776 if (parser == NULL)
1777 return;
1778 parser->m_startDoctypeDeclHandler = start;
1779 parser->m_endDoctypeDeclHandler = end;
1780 }
1781
1782 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1783 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1784 XML_StartDoctypeDeclHandler start) {
1785 if (parser != NULL)
1786 parser->m_startDoctypeDeclHandler = start;
1787 }
1788
1789 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1790 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1791 if (parser != NULL)
1792 parser->m_endDoctypeDeclHandler = end;
1793 }
1794
1795 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1796 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1797 XML_UnparsedEntityDeclHandler handler) {
1798 if (parser != NULL)
1799 parser->m_unparsedEntityDeclHandler = handler;
1800 }
1801
1802 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1803 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1804 if (parser != NULL)
1805 parser->m_notationDeclHandler = handler;
1806 }
1807
1808 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1809 XML_SetNamespaceDeclHandler(XML_Parser parser,
1810 XML_StartNamespaceDeclHandler start,
1811 XML_EndNamespaceDeclHandler end) {
1812 if (parser == NULL)
1813 return;
1814 parser->m_startNamespaceDeclHandler = start;
1815 parser->m_endNamespaceDeclHandler = end;
1816 }
1817
1818 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1819 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1820 XML_StartNamespaceDeclHandler start) {
1821 if (parser != NULL)
1822 parser->m_startNamespaceDeclHandler = start;
1823 }
1824
1825 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1826 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1827 XML_EndNamespaceDeclHandler end) {
1828 if (parser != NULL)
1829 parser->m_endNamespaceDeclHandler = end;
1830 }
1831
1832 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1833 XML_SetNotStandaloneHandler(XML_Parser parser,
1834 XML_NotStandaloneHandler handler) {
1835 if (parser != NULL)
1836 parser->m_notStandaloneHandler = handler;
1837 }
1838
1839 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1840 XML_SetExternalEntityRefHandler(XML_Parser parser,
1841 XML_ExternalEntityRefHandler handler) {
1842 if (parser != NULL)
1843 parser->m_externalEntityRefHandler = handler;
1844 }
1845
1846 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1847 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1848 if (parser == NULL)
1849 return;
1850 if (arg)
1851 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1852 else
1853 parser->m_externalEntityRefHandlerArg = parser;
1854 }
1855
1856 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1857 XML_SetSkippedEntityHandler(XML_Parser parser,
1858 XML_SkippedEntityHandler handler) {
1859 if (parser != NULL)
1860 parser->m_skippedEntityHandler = handler;
1861 }
1862
1863 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1864 XML_SetUnknownEncodingHandler(XML_Parser parser,
1865 XML_UnknownEncodingHandler handler, void *data) {
1866 if (parser == NULL)
1867 return;
1868 parser->m_unknownEncodingHandler = handler;
1869 parser->m_unknownEncodingHandlerData = data;
1870 }
1871
1872 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1873 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1874 if (parser != NULL)
1875 parser->m_elementDeclHandler = eldecl;
1876 }
1877
1878 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1879 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1880 if (parser != NULL)
1881 parser->m_attlistDeclHandler = attdecl;
1882 }
1883
1884 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1885 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1886 if (parser != NULL)
1887 parser->m_entityDeclHandler = handler;
1888 }
1889
1890 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1891 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1892 if (parser != NULL)
1893 parser->m_xmlDeclHandler = handler;
1894 }
1895
1896 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1897 XML_SetParamEntityParsing(XML_Parser parser,
1898 enum XML_ParamEntityParsing peParsing) {
1899 if (parser == NULL)
1900 return 0;
1901 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1902 if (parser->m_parsingStatus.parsing == XML_PARSING
1903 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1904 return 0;
1905 #ifdef XML_DTD
1906 parser->m_paramEntityParsing = peParsing;
1907 return 1;
1908 #else
1909 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1910 #endif
1911 }
1912
1913 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1914 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1915 if (parser == NULL)
1916 return 0;
1917 if (parser->m_parentParser)
1918 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1919 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1920 if (parser->m_parsingStatus.parsing == XML_PARSING
1921 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1922 return 0;
1923 parser->m_hash_secret_salt = hash_salt;
1924 return 1;
1925 }
1926
/* Feeds `len` bytes starting at `s` to the parser; `isFinal` marks the last
   chunk of the document.

   Argument errors (NULL parser, negative `len`, or NULL `s` with a non-zero
   `len`) return XML_STATUS_ERROR, recording XML_ERROR_INVALID_ARGUMENT when a
   parser is available.  A suspended or finished parser is rejected with
   XML_ERROR_SUSPENDED / XML_ERROR_FINISHED.  On the first call of a parser
   without a parent, startParsing() is run and its failure is reported as
   XML_ERROR_NO_MEMORY.

   When XML_CONTEXT_BYTES is 0 and the internal buffer holds no pending data,
   the input is parsed in place and only the unconsumed tail is copied into
   the internal buffer.  In every other case the input is copied into the
   buffer obtained from XML_GetBuffer() and XML_ParseBuffer() does the work.

   Returns XML_STATUS_OK, XML_STATUS_SUSPENDED or XML_STATUS_ERROR. */
enum XML_Status XMLCALL
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
  if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
    if (parser != NULL)
      parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
    return XML_STATUS_ERROR;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return XML_STATUS_ERROR;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;
  case XML_INITIALIZED:
    /* First call: only a parser without a parent runs startParsing(). */
    if (parser->m_parentParser == NULL && ! startParsing(parser)) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return XML_STATUS_ERROR;
    }
    /* fall through */
  default:
    parser->m_parsingStatus.parsing = XML_PARSING;
  }

#if XML_CONTEXT_BYTES == 0
  /* Fast path: no pending data in the internal buffer, so parse the
     caller's memory directly instead of copying it first. */
  if (parser->m_bufferPtr == parser->m_bufferEnd) {
    const char *end;
    int nLeftOver;
    enum XML_Status result;
    /* Detect overflow (a+b > MAX <==> b > MAX-a) */
    if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      parser->m_eventPtr = parser->m_eventEndPtr = NULL;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    }
    // though this isn't a buffer request, we assume that `len` is the app's
    // preferred buffer fill size, and therefore save it here.
    parser->m_lastBufferRequestSize = len;
    parser->m_parseEndByteIndex += len;
    parser->m_positionPtr = s;
    parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;

    /* m_parseEndPtr is assigned as part of the argument expression; `end`
       receives the position where the processor stopped consuming input. */
    parser->m_errorCode
        = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);

    if (parser->m_errorCode != XML_ERROR_NONE) {
      parser->m_eventEndPtr = parser->m_eventPtr;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    } else {
      switch (parser->m_parsingStatus.parsing) {
      case XML_SUSPENDED:
        result = XML_STATUS_SUSPENDED;
        break;
      case XML_INITIALIZED:
      case XML_PARSING:
        if (isFinal) {
          /* Final chunk fully handled: no leftover bookkeeping needed. */
          parser->m_parsingStatus.parsing = XML_FINISHED;
          return XML_STATUS_OK;
        }
      /* fall through */
      default:
        result = XML_STATUS_OK;
      }
    }

    XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
                      &parser->m_position);
    /* Bytes of the caller's input that the processor did not consume. */
    nLeftOver = s + len - end;
    if (nLeftOver) {
      // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
      // (and XML_ERROR_FINISHED) from XML_GetBuffer.
      const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
      parser->m_parsingStatus.parsing = XML_PARSING;
      void *const temp = XML_GetBuffer(parser, nLeftOver);
      parser->m_parsingStatus.parsing = originalStatus;
      // GetBuffer may have overwritten this, but we want to remember what the
      // app requested, not how many bytes were left over after parsing.
      parser->m_lastBufferRequestSize = len;
      if (temp == NULL) {
        // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
        parser->m_eventPtr = parser->m_eventEndPtr = NULL;
        parser->m_processor = errorProcessor;
        return XML_STATUS_ERROR;
      }
      // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
      // don't have any data to preserve, and can copy straight into the start
      // of the buffer rather than the GetBuffer return pointer (which may be
      // pointing further into the allocated buffer).
      memcpy(parser->m_buffer, end, nLeftOver);
    }
    /* Re-anchor all bookkeeping pointers on the internal buffer, which now
       holds exactly the nLeftOver unconsumed bytes (possibly none). */
    parser->m_bufferPtr = parser->m_buffer;
    parser->m_bufferEnd = parser->m_buffer + nLeftOver;
    parser->m_positionPtr = parser->m_bufferPtr;
    parser->m_parseEndPtr = parser->m_bufferEnd;
    parser->m_eventPtr = parser->m_bufferPtr;
    parser->m_eventEndPtr = parser->m_bufferPtr;
    return result;
  }
#endif /* XML_CONTEXT_BYTES == 0 */
  /* General path: copy the input into the parser-owned buffer and let
     XML_ParseBuffer() process it. */
  void *buff = XML_GetBuffer(parser, len);
  if (buff == NULL)
    return XML_STATUS_ERROR;
  if (len > 0) {
    assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
    memcpy(buff, s, len);
  }
  return XML_ParseBuffer(parser, len, isFinal);
}
2037
2038 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2039 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2040 const char *start;
2041 enum XML_Status result = XML_STATUS_OK;
2042
2043 if (parser == NULL)
2044 return XML_STATUS_ERROR;
2045
2046 if (len < 0) {
2047 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2048 return XML_STATUS_ERROR;
2049 }
2050
2051 switch (parser->m_parsingStatus.parsing) {
2052 case XML_SUSPENDED:
2053 parser->m_errorCode = XML_ERROR_SUSPENDED;
2054 return XML_STATUS_ERROR;
2055 case XML_FINISHED:
2056 parser->m_errorCode = XML_ERROR_FINISHED;
2057 return XML_STATUS_ERROR;
2058 case XML_INITIALIZED:
2059 /* Has someone called XML_GetBuffer successfully before? */
2060 if (! parser->m_bufferPtr) {
2061 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2062 return XML_STATUS_ERROR;
2063 }
2064
2065 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2066 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2067 return XML_STATUS_ERROR;
2068 }
2069 /* fall through */
2070 default:
2071 parser->m_parsingStatus.parsing = XML_PARSING;
2072 }
2073
2074 start = parser->m_bufferPtr;
2075 parser->m_positionPtr = start;
2076 parser->m_bufferEnd += len;
2077 parser->m_parseEndPtr = parser->m_bufferEnd;
2078 parser->m_parseEndByteIndex += len;
2079 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2080
2081 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2082 &parser->m_bufferPtr);
2083
2084 if (parser->m_errorCode != XML_ERROR_NONE) {
2085 parser->m_eventEndPtr = parser->m_eventPtr;
2086 parser->m_processor = errorProcessor;
2087 return XML_STATUS_ERROR;
2088 } else {
2089 switch (parser->m_parsingStatus.parsing) {
2090 case XML_SUSPENDED:
2091 result = XML_STATUS_SUSPENDED;
2092 break;
2093 case XML_INITIALIZED:
2094 case XML_PARSING:
2095 if (isFinal) {
2096 parser->m_parsingStatus.parsing = XML_FINISHED;
2097 return result;
2098 }
2099 default:; /* should not happen */
2100 }
2101 }
2102
2103 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2104 parser->m_bufferPtr, &parser->m_position);
2105 parser->m_positionPtr = parser->m_bufferPtr;
2106 return result;
2107 }
2108
/* Returns a pointer (the current m_bufferEnd) at which the caller may store
   up to `len` bytes of input before calling XML_ParseBuffer().

   Returns NULL when `parser` is NULL, and NULL with m_errorCode set when
   `len` is negative (XML_ERROR_NO_MEMORY), the parser is suspended or
   finished, a size computation would overflow `int`, or allocation fails.

   If the free tail of the current buffer is too small (or there is no buffer
   yet), the pending data is either moved to the front of the existing
   allocation or copied into a newly allocated, larger buffer.  When
   XML_CONTEXT_BYTES > 0, up to XML_CONTEXT_BYTES of data preceding
   m_bufferPtr are kept as well.  In that case the event and position
   pointers are reset to NULL because they may refer to moved memory. */
void *XMLCALL
XML_GetBuffer(XML_Parser parser, int len) {
  if (parser == NULL)
    return NULL;
  if (len < 0) {
    parser->m_errorCode = XML_ERROR_NO_MEMORY;
    return NULL;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return NULL;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return NULL;
  default:;
  }

  // whether or not the request succeeds, `len` seems to be the app's preferred
  // buffer fill size; remember it.
  parser->m_lastBufferRequestSize = len;
  /* Only reorganise when the request does not fit behind m_bufferEnd. */
  if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
      || parser->m_buffer == NULL) {
#if XML_CONTEXT_BYTES > 0
    int keep;
#endif /* XML_CONTEXT_BYTES > 0 */
    /* Do not invoke signed arithmetic overflow: */
    int neededSize = (int)((unsigned)len
                           + (unsigned)EXPAT_SAFE_PTR_DIFF(
                               parser->m_bufferEnd, parser->m_bufferPtr));
    /* A negative value here means the unsigned sum exceeded INT_MAX. */
    if (neededSize < 0) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
#if XML_CONTEXT_BYTES > 0
    /* Amount of data before m_bufferPtr to retain, capped at
       XML_CONTEXT_BYTES. */
    keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
    if (keep > XML_CONTEXT_BYTES)
      keep = XML_CONTEXT_BYTES;
    /* Detect and prevent integer overflow */
    if (keep > INT_MAX - neededSize) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
    neededSize += keep;
#endif /* XML_CONTEXT_BYTES > 0 */
    /* Case 1: the existing allocation is big enough once the pending data
       is shifted towards its start. */
    if (parser->m_buffer && parser->m_bufferPtr
        && neededSize
               <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
#if XML_CONTEXT_BYTES > 0
      if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
        int offset
            = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
              - keep;
        /* The buffer pointers cannot be NULL here; we have at least some bytes
         * in the buffer */
        memmove(parser->m_buffer, &parser->m_buffer[offset],
                parser->m_bufferEnd - parser->m_bufferPtr + keep);
        parser->m_bufferEnd -= offset;
        parser->m_bufferPtr -= offset;
      }
#else
      memmove(parser->m_buffer, parser->m_bufferPtr,
              EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
      parser->m_bufferEnd
          = parser->m_buffer
            + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      parser->m_bufferPtr = parser->m_buffer;
#endif /* XML_CONTEXT_BYTES > 0 */
    } else {
      /* Case 2: allocate a new buffer, doubling the size until it is at
         least neededSize. */
      char *newBuf;
      int bufferSize
          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
      if (bufferSize == 0)
        bufferSize = INIT_BUFFER_SIZE;
      do {
        /* Do not invoke signed arithmetic overflow: */
        bufferSize = (int)(2U * (unsigned)bufferSize);
      } while (bufferSize < neededSize && bufferSize > 0);
      /* A non-positive size means the doubling wrapped around. */
      if (bufferSize <= 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      newBuf = (char *)MALLOC(parser, bufferSize);
      if (newBuf == 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      parser->m_bufferLim = newBuf + bufferSize;
#if XML_CONTEXT_BYTES > 0
      if (parser->m_bufferPtr) {
        /* Copy the retained context plus the pending data, then release the
           old buffer. */
        memcpy(newBuf, &parser->m_bufferPtr[-keep],
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
                   + keep);
        FREE(parser, parser->m_buffer);
        parser->m_buffer = newBuf;
        parser->m_bufferEnd
            = parser->m_buffer
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
              + keep;
        parser->m_bufferPtr = parser->m_buffer + keep;
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
        parser->m_bufferPtr = parser->m_buffer = newBuf;
      }
#else
      if (parser->m_bufferPtr) {
        memcpy(newBuf, parser->m_bufferPtr,
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
        FREE(parser, parser->m_buffer);
        /* m_bufferEnd must be recomputed before m_bufferPtr is overwritten
           below, since its new value depends on the old m_bufferPtr. */
        parser->m_bufferEnd
            = newBuf
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
      }
      parser->m_bufferPtr = parser->m_buffer = newBuf;
#endif /* XML_CONTEXT_BYTES > 0 */
    }
    /* The data may have moved, so pointers into the old layout are dropped. */
    parser->m_eventPtr = parser->m_eventEndPtr = NULL;
    parser->m_positionPtr = NULL;
  }
  return parser->m_bufferEnd;
}
2234
2235 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2236 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2237 if (parser == NULL)
2238 return XML_STATUS_ERROR;
2239 switch (parser->m_parsingStatus.parsing) {
2240 case XML_SUSPENDED:
2241 if (resumable) {
2242 parser->m_errorCode = XML_ERROR_SUSPENDED;
2243 return XML_STATUS_ERROR;
2244 }
2245 parser->m_parsingStatus.parsing = XML_FINISHED;
2246 break;
2247 case XML_FINISHED:
2248 parser->m_errorCode = XML_ERROR_FINISHED;
2249 return XML_STATUS_ERROR;
2250 default:
2251 if (resumable) {
2252 #ifdef XML_DTD
2253 if (parser->m_isParamEntity) {
2254 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2255 return XML_STATUS_ERROR;
2256 }
2257 #endif
2258 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2259 } else
2260 parser->m_parsingStatus.parsing = XML_FINISHED;
2261 }
2262 return XML_STATUS_OK;
2263 }
2264
2265 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2266 XML_ResumeParser(XML_Parser parser) {
2267 enum XML_Status result = XML_STATUS_OK;
2268
2269 if (parser == NULL)
2270 return XML_STATUS_ERROR;
2271 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2272 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2273 return XML_STATUS_ERROR;
2274 }
2275 parser->m_parsingStatus.parsing = XML_PARSING;
2276
2277 parser->m_errorCode = callProcessor(
2278 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2279
2280 if (parser->m_errorCode != XML_ERROR_NONE) {
2281 parser->m_eventEndPtr = parser->m_eventPtr;
2282 parser->m_processor = errorProcessor;
2283 return XML_STATUS_ERROR;
2284 } else {
2285 switch (parser->m_parsingStatus.parsing) {
2286 case XML_SUSPENDED:
2287 result = XML_STATUS_SUSPENDED;
2288 break;
2289 case XML_INITIALIZED:
2290 case XML_PARSING:
2291 if (parser->m_parsingStatus.finalBuffer) {
2292 parser->m_parsingStatus.parsing = XML_FINISHED;
2293 return result;
2294 }
2295 default:;
2296 }
2297 }
2298
2299 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2300 parser->m_bufferPtr, &parser->m_position);
2301 parser->m_positionPtr = parser->m_bufferPtr;
2302 return result;
2303 }
2304
2305 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2306 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2307 if (parser == NULL)
2308 return;
2309 assert(status != NULL);
2310 *status = parser->m_parsingStatus;
2311 }
2312
2313 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2314 XML_GetErrorCode(XML_Parser parser) {
2315 if (parser == NULL)
2316 return XML_ERROR_INVALID_ARGUMENT;
2317 return parser->m_errorCode;
2318 }
2319
2320 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2321 XML_GetCurrentByteIndex(XML_Parser parser) {
2322 if (parser == NULL)
2323 return -1;
2324 if (parser->m_eventPtr)
2325 return (XML_Index)(parser->m_parseEndByteIndex
2326 - (parser->m_parseEndPtr - parser->m_eventPtr));
2327 return -1;
2328 }
2329
2330 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2331 XML_GetCurrentByteCount(XML_Parser parser) {
2332 if (parser == NULL)
2333 return 0;
2334 if (parser->m_eventEndPtr && parser->m_eventPtr)
2335 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2336 return 0;
2337 }
2338
2339 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2340 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2341 #if XML_CONTEXT_BYTES > 0
2342 if (parser == NULL)
2343 return NULL;
2344 if (parser->m_eventPtr && parser->m_buffer) {
2345 if (offset != NULL)
2346 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2347 if (size != NULL)
2348 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2349 return parser->m_buffer;
2350 }
2351 #else
2352 (void)parser;
2353 (void)offset;
2354 (void)size;
2355 #endif /* XML_CONTEXT_BYTES > 0 */
2356 return (const char *)0;
2357 }
2358
2359 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2360 XML_GetCurrentLineNumber(XML_Parser parser) {
2361 if (parser == NULL)
2362 return 0;
2363 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2364 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2365 parser->m_eventPtr, &parser->m_position);
2366 parser->m_positionPtr = parser->m_eventPtr;
2367 }
2368 return parser->m_position.lineNumber + 1;
2369 }
2370
2371 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2372 XML_GetCurrentColumnNumber(XML_Parser parser) {
2373 if (parser == NULL)
2374 return 0;
2375 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2376 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2377 parser->m_eventPtr, &parser->m_position);
2378 parser->m_positionPtr = parser->m_eventPtr;
2379 }
2380 return parser->m_position.columnNumber;
2381 }
2382
2383 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2384 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2385 if (parser != NULL)
2386 FREE(parser, model);
2387 }
2388
2389 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2390 XML_MemMalloc(XML_Parser parser, size_t size) {
2391 if (parser == NULL)
2392 return NULL;
2393 return MALLOC(parser, size);
2394 }
2395
2396 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2397 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2398 if (parser == NULL)
2399 return NULL;
2400 return REALLOC(parser, ptr, size);
2401 }
2402
2403 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2404 XML_MemFree(XML_Parser parser, void *ptr) {
2405 if (parser != NULL)
2406 FREE(parser, ptr);
2407 }
2408
2409 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2410 XML_DefaultCurrent(XML_Parser parser) {
2411 if (parser == NULL)
2412 return;
2413 if (parser->m_defaultHandler) {
2414 if (parser->m_openInternalEntities)
2415 reportDefault(parser, parser->m_internalEncoding,
2416 parser->m_openInternalEntities->internalEventPtr,
2417 parser->m_openInternalEntities->internalEventEndPtr);
2418 else
2419 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2420 parser->m_eventEndPtr);
2421 }
2422 }
2423
2424 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2425 XML_ErrorString(enum XML_Error code) {
2426 switch (code) {
2427 case XML_ERROR_NONE:
2428 return NULL;
2429 case XML_ERROR_NO_MEMORY:
2430 return XML_L("out of memory");
2431 case XML_ERROR_SYNTAX:
2432 return XML_L("syntax error");
2433 case XML_ERROR_NO_ELEMENTS:
2434 return XML_L("no element found");
2435 case XML_ERROR_INVALID_TOKEN:
2436 return XML_L("not well-formed (invalid token)");
2437 case XML_ERROR_UNCLOSED_TOKEN:
2438 return XML_L("unclosed token");
2439 case XML_ERROR_PARTIAL_CHAR:
2440 return XML_L("partial character");
2441 case XML_ERROR_TAG_MISMATCH:
2442 return XML_L("mismatched tag");
2443 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2444 return XML_L("duplicate attribute");
2445 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2446 return XML_L("junk after document element");
2447 case XML_ERROR_PARAM_ENTITY_REF:
2448 return XML_L("illegal parameter entity reference");
2449 case XML_ERROR_UNDEFINED_ENTITY:
2450 return XML_L("undefined entity");
2451 case XML_ERROR_RECURSIVE_ENTITY_REF:
2452 return XML_L("recursive entity reference");
2453 case XML_ERROR_ASYNC_ENTITY:
2454 return XML_L("asynchronous entity");
2455 case XML_ERROR_BAD_CHAR_REF:
2456 return XML_L("reference to invalid character number");
2457 case XML_ERROR_BINARY_ENTITY_REF:
2458 return XML_L("reference to binary entity");
2459 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2460 return XML_L("reference to external entity in attribute");
2461 case XML_ERROR_MISPLACED_XML_PI:
2462 return XML_L("XML or text declaration not at start of entity");
2463 case XML_ERROR_UNKNOWN_ENCODING:
2464 return XML_L("unknown encoding");
2465 case XML_ERROR_INCORRECT_ENCODING:
2466 return XML_L("encoding specified in XML declaration is incorrect");
2467 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2468 return XML_L("unclosed CDATA section");
2469 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2470 return XML_L("error in processing external entity reference");
2471 case XML_ERROR_NOT_STANDALONE:
2472 return XML_L("document is not standalone");
2473 case XML_ERROR_UNEXPECTED_STATE:
2474 return XML_L("unexpected parser state - please send a bug report");
2475 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2476 return XML_L("entity declared in parameter entity");
2477 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2478 return XML_L("requested feature requires XML_DTD support in Expat");
2479 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2480 return XML_L("cannot change setting once parsing has begun");
2481 /* Added in 1.95.7. */
2482 case XML_ERROR_UNBOUND_PREFIX:
2483 return XML_L("unbound prefix");
2484 /* Added in 1.95.8. */
2485 case XML_ERROR_UNDECLARING_PREFIX:
2486 return XML_L("must not undeclare prefix");
2487 case XML_ERROR_INCOMPLETE_PE:
2488 return XML_L("incomplete markup in parameter entity");
2489 case XML_ERROR_XML_DECL:
2490 return XML_L("XML declaration not well-formed");
2491 case XML_ERROR_TEXT_DECL:
2492 return XML_L("text declaration not well-formed");
2493 case XML_ERROR_PUBLICID:
2494 return XML_L("illegal character(s) in public id");
2495 case XML_ERROR_SUSPENDED:
2496 return XML_L("parser suspended");
2497 case XML_ERROR_NOT_SUSPENDED:
2498 return XML_L("parser not suspended");
2499 case XML_ERROR_ABORTED:
2500 return XML_L("parsing aborted");
2501 case XML_ERROR_FINISHED:
2502 return XML_L("parsing finished");
2503 case XML_ERROR_SUSPEND_PE:
2504 return XML_L("cannot suspend in external parameter entity");
2505 /* Added in 2.0.0. */
2506 case XML_ERROR_RESERVED_PREFIX_XML:
2507 return XML_L(
2508 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2509 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2510 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2511 case XML_ERROR_RESERVED_NAMESPACE_URI:
2512 return XML_L(
2513 "prefix must not be bound to one of the reserved namespace names");
2514 /* Added in 2.2.5. */
2515 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2516 return XML_L("invalid argument");
2517 /* Added in 2.3.0. */
2518 case XML_ERROR_NO_BUFFER:
2519 return XML_L(
2520 "a successful prior call to function XML_GetBuffer is required");
2521 /* Added in 2.4.0. */
2522 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2523 return XML_L(
2524 "limit on input amplification factor (from DTD and entities) breached");
2525 }
2526 return NULL;
2527 }
2528
2529 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2530 XML_ExpatVersion(void) {
2531 /* V1 is used to string-ize the version number. However, it would
2532 string-ize the actual version macro *names* unless we get them
2533 substituted before being passed to V1. CPP is defined to expand
2534 a macro, then rescan for more expansions. Thus, we use V2 to expand
2535 the version macros, then CPP will expand the resulting V1() macro
2536 with the correct numerals. */
2537 /* ### I'm assuming cpp is portable in this respect... */
2538
2539 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2540 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2541
2542 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2543
2544 #undef V1
2545 #undef V2
2546 }
2547
2548 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2549 XML_ExpatVersionInfo(void) {
2550 XML_Expat_Version version;
2551
2552 version.major = XML_MAJOR_VERSION;
2553 version.minor = XML_MINOR_VERSION;
2554 version.micro = XML_MICRO_VERSION;
2555
2556 return version;
2557 }
2558
2559 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2560 XML_GetFeatureList(void) {
2561 static const XML_Feature features[] = {
2562 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2563 sizeof(XML_Char)},
2564 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2565 sizeof(XML_LChar)},
2566 #ifdef XML_UNICODE
2567 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2568 #endif
2569 #ifdef XML_UNICODE_WCHAR_T
2570 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2571 #endif
2572 #ifdef XML_DTD
2573 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2574 #endif
2575 #if XML_CONTEXT_BYTES > 0
2576 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2577 XML_CONTEXT_BYTES},
2578 #endif
2579 #ifdef XML_MIN_SIZE
2580 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2581 #endif
2582 #ifdef XML_NS
2583 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2584 #endif
2585 #ifdef XML_LARGE_SIZE
2586 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2587 #endif
2588 #ifdef XML_ATTR_INFO
2589 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2590 #endif
2591 #if XML_GE == 1
2592 /* Added in Expat 2.4.0 for XML_DTD defined and
2593 * added in Expat 2.6.0 for XML_GE == 1. */
2594 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2595 XML_L("XML_BLAP_MAX_AMP"),
2596 (long int)
2597 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2598 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2599 XML_L("XML_BLAP_ACT_THRES"),
2600 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2601 /* Added in Expat 2.6.0. */
2602 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2603 #endif
2604 {XML_FEATURE_END, NULL, 0}};
2605
2606 return features;
2607 }
2608
2609 #if XML_GE == 1
2610 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2611 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2612 XML_Parser parser, float maximumAmplificationFactor) {
2613 if ((parser == NULL) || (parser->m_parentParser != NULL)
2614 || isnan(maximumAmplificationFactor)
2615 || (maximumAmplificationFactor < 1.0f)) {
2616 return XML_FALSE;
2617 }
2618 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2619 return XML_TRUE;
2620 }
2621
2622 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2623 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2624 XML_Parser parser, unsigned long long activationThresholdBytes) {
2625 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2626 return XML_FALSE;
2627 }
2628 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2629 return XML_TRUE;
2630 }
2631 #endif /* XML_GE == 1 */
2632
2633 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2634 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2635 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2636 parser->m_reparseDeferralEnabled = enabled;
2637 return XML_TRUE;
2638 }
2639 return XML_FALSE;
2640 }
2641
2642 /* Initially tag->rawName always points into the parse buffer;
2643 for those TAG instances opened while the current parse buffer was
2644 processed, and not yet closed, we need to store tag->rawName in a more
2645 permanent location, since the parse buffer is about to be discarded.
2646 */
2647 static XML_Bool
storeRawNames(XML_Parser parser)2648 storeRawNames(XML_Parser parser) {
2649 TAG *tag = parser->m_tagStack;
2650 while (tag) {
2651 int bufSize;
2652 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2653 size_t rawNameLen;
2654 char *rawNameBuf = tag->buf + nameLen;
2655 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2656 at the first entry that has already been copied; everything
2657 below it in the stack is already been accounted for in a
2658 previous call to this function.
2659 */
2660 if (tag->rawName == rawNameBuf)
2661 break;
2662 /* For reuse purposes we need to ensure that the
2663 size of tag->buf is a multiple of sizeof(XML_Char).
2664 */
2665 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2666 /* Detect and prevent integer overflow. */
2667 if (rawNameLen > (size_t)INT_MAX - nameLen)
2668 return XML_FALSE;
2669 bufSize = nameLen + (int)rawNameLen;
2670 if (bufSize > tag->bufEnd - tag->buf) {
2671 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2672 if (temp == NULL)
2673 return XML_FALSE;
2674 /* if tag->name.str points to tag->buf (only when namespace
2675 processing is off) then we have to update it
2676 */
2677 if (tag->name.str == (XML_Char *)tag->buf)
2678 tag->name.str = (XML_Char *)temp;
2679 /* if tag->name.localPart is set (when namespace processing is on)
2680 then update it as well, since it will always point into tag->buf
2681 */
2682 if (tag->name.localPart)
2683 tag->name.localPart
2684 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2685 tag->buf = temp;
2686 tag->bufEnd = temp + bufSize;
2687 rawNameBuf = temp + nameLen;
2688 }
2689 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2690 tag->rawName = rawNameBuf;
2691 tag = tag->parent;
2692 }
2693 return XML_TRUE;
2694 }
2695
2696 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2697 contentProcessor(XML_Parser parser, const char *start, const char *end,
2698 const char **endPtr) {
2699 enum XML_Error result = doContent(
2700 parser, 0, parser->m_encoding, start, end, endPtr,
2701 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2702 if (result == XML_ERROR_NONE) {
2703 if (! storeRawNames(parser))
2704 return XML_ERROR_NO_MEMORY;
2705 }
2706 return result;
2707 }
2708
2709 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2710 externalEntityInitProcessor(XML_Parser parser, const char *start,
2711 const char *end, const char **endPtr) {
2712 enum XML_Error result = initializeEncoding(parser);
2713 if (result != XML_ERROR_NONE)
2714 return result;
2715 parser->m_processor = externalEntityInitProcessor2;
2716 return externalEntityInitProcessor2(parser, start, end, endPtr);
2717 }
2718
2719 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2720 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2721 const char *end, const char **endPtr) {
2722 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2723 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2724 switch (tok) {
2725 case XML_TOK_BOM:
2726 #if XML_GE == 1
2727 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2728 XML_ACCOUNT_DIRECT)) {
2729 accountingOnAbort(parser);
2730 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2731 }
2732 #endif /* XML_GE == 1 */
2733
2734 /* If we are at the end of the buffer, this would cause the next stage,
2735 i.e. externalEntityInitProcessor3, to pass control directly to
2736 doContent (by detecting XML_TOK_NONE) without processing any xml text
2737 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2738 */
2739 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2740 *endPtr = next;
2741 return XML_ERROR_NONE;
2742 }
2743 start = next;
2744 break;
2745 case XML_TOK_PARTIAL:
2746 if (! parser->m_parsingStatus.finalBuffer) {
2747 *endPtr = start;
2748 return XML_ERROR_NONE;
2749 }
2750 parser->m_eventPtr = start;
2751 return XML_ERROR_UNCLOSED_TOKEN;
2752 case XML_TOK_PARTIAL_CHAR:
2753 if (! parser->m_parsingStatus.finalBuffer) {
2754 *endPtr = start;
2755 return XML_ERROR_NONE;
2756 }
2757 parser->m_eventPtr = start;
2758 return XML_ERROR_PARTIAL_CHAR;
2759 }
2760 parser->m_processor = externalEntityInitProcessor3;
2761 return externalEntityInitProcessor3(parser, start, end, endPtr);
2762 }
2763
2764 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2765 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2766 const char *end, const char **endPtr) {
2767 int tok;
2768 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2769 parser->m_eventPtr = start;
2770 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2771 /* Note: These bytes are accounted later in:
2772 - processXmlDecl
2773 - externalEntityContentProcessor
2774 */
2775 parser->m_eventEndPtr = next;
2776
2777 switch (tok) {
2778 case XML_TOK_XML_DECL: {
2779 enum XML_Error result;
2780 result = processXmlDecl(parser, 1, start, next);
2781 if (result != XML_ERROR_NONE)
2782 return result;
2783 switch (parser->m_parsingStatus.parsing) {
2784 case XML_SUSPENDED:
2785 *endPtr = next;
2786 return XML_ERROR_NONE;
2787 case XML_FINISHED:
2788 return XML_ERROR_ABORTED;
2789 default:
2790 start = next;
2791 }
2792 } break;
2793 case XML_TOK_PARTIAL:
2794 if (! parser->m_parsingStatus.finalBuffer) {
2795 *endPtr = start;
2796 return XML_ERROR_NONE;
2797 }
2798 return XML_ERROR_UNCLOSED_TOKEN;
2799 case XML_TOK_PARTIAL_CHAR:
2800 if (! parser->m_parsingStatus.finalBuffer) {
2801 *endPtr = start;
2802 return XML_ERROR_NONE;
2803 }
2804 return XML_ERROR_PARTIAL_CHAR;
2805 }
2806 parser->m_processor = externalEntityContentProcessor;
2807 parser->m_tagLevel = 1;
2808 return externalEntityContentProcessor(parser, start, end, endPtr);
2809 }
2810
2811 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2812 externalEntityContentProcessor(XML_Parser parser, const char *start,
2813 const char *end, const char **endPtr) {
2814 enum XML_Error result
2815 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2816 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2817 XML_ACCOUNT_ENTITY_EXPANSION);
2818 if (result == XML_ERROR_NONE) {
2819 if (! storeRawNames(parser))
2820 return XML_ERROR_NO_MEMORY;
2821 }
2822 return result;
2823 }
2824
2825 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2826 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2827 const char *s, const char *end, const char **nextPtr,
2828 XML_Bool haveMore, enum XML_Account account) {
2829 /* save one level of indirection */
2830 DTD *const dtd = parser->m_dtd;
2831
2832 const char **eventPP;
2833 const char **eventEndPP;
2834 if (enc == parser->m_encoding) {
2835 eventPP = &parser->m_eventPtr;
2836 eventEndPP = &parser->m_eventEndPtr;
2837 } else {
2838 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2839 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2840 }
2841 *eventPP = s;
2842
2843 for (;;) {
2844 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2845 int tok = XmlContentTok(enc, s, end, &next);
2846 #if XML_GE == 1
2847 const char *accountAfter
2848 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2849 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2850 : next;
2851 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2852 account)) {
2853 accountingOnAbort(parser);
2854 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2855 }
2856 #endif
2857 *eventEndPP = next;
2858 switch (tok) {
2859 case XML_TOK_TRAILING_CR:
2860 if (haveMore) {
2861 *nextPtr = s;
2862 return XML_ERROR_NONE;
2863 }
2864 *eventEndPP = end;
2865 if (parser->m_characterDataHandler) {
2866 XML_Char c = 0xA;
2867 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2868 } else if (parser->m_defaultHandler)
2869 reportDefault(parser, enc, s, end);
2870 /* We are at the end of the final buffer, should we check for
2871 XML_SUSPENDED, XML_FINISHED?
2872 */
2873 if (startTagLevel == 0)
2874 return XML_ERROR_NO_ELEMENTS;
2875 if (parser->m_tagLevel != startTagLevel)
2876 return XML_ERROR_ASYNC_ENTITY;
2877 *nextPtr = end;
2878 return XML_ERROR_NONE;
2879 case XML_TOK_NONE:
2880 if (haveMore) {
2881 *nextPtr = s;
2882 return XML_ERROR_NONE;
2883 }
2884 if (startTagLevel > 0) {
2885 if (parser->m_tagLevel != startTagLevel)
2886 return XML_ERROR_ASYNC_ENTITY;
2887 *nextPtr = s;
2888 return XML_ERROR_NONE;
2889 }
2890 return XML_ERROR_NO_ELEMENTS;
2891 case XML_TOK_INVALID:
2892 *eventPP = next;
2893 return XML_ERROR_INVALID_TOKEN;
2894 case XML_TOK_PARTIAL:
2895 if (haveMore) {
2896 *nextPtr = s;
2897 return XML_ERROR_NONE;
2898 }
2899 return XML_ERROR_UNCLOSED_TOKEN;
2900 case XML_TOK_PARTIAL_CHAR:
2901 if (haveMore) {
2902 *nextPtr = s;
2903 return XML_ERROR_NONE;
2904 }
2905 return XML_ERROR_PARTIAL_CHAR;
2906 case XML_TOK_ENTITY_REF: {
2907 const XML_Char *name;
2908 ENTITY *entity;
2909 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2910 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2911 if (ch) {
2912 #if XML_GE == 1
2913 /* NOTE: We are replacing 4-6 characters original input for 1 character
2914 * so there is no amplification and hence recording without
2915 * protection. */
2916 accountingDiffTolerated(parser, tok, (char *)&ch,
2917 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2918 XML_ACCOUNT_ENTITY_EXPANSION);
2919 #endif /* XML_GE == 1 */
2920 if (parser->m_characterDataHandler)
2921 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2922 else if (parser->m_defaultHandler)
2923 reportDefault(parser, enc, s, next);
2924 break;
2925 }
2926 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2927 next - enc->minBytesPerChar);
2928 if (! name)
2929 return XML_ERROR_NO_MEMORY;
2930 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2931 poolDiscard(&dtd->pool);
2932 /* First, determine if a check for an existing declaration is needed;
2933 if yes, check that the entity exists, and that it is internal,
2934 otherwise call the skipped entity or default handler.
2935 */
2936 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2937 if (! entity)
2938 return XML_ERROR_UNDEFINED_ENTITY;
2939 else if (! entity->is_internal)
2940 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2941 } else if (! entity) {
2942 if (parser->m_skippedEntityHandler)
2943 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2944 else if (parser->m_defaultHandler)
2945 reportDefault(parser, enc, s, next);
2946 break;
2947 }
2948 if (entity->open)
2949 return XML_ERROR_RECURSIVE_ENTITY_REF;
2950 if (entity->notation)
2951 return XML_ERROR_BINARY_ENTITY_REF;
2952 if (entity->textPtr) {
2953 enum XML_Error result;
2954 if (! parser->m_defaultExpandInternalEntities) {
2955 if (parser->m_skippedEntityHandler)
2956 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2957 0);
2958 else if (parser->m_defaultHandler)
2959 reportDefault(parser, enc, s, next);
2960 break;
2961 }
2962 result = processInternalEntity(parser, entity, XML_FALSE);
2963 if (result != XML_ERROR_NONE)
2964 return result;
2965 } else if (parser->m_externalEntityRefHandler) {
2966 const XML_Char *context;
2967 entity->open = XML_TRUE;
2968 context = getContext(parser);
2969 entity->open = XML_FALSE;
2970 if (! context)
2971 return XML_ERROR_NO_MEMORY;
2972 if (! parser->m_externalEntityRefHandler(
2973 parser->m_externalEntityRefHandlerArg, context, entity->base,
2974 entity->systemId, entity->publicId))
2975 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2976 poolDiscard(&parser->m_tempPool);
2977 } else if (parser->m_defaultHandler)
2978 reportDefault(parser, enc, s, next);
2979 break;
2980 }
2981 case XML_TOK_START_TAG_NO_ATTS:
2982 /* fall through */
2983 case XML_TOK_START_TAG_WITH_ATTS: {
2984 TAG *tag;
2985 enum XML_Error result;
2986 XML_Char *toPtr;
2987 if (parser->m_freeTagList) {
2988 tag = parser->m_freeTagList;
2989 parser->m_freeTagList = parser->m_freeTagList->parent;
2990 } else {
2991 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2992 if (! tag)
2993 return XML_ERROR_NO_MEMORY;
2994 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2995 if (! tag->buf) {
2996 FREE(parser, tag);
2997 return XML_ERROR_NO_MEMORY;
2998 }
2999 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
3000 }
3001 tag->bindings = NULL;
3002 tag->parent = parser->m_tagStack;
3003 parser->m_tagStack = tag;
3004 tag->name.localPart = NULL;
3005 tag->name.prefix = NULL;
3006 tag->rawName = s + enc->minBytesPerChar;
3007 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3008 ++parser->m_tagLevel;
3009 {
3010 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3011 const char *fromPtr = tag->rawName;
3012 toPtr = (XML_Char *)tag->buf;
3013 for (;;) {
3014 int bufSize;
3015 int convLen;
3016 const enum XML_Convert_Result convert_res
3017 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3018 (ICHAR *)tag->bufEnd - 1);
3019 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3020 if ((fromPtr >= rawNameEnd)
3021 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3022 tag->name.strLen = convLen;
3023 break;
3024 }
3025 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3026 {
3027 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3028 if (temp == NULL)
3029 return XML_ERROR_NO_MEMORY;
3030 tag->buf = temp;
3031 tag->bufEnd = temp + bufSize;
3032 toPtr = (XML_Char *)temp + convLen;
3033 }
3034 }
3035 }
3036 tag->name.str = (XML_Char *)tag->buf;
3037 *toPtr = XML_T('\0');
3038 result
3039 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3040 if (result)
3041 return result;
3042 if (parser->m_startElementHandler)
3043 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3044 (const XML_Char **)parser->m_atts);
3045 else if (parser->m_defaultHandler)
3046 reportDefault(parser, enc, s, next);
3047 poolClear(&parser->m_tempPool);
3048 break;
3049 }
3050 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3051 /* fall through */
3052 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3053 const char *rawName = s + enc->minBytesPerChar;
3054 enum XML_Error result;
3055 BINDING *bindings = NULL;
3056 XML_Bool noElmHandlers = XML_TRUE;
3057 TAG_NAME name;
3058 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3059 rawName + XmlNameLength(enc, rawName));
3060 if (! name.str)
3061 return XML_ERROR_NO_MEMORY;
3062 poolFinish(&parser->m_tempPool);
3063 result = storeAtts(parser, enc, s, &name, &bindings,
3064 XML_ACCOUNT_NONE /* token spans whole start tag */);
3065 if (result != XML_ERROR_NONE) {
3066 freeBindings(parser, bindings);
3067 return result;
3068 }
3069 poolFinish(&parser->m_tempPool);
3070 if (parser->m_startElementHandler) {
3071 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3072 (const XML_Char **)parser->m_atts);
3073 noElmHandlers = XML_FALSE;
3074 }
3075 if (parser->m_endElementHandler) {
3076 if (parser->m_startElementHandler)
3077 *eventPP = *eventEndPP;
3078 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3079 noElmHandlers = XML_FALSE;
3080 }
3081 if (noElmHandlers && parser->m_defaultHandler)
3082 reportDefault(parser, enc, s, next);
3083 poolClear(&parser->m_tempPool);
3084 freeBindings(parser, bindings);
3085 }
3086 if ((parser->m_tagLevel == 0)
3087 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3088 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3089 parser->m_processor = epilogProcessor;
3090 else
3091 return epilogProcessor(parser, next, end, nextPtr);
3092 }
3093 break;
3094 case XML_TOK_END_TAG:
3095 if (parser->m_tagLevel == startTagLevel)
3096 return XML_ERROR_ASYNC_ENTITY;
3097 else {
3098 int len;
3099 const char *rawName;
3100 TAG *tag = parser->m_tagStack;
3101 rawName = s + enc->minBytesPerChar * 2;
3102 len = XmlNameLength(enc, rawName);
3103 if (len != tag->rawNameLength
3104 || memcmp(tag->rawName, rawName, len) != 0) {
3105 *eventPP = rawName;
3106 return XML_ERROR_TAG_MISMATCH;
3107 }
3108 parser->m_tagStack = tag->parent;
3109 tag->parent = parser->m_freeTagList;
3110 parser->m_freeTagList = tag;
3111 --parser->m_tagLevel;
3112 if (parser->m_endElementHandler) {
3113 const XML_Char *localPart;
3114 const XML_Char *prefix;
3115 XML_Char *uri;
3116 localPart = tag->name.localPart;
3117 if (parser->m_ns && localPart) {
3118 /* localPart and prefix may have been overwritten in
3119 tag->name.str, since this points to the binding->uri
3120 buffer which gets reused; so we have to add them again
3121 */
3122 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3123 /* don't need to check for space - already done in storeAtts() */
3124 while (*localPart)
3125 *uri++ = *localPart++;
3126 prefix = tag->name.prefix;
3127 if (parser->m_ns_triplets && prefix) {
3128 *uri++ = parser->m_namespaceSeparator;
3129 while (*prefix)
3130 *uri++ = *prefix++;
3131 }
3132 *uri = XML_T('\0');
3133 }
3134 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3135 } else if (parser->m_defaultHandler)
3136 reportDefault(parser, enc, s, next);
3137 while (tag->bindings) {
3138 BINDING *b = tag->bindings;
3139 if (parser->m_endNamespaceDeclHandler)
3140 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3141 b->prefix->name);
3142 tag->bindings = tag->bindings->nextTagBinding;
3143 b->nextTagBinding = parser->m_freeBindingList;
3144 parser->m_freeBindingList = b;
3145 b->prefix->binding = b->prevPrefixBinding;
3146 }
3147 if ((parser->m_tagLevel == 0)
3148 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3149 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3150 parser->m_processor = epilogProcessor;
3151 else
3152 return epilogProcessor(parser, next, end, nextPtr);
3153 }
3154 }
3155 break;
3156 case XML_TOK_CHAR_REF: {
3157 int n = XmlCharRefNumber(enc, s);
3158 if (n < 0)
3159 return XML_ERROR_BAD_CHAR_REF;
3160 if (parser->m_characterDataHandler) {
3161 XML_Char buf[XML_ENCODE_MAX];
3162 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3163 XmlEncode(n, (ICHAR *)buf));
3164 } else if (parser->m_defaultHandler)
3165 reportDefault(parser, enc, s, next);
3166 } break;
3167 case XML_TOK_XML_DECL:
3168 return XML_ERROR_MISPLACED_XML_PI;
3169 case XML_TOK_DATA_NEWLINE:
3170 if (parser->m_characterDataHandler) {
3171 XML_Char c = 0xA;
3172 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3173 } else if (parser->m_defaultHandler)
3174 reportDefault(parser, enc, s, next);
3175 break;
3176 case XML_TOK_CDATA_SECT_OPEN: {
3177 enum XML_Error result;
3178 if (parser->m_startCdataSectionHandler)
3179 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3180 /* BEGIN disabled code */
3181 /* Suppose you doing a transformation on a document that involves
3182 changing only the character data. You set up a defaultHandler
3183 and a characterDataHandler. The defaultHandler simply copies
3184 characters through. The characterDataHandler does the
3185 transformation and writes the characters out escaping them as
3186 necessary. This case will fail to work if we leave out the
3187 following two lines (because & and < inside CDATA sections will
3188 be incorrectly escaped).
3189
3190 However, now we have a start/endCdataSectionHandler, so it seems
3191 easier to let the user deal with this.
3192 */
3193 else if ((0) && parser->m_characterDataHandler)
3194 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3195 0);
3196 /* END disabled code */
3197 else if (parser->m_defaultHandler)
3198 reportDefault(parser, enc, s, next);
3199 result
3200 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3201 if (result != XML_ERROR_NONE)
3202 return result;
3203 else if (! next) {
3204 parser->m_processor = cdataSectionProcessor;
3205 return result;
3206 }
3207 } break;
3208 case XML_TOK_TRAILING_RSQB:
3209 if (haveMore) {
3210 *nextPtr = s;
3211 return XML_ERROR_NONE;
3212 }
3213 if (parser->m_characterDataHandler) {
3214 if (MUST_CONVERT(enc, s)) {
3215 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3216 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3217 parser->m_characterDataHandler(
3218 parser->m_handlerArg, parser->m_dataBuf,
3219 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3220 } else
3221 parser->m_characterDataHandler(
3222 parser->m_handlerArg, (const XML_Char *)s,
3223 (int)((const XML_Char *)end - (const XML_Char *)s));
3224 } else if (parser->m_defaultHandler)
3225 reportDefault(parser, enc, s, end);
3226 /* We are at the end of the final buffer, should we check for
3227 XML_SUSPENDED, XML_FINISHED?
3228 */
3229 if (startTagLevel == 0) {
3230 *eventPP = end;
3231 return XML_ERROR_NO_ELEMENTS;
3232 }
3233 if (parser->m_tagLevel != startTagLevel) {
3234 *eventPP = end;
3235 return XML_ERROR_ASYNC_ENTITY;
3236 }
3237 *nextPtr = end;
3238 return XML_ERROR_NONE;
3239 case XML_TOK_DATA_CHARS: {
3240 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3241 if (charDataHandler) {
3242 if (MUST_CONVERT(enc, s)) {
3243 for (;;) {
3244 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3245 const enum XML_Convert_Result convert_res = XmlConvert(
3246 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3247 *eventEndPP = s;
3248 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3249 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3250 if ((convert_res == XML_CONVERT_COMPLETED)
3251 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3252 break;
3253 *eventPP = s;
3254 }
3255 } else
3256 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3257 (int)((const XML_Char *)next - (const XML_Char *)s));
3258 } else if (parser->m_defaultHandler)
3259 reportDefault(parser, enc, s, next);
3260 } break;
3261 case XML_TOK_PI:
3262 if (! reportProcessingInstruction(parser, enc, s, next))
3263 return XML_ERROR_NO_MEMORY;
3264 break;
3265 case XML_TOK_COMMENT:
3266 if (! reportComment(parser, enc, s, next))
3267 return XML_ERROR_NO_MEMORY;
3268 break;
3269 default:
3270 /* All of the tokens produced by XmlContentTok() have their own
3271 * explicit cases, so this default is not strictly necessary.
3272 * However it is a useful safety net, so we retain the code and
3273 * simply exclude it from the coverage tests.
3274 *
3275 * LCOV_EXCL_START
3276 */
3277 if (parser->m_defaultHandler)
3278 reportDefault(parser, enc, s, next);
3279 break;
3280 /* LCOV_EXCL_STOP */
3281 }
3282 *eventPP = s = next;
3283 switch (parser->m_parsingStatus.parsing) {
3284 case XML_SUSPENDED:
3285 *nextPtr = next;
3286 return XML_ERROR_NONE;
3287 case XML_FINISHED:
3288 return XML_ERROR_ABORTED;
3289 default:;
3290 }
3291 }
3292 /* not reached */
3293 }
3294
3295 /* This function does not call free() on the allocated memory, merely
3296 * moving it to the parser's m_freeBindingList where it can be freed or
3297 * reused as appropriate.
3298 */
3299 static void
freeBindings(XML_Parser parser,BINDING * bindings)3300 freeBindings(XML_Parser parser, BINDING *bindings) {
3301 while (bindings) {
3302 BINDING *b = bindings;
3303
3304 /* m_startNamespaceDeclHandler will have been called for this
3305 * binding in addBindings(), so call the end handler now.
3306 */
3307 if (parser->m_endNamespaceDeclHandler)
3308 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3309
3310 bindings = bindings->nextTagBinding;
3311 b->nextTagBinding = parser->m_freeBindingList;
3312 parser->m_freeBindingList = b;
3313 b->prefix->binding = b->prevPrefixBinding;
3314 }
3315 }
3316
3317 /* Precondition: all arguments must be non-NULL;
3318 Purpose:
3319 - normalize attributes
3320 - check attributes for well-formedness
3321 - generate namespace aware attribute names (URI, prefix)
3322 - build list of attributes for startElementHandler
3323 - default attributes
3324 - process namespace declarations (check and report them)
3325 - generate namespace aware element name (URI, prefix)
3326 */
3327 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3328 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3329 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3330 enum XML_Account account) {
3331 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3332 ELEMENT_TYPE *elementType;
3333 int nDefaultAtts;
3334 const XML_Char **appAtts; /* the attribute list for the application */
3335 int attIndex = 0;
3336 int prefixLen;
3337 int i;
3338 int n;
3339 XML_Char *uri;
3340 int nPrefixes = 0;
3341 BINDING *binding;
3342 const XML_Char *localPart;
3343
3344 /* lookup the element type name */
3345 elementType
3346 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3347 if (! elementType) {
3348 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3349 if (! name)
3350 return XML_ERROR_NO_MEMORY;
3351 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3352 sizeof(ELEMENT_TYPE));
3353 if (! elementType)
3354 return XML_ERROR_NO_MEMORY;
3355 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3356 return XML_ERROR_NO_MEMORY;
3357 }
3358 nDefaultAtts = elementType->nDefaultAtts;
3359
3360 /* get the attributes from the tokenizer */
3361 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3362
3363 /* Detect and prevent integer overflow */
3364 if (n > INT_MAX - nDefaultAtts) {
3365 return XML_ERROR_NO_MEMORY;
3366 }
3367
3368 if (n + nDefaultAtts > parser->m_attsSize) {
3369 int oldAttsSize = parser->m_attsSize;
3370 ATTRIBUTE *temp;
3371 #ifdef XML_ATTR_INFO
3372 XML_AttrInfo *temp2;
3373 #endif
3374
3375 /* Detect and prevent integer overflow */
3376 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3377 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3378 return XML_ERROR_NO_MEMORY;
3379 }
3380
3381 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3382
3383 /* Detect and prevent integer overflow.
3384 * The preprocessor guard addresses the "always false" warning
3385 * from -Wtype-limits on platforms where
3386 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3387 #if UINT_MAX >= SIZE_MAX
3388 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3389 parser->m_attsSize = oldAttsSize;
3390 return XML_ERROR_NO_MEMORY;
3391 }
3392 #endif
3393
3394 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3395 parser->m_attsSize * sizeof(ATTRIBUTE));
3396 if (temp == NULL) {
3397 parser->m_attsSize = oldAttsSize;
3398 return XML_ERROR_NO_MEMORY;
3399 }
3400 parser->m_atts = temp;
3401 #ifdef XML_ATTR_INFO
3402 /* Detect and prevent integer overflow.
3403 * The preprocessor guard addresses the "always false" warning
3404 * from -Wtype-limits on platforms where
3405 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3406 # if UINT_MAX >= SIZE_MAX
3407 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3408 parser->m_attsSize = oldAttsSize;
3409 return XML_ERROR_NO_MEMORY;
3410 }
3411 # endif
3412
3413 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3414 parser->m_attsSize * sizeof(XML_AttrInfo));
3415 if (temp2 == NULL) {
3416 parser->m_attsSize = oldAttsSize;
3417 return XML_ERROR_NO_MEMORY;
3418 }
3419 parser->m_attInfo = temp2;
3420 #endif
3421 if (n > oldAttsSize)
3422 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3423 }
3424
3425 appAtts = (const XML_Char **)parser->m_atts;
3426 for (i = 0; i < n; i++) {
3427 ATTRIBUTE *currAtt = &parser->m_atts[i];
3428 #ifdef XML_ATTR_INFO
3429 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3430 #endif
3431 /* add the name and value to the attribute list */
3432 ATTRIBUTE_ID *attId
3433 = getAttributeId(parser, enc, currAtt->name,
3434 currAtt->name + XmlNameLength(enc, currAtt->name));
3435 if (! attId)
3436 return XML_ERROR_NO_MEMORY;
3437 #ifdef XML_ATTR_INFO
3438 currAttInfo->nameStart
3439 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3440 currAttInfo->nameEnd
3441 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3442 currAttInfo->valueStart = parser->m_parseEndByteIndex
3443 - (parser->m_parseEndPtr - currAtt->valuePtr);
3444 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3445 - (parser->m_parseEndPtr - currAtt->valueEnd);
3446 #endif
3447 /* Detect duplicate attributes by their QNames. This does not work when
3448 namespace processing is turned on and different prefixes for the same
3449 namespace are used. For this case we have a check further down.
3450 */
3451 if ((attId->name)[-1]) {
3452 if (enc == parser->m_encoding)
3453 parser->m_eventPtr = parser->m_atts[i].name;
3454 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3455 }
3456 (attId->name)[-1] = 1;
3457 appAtts[attIndex++] = attId->name;
3458 if (! parser->m_atts[i].normalized) {
3459 enum XML_Error result;
3460 XML_Bool isCdata = XML_TRUE;
3461
3462 /* figure out whether declared as other than CDATA */
3463 if (attId->maybeTokenized) {
3464 int j;
3465 for (j = 0; j < nDefaultAtts; j++) {
3466 if (attId == elementType->defaultAtts[j].id) {
3467 isCdata = elementType->defaultAtts[j].isCdata;
3468 break;
3469 }
3470 }
3471 }
3472
3473 /* normalize the attribute value */
3474 result = storeAttributeValue(
3475 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3476 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3477 if (result)
3478 return result;
3479 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3480 poolFinish(&parser->m_tempPool);
3481 } else {
3482 /* the value did not need normalizing */
3483 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3484 parser->m_atts[i].valuePtr,
3485 parser->m_atts[i].valueEnd);
3486 if (appAtts[attIndex] == 0)
3487 return XML_ERROR_NO_MEMORY;
3488 poolFinish(&parser->m_tempPool);
3489 }
3490 /* handle prefixed attribute names */
3491 if (attId->prefix) {
3492 if (attId->xmlns) {
3493 /* deal with namespace declarations here */
3494 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3495 appAtts[attIndex], bindingsPtr);
3496 if (result)
3497 return result;
3498 --attIndex;
3499 } else {
3500 /* deal with other prefixed names later */
3501 attIndex++;
3502 nPrefixes++;
3503 (attId->name)[-1] = 2;
3504 }
3505 } else
3506 attIndex++;
3507 }
3508
3509 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3510 parser->m_nSpecifiedAtts = attIndex;
3511 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3512 for (i = 0; i < attIndex; i += 2)
3513 if (appAtts[i] == elementType->idAtt->name) {
3514 parser->m_idAttIndex = i;
3515 break;
3516 }
3517 } else
3518 parser->m_idAttIndex = -1;
3519
3520 /* do attribute defaulting */
3521 for (i = 0; i < nDefaultAtts; i++) {
3522 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3523 if (! (da->id->name)[-1] && da->value) {
3524 if (da->id->prefix) {
3525 if (da->id->xmlns) {
3526 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3527 da->value, bindingsPtr);
3528 if (result)
3529 return result;
3530 } else {
3531 (da->id->name)[-1] = 2;
3532 nPrefixes++;
3533 appAtts[attIndex++] = da->id->name;
3534 appAtts[attIndex++] = da->value;
3535 }
3536 } else {
3537 (da->id->name)[-1] = 1;
3538 appAtts[attIndex++] = da->id->name;
3539 appAtts[attIndex++] = da->value;
3540 }
3541 }
3542 }
3543 appAtts[attIndex] = 0;
3544
3545 /* expand prefixed attribute names, check for duplicates,
3546 and clear flags that say whether attributes were specified */
3547 i = 0;
3548 if (nPrefixes) {
3549 int j; /* hash table index */
3550 unsigned long version = parser->m_nsAttsVersion;
3551
3552 /* Detect and prevent invalid shift */
3553 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3554 return XML_ERROR_NO_MEMORY;
3555 }
3556
3557 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3558 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3559 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3560 if ((nPrefixes << 1)
3561 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3562 NS_ATT *temp;
3563 /* hash table size must also be a power of 2 and >= 8 */
3564 while (nPrefixes >> parser->m_nsAttsPower++)
3565 ;
3566 if (parser->m_nsAttsPower < 3)
3567 parser->m_nsAttsPower = 3;
3568
3569 /* Detect and prevent invalid shift */
3570 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3571 /* Restore actual size of memory in m_nsAtts */
3572 parser->m_nsAttsPower = oldNsAttsPower;
3573 return XML_ERROR_NO_MEMORY;
3574 }
3575
3576 nsAttsSize = 1u << parser->m_nsAttsPower;
3577
3578 /* Detect and prevent integer overflow.
3579 * The preprocessor guard addresses the "always false" warning
3580 * from -Wtype-limits on platforms where
3581 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3582 #if UINT_MAX >= SIZE_MAX
3583 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3584 /* Restore actual size of memory in m_nsAtts */
3585 parser->m_nsAttsPower = oldNsAttsPower;
3586 return XML_ERROR_NO_MEMORY;
3587 }
3588 #endif
3589
3590 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3591 nsAttsSize * sizeof(NS_ATT));
3592 if (! temp) {
3593 /* Restore actual size of memory in m_nsAtts */
3594 parser->m_nsAttsPower = oldNsAttsPower;
3595 return XML_ERROR_NO_MEMORY;
3596 }
3597 parser->m_nsAtts = temp;
3598 version = 0; /* force re-initialization of m_nsAtts hash table */
3599 }
3600 /* using a version flag saves us from initializing m_nsAtts every time */
3601 if (! version) { /* initialize version flags when version wraps around */
3602 version = INIT_ATTS_VERSION;
3603 for (j = nsAttsSize; j != 0;)
3604 parser->m_nsAtts[--j].version = version;
3605 }
3606 parser->m_nsAttsVersion = --version;
3607
3608 /* expand prefixed names and check for duplicates */
3609 for (; i < attIndex; i += 2) {
3610 const XML_Char *s = appAtts[i];
3611 if (s[-1] == 2) { /* prefixed */
3612 ATTRIBUTE_ID *id;
3613 const BINDING *b;
3614 unsigned long uriHash;
3615 struct siphash sip_state;
3616 struct sipkey sip_key;
3617
3618 copy_salt_to_sipkey(parser, &sip_key);
3619 sip24_init(&sip_state, &sip_key);
3620
3621 ((XML_Char *)s)[-1] = 0; /* clear flag */
3622 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3623 if (! id || ! id->prefix) {
3624 /* This code is walking through the appAtts array, dealing
3625 * with (in this case) a prefixed attribute name. To be in
3626 * the array, the attribute must have already been bound, so
3627 * has to have passed through the hash table lookup once
3628 * already. That implies that an entry for it already
3629 * exists, so the lookup above will return a pointer to
3630 * already allocated memory. There is no opportunaity for
3631 * the allocator to fail, so the condition above cannot be
3632 * fulfilled.
3633 *
3634 * Since it is difficult to be certain that the above
3635 * analysis is complete, we retain the test and merely
3636 * remove the code from coverage tests.
3637 */
3638 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3639 }
3640 b = id->prefix->binding;
3641 if (! b)
3642 return XML_ERROR_UNBOUND_PREFIX;
3643
3644 for (j = 0; j < b->uriLen; j++) {
3645 const XML_Char c = b->uri[j];
3646 if (! poolAppendChar(&parser->m_tempPool, c))
3647 return XML_ERROR_NO_MEMORY;
3648 }
3649
3650 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3651
3652 while (*s++ != XML_T(ASCII_COLON))
3653 ;
3654
3655 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3656
3657 do { /* copies null terminator */
3658 if (! poolAppendChar(&parser->m_tempPool, *s))
3659 return XML_ERROR_NO_MEMORY;
3660 } while (*s++);
3661
3662 uriHash = (unsigned long)sip24_final(&sip_state);
3663
3664 { /* Check hash table for duplicate of expanded name (uriName).
3665 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3666 */
3667 unsigned char step = 0;
3668 unsigned long mask = nsAttsSize - 1;
3669 j = uriHash & mask; /* index into hash table */
3670 while (parser->m_nsAtts[j].version == version) {
3671 /* for speed we compare stored hash values first */
3672 if (uriHash == parser->m_nsAtts[j].hash) {
3673 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3674 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3675 /* s1 is null terminated, but not s2 */
3676 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3677 ;
3678 if (*s1 == 0)
3679 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3680 }
3681 if (! step)
3682 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3683 j < step ? (j += nsAttsSize - step) : (j -= step);
3684 }
3685 }
3686
3687 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3688 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3689 s = b->prefix->name;
3690 do {
3691 if (! poolAppendChar(&parser->m_tempPool, *s))
3692 return XML_ERROR_NO_MEMORY;
3693 } while (*s++);
3694 }
3695
3696 /* store expanded name in attribute list */
3697 s = poolStart(&parser->m_tempPool);
3698 poolFinish(&parser->m_tempPool);
3699 appAtts[i] = s;
3700
3701 /* fill empty slot with new version, uriName and hash value */
3702 parser->m_nsAtts[j].version = version;
3703 parser->m_nsAtts[j].hash = uriHash;
3704 parser->m_nsAtts[j].uriName = s;
3705
3706 if (! --nPrefixes) {
3707 i += 2;
3708 break;
3709 }
3710 } else /* not prefixed */
3711 ((XML_Char *)s)[-1] = 0; /* clear flag */
3712 }
3713 }
3714 /* clear flags for the remaining attributes */
3715 for (; i < attIndex; i += 2)
3716 ((XML_Char *)(appAtts[i]))[-1] = 0;
3717 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3718 binding->attId->name[-1] = 0;
3719
3720 if (! parser->m_ns)
3721 return XML_ERROR_NONE;
3722
3723 /* expand the element type name */
3724 if (elementType->prefix) {
3725 binding = elementType->prefix->binding;
3726 if (! binding)
3727 return XML_ERROR_UNBOUND_PREFIX;
3728 localPart = tagNamePtr->str;
3729 while (*localPart++ != XML_T(ASCII_COLON))
3730 ;
3731 } else if (dtd->defaultPrefix.binding) {
3732 binding = dtd->defaultPrefix.binding;
3733 localPart = tagNamePtr->str;
3734 } else
3735 return XML_ERROR_NONE;
3736 prefixLen = 0;
3737 if (parser->m_ns_triplets && binding->prefix->name) {
3738 for (; binding->prefix->name[prefixLen++];)
3739 ; /* prefixLen includes null terminator */
3740 }
3741 tagNamePtr->localPart = localPart;
3742 tagNamePtr->uriLen = binding->uriLen;
3743 tagNamePtr->prefix = binding->prefix->name;
3744 tagNamePtr->prefixLen = prefixLen;
3745 for (i = 0; localPart[i++];)
3746 ; /* i includes null terminator */
3747
3748 /* Detect and prevent integer overflow */
3749 if (binding->uriLen > INT_MAX - prefixLen
3750 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3751 return XML_ERROR_NO_MEMORY;
3752 }
3753
3754 n = i + binding->uriLen + prefixLen;
3755 if (n > binding->uriAlloc) {
3756 TAG *p;
3757
3758 /* Detect and prevent integer overflow */
3759 if (n > INT_MAX - EXPAND_SPARE) {
3760 return XML_ERROR_NO_MEMORY;
3761 }
3762 /* Detect and prevent integer overflow.
3763 * The preprocessor guard addresses the "always false" warning
3764 * from -Wtype-limits on platforms where
3765 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3766 #if UINT_MAX >= SIZE_MAX
3767 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3768 return XML_ERROR_NO_MEMORY;
3769 }
3770 #endif
3771
3772 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3773 if (! uri)
3774 return XML_ERROR_NO_MEMORY;
3775 binding->uriAlloc = n + EXPAND_SPARE;
3776 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3777 for (p = parser->m_tagStack; p; p = p->parent)
3778 if (p->name.str == binding->uri)
3779 p->name.str = uri;
3780 FREE(parser, binding->uri);
3781 binding->uri = uri;
3782 }
3783 /* if m_namespaceSeparator != '\0' then uri includes it already */
3784 uri = binding->uri + binding->uriLen;
3785 memcpy(uri, localPart, i * sizeof(XML_Char));
3786 /* we always have a namespace separator between localPart and prefix */
3787 if (prefixLen) {
3788 uri += i - 1;
3789 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3790 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3791 }
3792 tagNamePtr->str = binding->uri;
3793 return XML_ERROR_NONE;
3794 }
3795
3796 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3797 is_rfc3986_uri_char(XML_Char candidate) {
3798 // For the RFC 3986 ANBF grammar see
3799 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3800
3801 switch (candidate) {
3802 // From rule "ALPHA" (uppercase half)
3803 case 'A':
3804 case 'B':
3805 case 'C':
3806 case 'D':
3807 case 'E':
3808 case 'F':
3809 case 'G':
3810 case 'H':
3811 case 'I':
3812 case 'J':
3813 case 'K':
3814 case 'L':
3815 case 'M':
3816 case 'N':
3817 case 'O':
3818 case 'P':
3819 case 'Q':
3820 case 'R':
3821 case 'S':
3822 case 'T':
3823 case 'U':
3824 case 'V':
3825 case 'W':
3826 case 'X':
3827 case 'Y':
3828 case 'Z':
3829
3830 // From rule "ALPHA" (lowercase half)
3831 case 'a':
3832 case 'b':
3833 case 'c':
3834 case 'd':
3835 case 'e':
3836 case 'f':
3837 case 'g':
3838 case 'h':
3839 case 'i':
3840 case 'j':
3841 case 'k':
3842 case 'l':
3843 case 'm':
3844 case 'n':
3845 case 'o':
3846 case 'p':
3847 case 'q':
3848 case 'r':
3849 case 's':
3850 case 't':
3851 case 'u':
3852 case 'v':
3853 case 'w':
3854 case 'x':
3855 case 'y':
3856 case 'z':
3857
3858 // From rule "DIGIT"
3859 case '0':
3860 case '1':
3861 case '2':
3862 case '3':
3863 case '4':
3864 case '5':
3865 case '6':
3866 case '7':
3867 case '8':
3868 case '9':
3869
3870 // From rule "pct-encoded"
3871 case '%':
3872
3873 // From rule "unreserved"
3874 case '-':
3875 case '.':
3876 case '_':
3877 case '~':
3878
3879 // From rule "gen-delims"
3880 case ':':
3881 case '/':
3882 case '?':
3883 case '#':
3884 case '[':
3885 case ']':
3886 case '@':
3887
3888 // From rule "sub-delims"
3889 case '!':
3890 case '$':
3891 case '&':
3892 case '\'':
3893 case '(':
3894 case ')':
3895 case '*':
3896 case '+':
3897 case ',':
3898 case ';':
3899 case '=':
3900 return XML_TRUE;
3901
3902 default:
3903 return XML_FALSE;
3904 }
3905 }
3906
3907 /* addBinding() overwrites the value of prefix->binding without checking.
3908 Therefore one must keep track of the old value outside of addBinding().
3909 */
3910 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3911 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3912 const XML_Char *uri, BINDING **bindingsPtr) {
3913 // "http://www.w3.org/XML/1998/namespace"
3914 static const XML_Char xmlNamespace[]
3915 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3916 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3917 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3918 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3919 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3920 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3921 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3922 ASCII_e, '\0'};
3923 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3924 // "http://www.w3.org/2000/xmlns/"
3925 static const XML_Char xmlnsNamespace[]
3926 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3927 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3928 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3929 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3930 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3931 static const int xmlnsLen
3932 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3933
3934 XML_Bool mustBeXML = XML_FALSE;
3935 XML_Bool isXML = XML_TRUE;
3936 XML_Bool isXMLNS = XML_TRUE;
3937
3938 BINDING *b;
3939 int len;
3940
3941 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3942 if (*uri == XML_T('\0') && prefix->name)
3943 return XML_ERROR_UNDECLARING_PREFIX;
3944
3945 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3946 && prefix->name[1] == XML_T(ASCII_m)
3947 && prefix->name[2] == XML_T(ASCII_l)) {
3948 /* Not allowed to bind xmlns */
3949 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3950 && prefix->name[5] == XML_T('\0'))
3951 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3952
3953 if (prefix->name[3] == XML_T('\0'))
3954 mustBeXML = XML_TRUE;
3955 }
3956
3957 for (len = 0; uri[len]; len++) {
3958 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3959 isXML = XML_FALSE;
3960
3961 if (! mustBeXML && isXMLNS
3962 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3963 isXMLNS = XML_FALSE;
3964
3965 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3966 // today (and is not REQUIRED to do so with regard to the XML 1.0
3967 // namespaces specification) we have to at least make sure, that
3968 // the application on top of Expat (that is likely splitting expanded
3969 // element names ("qualified names") of form
3970 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3971 // in its element handler code) cannot be confused by an attacker
3972 // putting additional namespace separator characters into namespace
3973 // declarations. That would be ambiguous and not to be expected.
3974 //
3975 // While the HTML API docs of function XML_ParserCreateNS have been
3976 // advising against use of a namespace separator character that can
3977 // appear in a URI for >20 years now, some widespread applications
3978 // are using URI characters (':' (colon) in particular) for a
3979 // namespace separator, in practice. To keep these applications
3980 // functional, we only reject namespaces URIs containing the
3981 // application-chosen namespace separator if the chosen separator
3982 // is a non-URI character with regard to RFC 3986.
3983 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3984 && ! is_rfc3986_uri_char(uri[len])) {
3985 return XML_ERROR_SYNTAX;
3986 }
3987 }
3988 isXML = isXML && len == xmlLen;
3989 isXMLNS = isXMLNS && len == xmlnsLen;
3990
3991 if (mustBeXML != isXML)
3992 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3993 : XML_ERROR_RESERVED_NAMESPACE_URI;
3994
3995 if (isXMLNS)
3996 return XML_ERROR_RESERVED_NAMESPACE_URI;
3997
3998 if (parser->m_namespaceSeparator)
3999 len++;
4000 if (parser->m_freeBindingList) {
4001 b = parser->m_freeBindingList;
4002 if (len > b->uriAlloc) {
4003 /* Detect and prevent integer overflow */
4004 if (len > INT_MAX - EXPAND_SPARE) {
4005 return XML_ERROR_NO_MEMORY;
4006 }
4007
4008 /* Detect and prevent integer overflow.
4009 * The preprocessor guard addresses the "always false" warning
4010 * from -Wtype-limits on platforms where
4011 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4012 #if UINT_MAX >= SIZE_MAX
4013 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4014 return XML_ERROR_NO_MEMORY;
4015 }
4016 #endif
4017
4018 XML_Char *temp = (XML_Char *)REALLOC(
4019 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4020 if (temp == NULL)
4021 return XML_ERROR_NO_MEMORY;
4022 b->uri = temp;
4023 b->uriAlloc = len + EXPAND_SPARE;
4024 }
4025 parser->m_freeBindingList = b->nextTagBinding;
4026 } else {
4027 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4028 if (! b)
4029 return XML_ERROR_NO_MEMORY;
4030
4031 /* Detect and prevent integer overflow */
4032 if (len > INT_MAX - EXPAND_SPARE) {
4033 return XML_ERROR_NO_MEMORY;
4034 }
4035 /* Detect and prevent integer overflow.
4036 * The preprocessor guard addresses the "always false" warning
4037 * from -Wtype-limits on platforms where
4038 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4039 #if UINT_MAX >= SIZE_MAX
4040 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4041 return XML_ERROR_NO_MEMORY;
4042 }
4043 #endif
4044
4045 b->uri
4046 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4047 if (! b->uri) {
4048 FREE(parser, b);
4049 return XML_ERROR_NO_MEMORY;
4050 }
4051 b->uriAlloc = len + EXPAND_SPARE;
4052 }
4053 b->uriLen = len;
4054 memcpy(b->uri, uri, len * sizeof(XML_Char));
4055 if (parser->m_namespaceSeparator)
4056 b->uri[len - 1] = parser->m_namespaceSeparator;
4057 b->prefix = prefix;
4058 b->attId = attId;
4059 b->prevPrefixBinding = prefix->binding;
4060 /* NULL binding when default namespace undeclared */
4061 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4062 prefix->binding = NULL;
4063 else
4064 prefix->binding = b;
4065 b->nextTagBinding = *bindingsPtr;
4066 *bindingsPtr = b;
4067 /* if attId == NULL then we are not starting a namespace scope */
4068 if (attId && parser->m_startNamespaceDeclHandler)
4069 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4070 prefix->binding ? uri : 0);
4071 return XML_ERROR_NONE;
4072 }
4073
4074 /* The idea here is to avoid using stack for each CDATA section when
4075 the whole file is parsed with one call.
4076 */
4077 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4078 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4079 const char **endPtr) {
4080 enum XML_Error result = doCdataSection(
4081 parser, parser->m_encoding, &start, end, endPtr,
4082 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4083 if (result != XML_ERROR_NONE)
4084 return result;
4085 if (start) {
4086 if (parser->m_parentParser) { /* we are parsing an external entity */
4087 parser->m_processor = externalEntityContentProcessor;
4088 return externalEntityContentProcessor(parser, start, end, endPtr);
4089 } else {
4090 parser->m_processor = contentProcessor;
4091 return contentProcessor(parser, start, end, endPtr);
4092 }
4093 }
4094 return result;
4095 }
4096
4097 /* startPtr gets set to non-null if the section is closed, and to null if
4098 the section is not yet closed.
4099 */
4100 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4101 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4102 const char *end, const char **nextPtr, XML_Bool haveMore,
4103 enum XML_Account account) {
4104 const char *s = *startPtr;
4105 const char **eventPP;
4106 const char **eventEndPP;
4107 if (enc == parser->m_encoding) {
4108 eventPP = &parser->m_eventPtr;
4109 *eventPP = s;
4110 eventEndPP = &parser->m_eventEndPtr;
4111 } else {
4112 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4113 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4114 }
4115 *eventPP = s;
4116 *startPtr = NULL;
4117
4118 for (;;) {
4119 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4120 int tok = XmlCdataSectionTok(enc, s, end, &next);
4121 #if XML_GE == 1
4122 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4123 accountingOnAbort(parser);
4124 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4125 }
4126 #else
4127 UNUSED_P(account);
4128 #endif
4129 *eventEndPP = next;
4130 switch (tok) {
4131 case XML_TOK_CDATA_SECT_CLOSE:
4132 if (parser->m_endCdataSectionHandler)
4133 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4134 /* BEGIN disabled code */
4135 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4136 else if ((0) && parser->m_characterDataHandler)
4137 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4138 0);
4139 /* END disabled code */
4140 else if (parser->m_defaultHandler)
4141 reportDefault(parser, enc, s, next);
4142 *startPtr = next;
4143 *nextPtr = next;
4144 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4145 return XML_ERROR_ABORTED;
4146 else
4147 return XML_ERROR_NONE;
4148 case XML_TOK_DATA_NEWLINE:
4149 if (parser->m_characterDataHandler) {
4150 XML_Char c = 0xA;
4151 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4152 } else if (parser->m_defaultHandler)
4153 reportDefault(parser, enc, s, next);
4154 break;
4155 case XML_TOK_DATA_CHARS: {
4156 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4157 if (charDataHandler) {
4158 if (MUST_CONVERT(enc, s)) {
4159 for (;;) {
4160 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4161 const enum XML_Convert_Result convert_res = XmlConvert(
4162 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4163 *eventEndPP = next;
4164 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4165 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4166 if ((convert_res == XML_CONVERT_COMPLETED)
4167 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4168 break;
4169 *eventPP = s;
4170 }
4171 } else
4172 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4173 (int)((const XML_Char *)next - (const XML_Char *)s));
4174 } else if (parser->m_defaultHandler)
4175 reportDefault(parser, enc, s, next);
4176 } break;
4177 case XML_TOK_INVALID:
4178 *eventPP = next;
4179 return XML_ERROR_INVALID_TOKEN;
4180 case XML_TOK_PARTIAL_CHAR:
4181 if (haveMore) {
4182 *nextPtr = s;
4183 return XML_ERROR_NONE;
4184 }
4185 return XML_ERROR_PARTIAL_CHAR;
4186 case XML_TOK_PARTIAL:
4187 case XML_TOK_NONE:
4188 if (haveMore) {
4189 *nextPtr = s;
4190 return XML_ERROR_NONE;
4191 }
4192 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4193 default:
4194 /* Every token returned by XmlCdataSectionTok() has its own
4195 * explicit case, so this default case will never be executed.
4196 * We retain it as a safety net and exclude it from the coverage
4197 * statistics.
4198 *
4199 * LCOV_EXCL_START
4200 */
4201 *eventPP = next;
4202 return XML_ERROR_UNEXPECTED_STATE;
4203 /* LCOV_EXCL_STOP */
4204 }
4205
4206 *eventPP = s = next;
4207 switch (parser->m_parsingStatus.parsing) {
4208 case XML_SUSPENDED:
4209 *nextPtr = next;
4210 return XML_ERROR_NONE;
4211 case XML_FINISHED:
4212 return XML_ERROR_ABORTED;
4213 default:;
4214 }
4215 }
4216 /* not reached */
4217 }
4218
4219 #ifdef XML_DTD
4220
4221 /* The idea here is to avoid using stack for each IGNORE section when
4222 the whole file is parsed with one call.
4223 */
4224 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4225 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4226 const char **endPtr) {
4227 enum XML_Error result
4228 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4229 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4230 if (result != XML_ERROR_NONE)
4231 return result;
4232 if (start) {
4233 parser->m_processor = prologProcessor;
4234 return prologProcessor(parser, start, end, endPtr);
4235 }
4236 return result;
4237 }
4238
4239 /* startPtr gets set to non-null is the section is closed, and to null
4240 if the section is not yet closed.
4241 */
4242 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4243 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4244 const char *end, const char **nextPtr, XML_Bool haveMore) {
4245 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4246 int tok;
4247 const char *s = *startPtr;
4248 const char **eventPP;
4249 const char **eventEndPP;
4250 if (enc == parser->m_encoding) {
4251 eventPP = &parser->m_eventPtr;
4252 *eventPP = s;
4253 eventEndPP = &parser->m_eventEndPtr;
4254 } else {
4255 /* It's not entirely clear, but it seems the following two lines
4256 * of code cannot be executed. The only occasions on which 'enc'
4257 * is not 'encoding' are when this function is called
4258 * from the internal entity processing, and IGNORE sections are an
4259 * error in internal entities.
4260 *
4261 * Since it really isn't clear that this is true, we keep the code
4262 * and just remove it from our coverage tests.
4263 *
4264 * LCOV_EXCL_START
4265 */
4266 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4267 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4268 /* LCOV_EXCL_STOP */
4269 }
4270 *eventPP = s;
4271 *startPtr = NULL;
4272 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4273 # if XML_GE == 1
4274 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4275 XML_ACCOUNT_DIRECT)) {
4276 accountingOnAbort(parser);
4277 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4278 }
4279 # endif
4280 *eventEndPP = next;
4281 switch (tok) {
4282 case XML_TOK_IGNORE_SECT:
4283 if (parser->m_defaultHandler)
4284 reportDefault(parser, enc, s, next);
4285 *startPtr = next;
4286 *nextPtr = next;
4287 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4288 return XML_ERROR_ABORTED;
4289 else
4290 return XML_ERROR_NONE;
4291 case XML_TOK_INVALID:
4292 *eventPP = next;
4293 return XML_ERROR_INVALID_TOKEN;
4294 case XML_TOK_PARTIAL_CHAR:
4295 if (haveMore) {
4296 *nextPtr = s;
4297 return XML_ERROR_NONE;
4298 }
4299 return XML_ERROR_PARTIAL_CHAR;
4300 case XML_TOK_PARTIAL:
4301 case XML_TOK_NONE:
4302 if (haveMore) {
4303 *nextPtr = s;
4304 return XML_ERROR_NONE;
4305 }
4306 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4307 default:
4308 /* All of the tokens that XmlIgnoreSectionTok() returns have
4309 * explicit cases to handle them, so this default case is never
4310 * executed. We keep it as a safety net anyway, and remove it
4311 * from our test coverage statistics.
4312 *
4313 * LCOV_EXCL_START
4314 */
4315 *eventPP = next;
4316 return XML_ERROR_UNEXPECTED_STATE;
4317 /* LCOV_EXCL_STOP */
4318 }
4319 /* not reached */
4320 }
4321
4322 #endif /* XML_DTD */
4323
4324 static enum XML_Error
initializeEncoding(XML_Parser parser)4325 initializeEncoding(XML_Parser parser) {
4326 const char *s;
4327 #ifdef XML_UNICODE
4328 char encodingBuf[128];
4329 /* See comments about `protocolEncodingName` in parserInit() */
4330 if (! parser->m_protocolEncodingName)
4331 s = NULL;
4332 else {
4333 int i;
4334 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4335 if (i == sizeof(encodingBuf) - 1
4336 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4337 encodingBuf[0] = '\0';
4338 break;
4339 }
4340 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4341 }
4342 encodingBuf[i] = '\0';
4343 s = encodingBuf;
4344 }
4345 #else
4346 s = parser->m_protocolEncodingName;
4347 #endif
4348 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4349 &parser->m_initEncoding, &parser->m_encoding, s))
4350 return XML_ERROR_NONE;
4351 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4352 }
4353
4354 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4355 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4356 const char *next) {
4357 const char *encodingName = NULL;
4358 const XML_Char *storedEncName = NULL;
4359 const ENCODING *newEncoding = NULL;
4360 const char *version = NULL;
4361 const char *versionend = NULL;
4362 const XML_Char *storedversion = NULL;
4363 int standalone = -1;
4364
4365 #if XML_GE == 1
4366 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4367 XML_ACCOUNT_DIRECT)) {
4368 accountingOnAbort(parser);
4369 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4370 }
4371 #endif
4372
4373 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4374 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4375 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4376 if (isGeneralTextEntity)
4377 return XML_ERROR_TEXT_DECL;
4378 else
4379 return XML_ERROR_XML_DECL;
4380 }
4381 if (! isGeneralTextEntity && standalone == 1) {
4382 parser->m_dtd->standalone = XML_TRUE;
4383 #ifdef XML_DTD
4384 if (parser->m_paramEntityParsing
4385 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4386 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4387 #endif /* XML_DTD */
4388 }
4389 if (parser->m_xmlDeclHandler) {
4390 if (encodingName != NULL) {
4391 storedEncName = poolStoreString(
4392 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4393 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4394 if (! storedEncName)
4395 return XML_ERROR_NO_MEMORY;
4396 poolFinish(&parser->m_temp2Pool);
4397 }
4398 if (version) {
4399 storedversion
4400 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4401 versionend - parser->m_encoding->minBytesPerChar);
4402 if (! storedversion)
4403 return XML_ERROR_NO_MEMORY;
4404 }
4405 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4406 standalone);
4407 } else if (parser->m_defaultHandler)
4408 reportDefault(parser, parser->m_encoding, s, next);
4409 if (parser->m_protocolEncodingName == NULL) {
4410 if (newEncoding) {
4411 /* Check that the specified encoding does not conflict with what
4412 * the parser has already deduced. Do we have the same number
4413 * of bytes in the smallest representation of a character? If
4414 * this is UTF-16, is it the same endianness?
4415 */
4416 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4417 || (newEncoding->minBytesPerChar == 2
4418 && newEncoding != parser->m_encoding)) {
4419 parser->m_eventPtr = encodingName;
4420 return XML_ERROR_INCORRECT_ENCODING;
4421 }
4422 parser->m_encoding = newEncoding;
4423 } else if (encodingName) {
4424 enum XML_Error result;
4425 if (! storedEncName) {
4426 storedEncName = poolStoreString(
4427 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4428 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4429 if (! storedEncName)
4430 return XML_ERROR_NO_MEMORY;
4431 }
4432 result = handleUnknownEncoding(parser, storedEncName);
4433 poolClear(&parser->m_temp2Pool);
4434 if (result == XML_ERROR_UNKNOWN_ENCODING)
4435 parser->m_eventPtr = encodingName;
4436 return result;
4437 }
4438 }
4439
4440 if (storedEncName || storedversion)
4441 poolClear(&parser->m_temp2Pool);
4442
4443 return XML_ERROR_NONE;
4444 }
4445
4446 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4447 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4448 if (parser->m_unknownEncodingHandler) {
4449 XML_Encoding info;
4450 int i;
4451 for (i = 0; i < 256; i++)
4452 info.map[i] = -1;
4453 info.convert = NULL;
4454 info.data = NULL;
4455 info.release = NULL;
4456 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4457 encodingName, &info)) {
4458 ENCODING *enc;
4459 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4460 if (! parser->m_unknownEncodingMem) {
4461 if (info.release)
4462 info.release(info.data);
4463 return XML_ERROR_NO_MEMORY;
4464 }
4465 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4466 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4467 if (enc) {
4468 parser->m_unknownEncodingData = info.data;
4469 parser->m_unknownEncodingRelease = info.release;
4470 parser->m_encoding = enc;
4471 return XML_ERROR_NONE;
4472 }
4473 }
4474 if (info.release != NULL)
4475 info.release(info.data);
4476 }
4477 return XML_ERROR_UNKNOWN_ENCODING;
4478 }
4479
4480 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4481 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4482 const char **nextPtr) {
4483 enum XML_Error result = initializeEncoding(parser);
4484 if (result != XML_ERROR_NONE)
4485 return result;
4486 parser->m_processor = prologProcessor;
4487 return prologProcessor(parser, s, end, nextPtr);
4488 }
4489
4490 #ifdef XML_DTD
4491
4492 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4493 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4494 const char **nextPtr) {
4495 enum XML_Error result = initializeEncoding(parser);
4496 if (result != XML_ERROR_NONE)
4497 return result;
4498
4499 /* we know now that XML_Parse(Buffer) has been called,
4500 so we consider the external parameter entity read */
4501 parser->m_dtd->paramEntityRead = XML_TRUE;
4502
4503 if (parser->m_prologState.inEntityValue) {
4504 parser->m_processor = entityValueInitProcessor;
4505 return entityValueInitProcessor(parser, s, end, nextPtr);
4506 } else {
4507 parser->m_processor = externalParEntProcessor;
4508 return externalParEntProcessor(parser, s, end, nextPtr);
4509 }
4510 }
4511
4512 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4513 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4514 const char **nextPtr) {
4515 int tok;
4516 const char *start = s;
4517 const char *next = start;
4518 parser->m_eventPtr = start;
4519
4520 for (;;) {
4521 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4522 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4523 - storeEntityValue
4524 - processXmlDecl
4525 */
4526 parser->m_eventEndPtr = next;
4527 if (tok <= 0) {
4528 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4529 *nextPtr = s;
4530 return XML_ERROR_NONE;
4531 }
4532 switch (tok) {
4533 case XML_TOK_INVALID:
4534 return XML_ERROR_INVALID_TOKEN;
4535 case XML_TOK_PARTIAL:
4536 return XML_ERROR_UNCLOSED_TOKEN;
4537 case XML_TOK_PARTIAL_CHAR:
4538 return XML_ERROR_PARTIAL_CHAR;
4539 case XML_TOK_NONE: /* start == end */
4540 default:
4541 break;
4542 }
4543 /* found end of entity value - can store it now */
4544 return storeEntityValue(parser, parser->m_encoding, s, end,
4545 XML_ACCOUNT_DIRECT);
4546 } else if (tok == XML_TOK_XML_DECL) {
4547 enum XML_Error result;
4548 result = processXmlDecl(parser, 0, start, next);
4549 if (result != XML_ERROR_NONE)
4550 return result;
4551 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4552 * that to happen, a parameter entity parsing handler must have attempted
4553 * to suspend the parser, which fails and raises an error. The parser can
4554 * be aborted, but can't be suspended.
4555 */
4556 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4557 return XML_ERROR_ABORTED;
4558 *nextPtr = next;
4559 /* stop scanning for text declaration - we found one */
4560 parser->m_processor = entityValueProcessor;
4561 return entityValueProcessor(parser, next, end, nextPtr);
4562 }
4563 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4564 must move s and nextPtr forward to consume the BOM.
4565
4566 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4567 would leave the BOM in the buffer and return. On the next call to this
4568 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4569 is not valid to have multiple BOMs.
4570 */
4571 else if (tok == XML_TOK_BOM) {
4572 # if XML_GE == 1
4573 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4574 XML_ACCOUNT_DIRECT)) {
4575 accountingOnAbort(parser);
4576 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4577 }
4578 # endif
4579
4580 *nextPtr = next;
4581 s = next;
4582 }
4583 /* If we get this token, we have the start of what might be a
4584 normal tag, but not a declaration (i.e. it doesn't begin with
4585 "<!"). In a DTD context, that isn't legal.
4586 */
4587 else if (tok == XML_TOK_INSTANCE_START) {
4588 *nextPtr = next;
4589 return XML_ERROR_SYNTAX;
4590 }
4591 start = next;
4592 parser->m_eventPtr = start;
4593 }
4594 }
4595
4596 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4597 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4598 const char **nextPtr) {
4599 const char *next = s;
4600 int tok;
4601
4602 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4603 if (tok <= 0) {
4604 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4605 *nextPtr = s;
4606 return XML_ERROR_NONE;
4607 }
4608 switch (tok) {
4609 case XML_TOK_INVALID:
4610 return XML_ERROR_INVALID_TOKEN;
4611 case XML_TOK_PARTIAL:
4612 return XML_ERROR_UNCLOSED_TOKEN;
4613 case XML_TOK_PARTIAL_CHAR:
4614 return XML_ERROR_PARTIAL_CHAR;
4615 case XML_TOK_NONE: /* start == end */
4616 default:
4617 break;
4618 }
4619 }
4620 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4621 However, when parsing an external subset, doProlog will not accept a BOM
4622 as valid, and report a syntax error, so we have to skip the BOM, and
4623 account for the BOM bytes.
4624 */
4625 else if (tok == XML_TOK_BOM) {
4626 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4627 XML_ACCOUNT_DIRECT)) {
4628 accountingOnAbort(parser);
4629 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4630 }
4631
4632 s = next;
4633 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4634 }
4635
4636 parser->m_processor = prologProcessor;
4637 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4638 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4639 XML_ACCOUNT_DIRECT);
4640 }
4641
4642 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4643 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4644 const char **nextPtr) {
4645 const char *start = s;
4646 const char *next = s;
4647 const ENCODING *enc = parser->m_encoding;
4648 int tok;
4649
4650 for (;;) {
4651 tok = XmlPrologTok(enc, start, end, &next);
4652 /* Note: These bytes are accounted later in:
4653 - storeEntityValue
4654 */
4655 if (tok <= 0) {
4656 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4657 *nextPtr = s;
4658 return XML_ERROR_NONE;
4659 }
4660 switch (tok) {
4661 case XML_TOK_INVALID:
4662 return XML_ERROR_INVALID_TOKEN;
4663 case XML_TOK_PARTIAL:
4664 return XML_ERROR_UNCLOSED_TOKEN;
4665 case XML_TOK_PARTIAL_CHAR:
4666 return XML_ERROR_PARTIAL_CHAR;
4667 case XML_TOK_NONE: /* start == end */
4668 default:
4669 break;
4670 }
4671 /* found end of entity value - can store it now */
4672 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4673 }
4674 start = next;
4675 }
4676 }
4677
4678 #endif /* XML_DTD */
4679
4680 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4681 prologProcessor(XML_Parser parser, const char *s, const char *end,
4682 const char **nextPtr) {
4683 const char *next = s;
4684 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4685 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4686 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4687 XML_ACCOUNT_DIRECT);
4688 }
4689
4690 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4691 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4692 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4693 XML_Bool allowClosingDoctype, enum XML_Account account) {
4694 #ifdef XML_DTD
4695 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4696 #endif /* XML_DTD */
4697 static const XML_Char atypeCDATA[]
4698 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4699 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4700 static const XML_Char atypeIDREF[]
4701 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4702 static const XML_Char atypeIDREFS[]
4703 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4704 static const XML_Char atypeENTITY[]
4705 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4706 static const XML_Char atypeENTITIES[]
4707 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4708 ASCII_I, ASCII_E, ASCII_S, '\0'};
4709 static const XML_Char atypeNMTOKEN[]
4710 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4711 static const XML_Char atypeNMTOKENS[]
4712 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4713 ASCII_E, ASCII_N, ASCII_S, '\0'};
4714 static const XML_Char notationPrefix[]
4715 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4716 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4717 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4718 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4719
4720 #ifndef XML_DTD
4721 UNUSED_P(account);
4722 #endif
4723
4724 /* save one level of indirection */
4725 DTD *const dtd = parser->m_dtd;
4726
4727 const char **eventPP;
4728 const char **eventEndPP;
4729 enum XML_Content_Quant quant;
4730
4731 if (enc == parser->m_encoding) {
4732 eventPP = &parser->m_eventPtr;
4733 eventEndPP = &parser->m_eventEndPtr;
4734 } else {
4735 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4736 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4737 }
4738
4739 for (;;) {
4740 int role;
4741 XML_Bool handleDefault = XML_TRUE;
4742 *eventPP = s;
4743 *eventEndPP = next;
4744 if (tok <= 0) {
4745 if (haveMore && tok != XML_TOK_INVALID) {
4746 *nextPtr = s;
4747 return XML_ERROR_NONE;
4748 }
4749 switch (tok) {
4750 case XML_TOK_INVALID:
4751 *eventPP = next;
4752 return XML_ERROR_INVALID_TOKEN;
4753 case XML_TOK_PARTIAL:
4754 return XML_ERROR_UNCLOSED_TOKEN;
4755 case XML_TOK_PARTIAL_CHAR:
4756 return XML_ERROR_PARTIAL_CHAR;
4757 case -XML_TOK_PROLOG_S:
4758 tok = -tok;
4759 break;
4760 case XML_TOK_NONE:
4761 #ifdef XML_DTD
4762 /* for internal PE NOT referenced between declarations */
4763 if (enc != parser->m_encoding
4764 && ! parser->m_openInternalEntities->betweenDecl) {
4765 *nextPtr = s;
4766 return XML_ERROR_NONE;
4767 }
4768 /* WFC: PE Between Declarations - must check that PE contains
4769 complete markup, not only for external PEs, but also for
4770 internal PEs if the reference occurs between declarations.
4771 */
4772 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4773 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4774 == XML_ROLE_ERROR)
4775 return XML_ERROR_INCOMPLETE_PE;
4776 *nextPtr = s;
4777 return XML_ERROR_NONE;
4778 }
4779 #endif /* XML_DTD */
4780 return XML_ERROR_NO_ELEMENTS;
4781 default:
4782 tok = -tok;
4783 next = end;
4784 break;
4785 }
4786 }
4787 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4788 #if XML_GE == 1
4789 switch (role) {
4790 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4791 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4792 # ifdef XML_DTD
4793 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4794 # endif
4795 break;
4796 default:
4797 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4798 accountingOnAbort(parser);
4799 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4800 }
4801 }
4802 #endif
4803 switch (role) {
4804 case XML_ROLE_XML_DECL: {
4805 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4806 if (result != XML_ERROR_NONE)
4807 return result;
4808 enc = parser->m_encoding;
4809 handleDefault = XML_FALSE;
4810 } break;
4811 case XML_ROLE_DOCTYPE_NAME:
4812 if (parser->m_startDoctypeDeclHandler) {
4813 parser->m_doctypeName
4814 = poolStoreString(&parser->m_tempPool, enc, s, next);
4815 if (! parser->m_doctypeName)
4816 return XML_ERROR_NO_MEMORY;
4817 poolFinish(&parser->m_tempPool);
4818 parser->m_doctypePubid = NULL;
4819 handleDefault = XML_FALSE;
4820 }
4821 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4822 break;
4823 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4824 if (parser->m_startDoctypeDeclHandler) {
4825 parser->m_startDoctypeDeclHandler(
4826 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4827 parser->m_doctypePubid, 1);
4828 parser->m_doctypeName = NULL;
4829 poolClear(&parser->m_tempPool);
4830 handleDefault = XML_FALSE;
4831 }
4832 break;
4833 #ifdef XML_DTD
4834 case XML_ROLE_TEXT_DECL: {
4835 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4836 if (result != XML_ERROR_NONE)
4837 return result;
4838 enc = parser->m_encoding;
4839 handleDefault = XML_FALSE;
4840 } break;
4841 #endif /* XML_DTD */
4842 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4843 #ifdef XML_DTD
4844 parser->m_useForeignDTD = XML_FALSE;
4845 parser->m_declEntity = (ENTITY *)lookup(
4846 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4847 if (! parser->m_declEntity)
4848 return XML_ERROR_NO_MEMORY;
4849 #endif /* XML_DTD */
4850 dtd->hasParamEntityRefs = XML_TRUE;
4851 if (parser->m_startDoctypeDeclHandler) {
4852 XML_Char *pubId;
4853 if (! XmlIsPublicId(enc, s, next, eventPP))
4854 return XML_ERROR_PUBLICID;
4855 pubId = poolStoreString(&parser->m_tempPool, enc,
4856 s + enc->minBytesPerChar,
4857 next - enc->minBytesPerChar);
4858 if (! pubId)
4859 return XML_ERROR_NO_MEMORY;
4860 normalizePublicId(pubId);
4861 poolFinish(&parser->m_tempPool);
4862 parser->m_doctypePubid = pubId;
4863 handleDefault = XML_FALSE;
4864 goto alreadyChecked;
4865 }
4866 /* fall through */
4867 case XML_ROLE_ENTITY_PUBLIC_ID:
4868 if (! XmlIsPublicId(enc, s, next, eventPP))
4869 return XML_ERROR_PUBLICID;
4870 alreadyChecked:
4871 if (dtd->keepProcessing && parser->m_declEntity) {
4872 XML_Char *tem
4873 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4874 next - enc->minBytesPerChar);
4875 if (! tem)
4876 return XML_ERROR_NO_MEMORY;
4877 normalizePublicId(tem);
4878 parser->m_declEntity->publicId = tem;
4879 poolFinish(&dtd->pool);
4880 /* Don't suppress the default handler if we fell through from
4881 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4882 */
4883 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4884 handleDefault = XML_FALSE;
4885 }
4886 break;
4887 case XML_ROLE_DOCTYPE_CLOSE:
4888 if (allowClosingDoctype != XML_TRUE) {
4889 /* Must not close doctype from within expanded parameter entities */
4890 return XML_ERROR_INVALID_TOKEN;
4891 }
4892
4893 if (parser->m_doctypeName) {
4894 parser->m_startDoctypeDeclHandler(
4895 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4896 parser->m_doctypePubid, 0);
4897 poolClear(&parser->m_tempPool);
4898 handleDefault = XML_FALSE;
4899 }
4900 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4901 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4902 was not set, indicating an external subset
4903 */
4904 #ifdef XML_DTD
4905 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4906 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4907 dtd->hasParamEntityRefs = XML_TRUE;
4908 if (parser->m_paramEntityParsing
4909 && parser->m_externalEntityRefHandler) {
4910 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4911 externalSubsetName, sizeof(ENTITY));
4912 if (! entity) {
4913 /* The external subset name "#" will have already been
4914 * inserted into the hash table at the start of the
4915 * external entity parsing, so no allocation will happen
4916 * and lookup() cannot fail.
4917 */
4918 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4919 }
4920 if (parser->m_useForeignDTD)
4921 entity->base = parser->m_curBase;
4922 dtd->paramEntityRead = XML_FALSE;
4923 if (! parser->m_externalEntityRefHandler(
4924 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4925 entity->systemId, entity->publicId))
4926 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4927 if (dtd->paramEntityRead) {
4928 if (! dtd->standalone && parser->m_notStandaloneHandler
4929 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4930 return XML_ERROR_NOT_STANDALONE;
4931 }
4932 /* if we didn't read the foreign DTD then this means that there
4933 is no external subset and we must reset dtd->hasParamEntityRefs
4934 */
4935 else if (! parser->m_doctypeSysid)
4936 dtd->hasParamEntityRefs = hadParamEntityRefs;
4937 /* end of DTD - no need to update dtd->keepProcessing */
4938 }
4939 parser->m_useForeignDTD = XML_FALSE;
4940 }
4941 #endif /* XML_DTD */
4942 if (parser->m_endDoctypeDeclHandler) {
4943 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4944 handleDefault = XML_FALSE;
4945 }
4946 break;
4947 case XML_ROLE_INSTANCE_START:
4948 #ifdef XML_DTD
4949 /* if there is no DOCTYPE declaration then now is the
4950 last chance to read the foreign DTD
4951 */
4952 if (parser->m_useForeignDTD) {
4953 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4954 dtd->hasParamEntityRefs = XML_TRUE;
4955 if (parser->m_paramEntityParsing
4956 && parser->m_externalEntityRefHandler) {
4957 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4958 externalSubsetName, sizeof(ENTITY));
4959 if (! entity)
4960 return XML_ERROR_NO_MEMORY;
4961 entity->base = parser->m_curBase;
4962 dtd->paramEntityRead = XML_FALSE;
4963 if (! parser->m_externalEntityRefHandler(
4964 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4965 entity->systemId, entity->publicId))
4966 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4967 if (dtd->paramEntityRead) {
4968 if (! dtd->standalone && parser->m_notStandaloneHandler
4969 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4970 return XML_ERROR_NOT_STANDALONE;
4971 }
4972 /* if we didn't read the foreign DTD then this means that there
4973 is no external subset and we must reset dtd->hasParamEntityRefs
4974 */
4975 else
4976 dtd->hasParamEntityRefs = hadParamEntityRefs;
4977 /* end of DTD - no need to update dtd->keepProcessing */
4978 }
4979 }
4980 #endif /* XML_DTD */
4981 parser->m_processor = contentProcessor;
4982 return contentProcessor(parser, s, end, nextPtr);
4983 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4984 parser->m_declElementType = getElementType(parser, enc, s, next);
4985 if (! parser->m_declElementType)
4986 return XML_ERROR_NO_MEMORY;
4987 goto checkAttListDeclHandler;
4988 case XML_ROLE_ATTRIBUTE_NAME:
4989 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4990 if (! parser->m_declAttributeId)
4991 return XML_ERROR_NO_MEMORY;
4992 parser->m_declAttributeIsCdata = XML_FALSE;
4993 parser->m_declAttributeType = NULL;
4994 parser->m_declAttributeIsId = XML_FALSE;
4995 goto checkAttListDeclHandler;
4996 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4997 parser->m_declAttributeIsCdata = XML_TRUE;
4998 parser->m_declAttributeType = atypeCDATA;
4999 goto checkAttListDeclHandler;
5000 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5001 parser->m_declAttributeIsId = XML_TRUE;
5002 parser->m_declAttributeType = atypeID;
5003 goto checkAttListDeclHandler;
5004 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5005 parser->m_declAttributeType = atypeIDREF;
5006 goto checkAttListDeclHandler;
5007 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5008 parser->m_declAttributeType = atypeIDREFS;
5009 goto checkAttListDeclHandler;
5010 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5011 parser->m_declAttributeType = atypeENTITY;
5012 goto checkAttListDeclHandler;
5013 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5014 parser->m_declAttributeType = atypeENTITIES;
5015 goto checkAttListDeclHandler;
5016 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5017 parser->m_declAttributeType = atypeNMTOKEN;
5018 goto checkAttListDeclHandler;
5019 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5020 parser->m_declAttributeType = atypeNMTOKENS;
5021 checkAttListDeclHandler:
5022 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5023 handleDefault = XML_FALSE;
5024 break;
5025 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5026 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5027 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5028 const XML_Char *prefix;
5029 if (parser->m_declAttributeType) {
5030 prefix = enumValueSep;
5031 } else {
5032 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5033 : enumValueStart);
5034 }
5035 if (! poolAppendString(&parser->m_tempPool, prefix))
5036 return XML_ERROR_NO_MEMORY;
5037 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5038 return XML_ERROR_NO_MEMORY;
5039 parser->m_declAttributeType = parser->m_tempPool.start;
5040 handleDefault = XML_FALSE;
5041 }
5042 break;
5043 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5044 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5045 if (dtd->keepProcessing) {
5046 if (! defineAttribute(parser->m_declElementType,
5047 parser->m_declAttributeId,
5048 parser->m_declAttributeIsCdata,
5049 parser->m_declAttributeIsId, 0, parser))
5050 return XML_ERROR_NO_MEMORY;
5051 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5052 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5053 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5054 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5055 /* Enumerated or Notation type */
5056 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5057 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5058 return XML_ERROR_NO_MEMORY;
5059 parser->m_declAttributeType = parser->m_tempPool.start;
5060 poolFinish(&parser->m_tempPool);
5061 }
5062 *eventEndPP = s;
5063 parser->m_attlistDeclHandler(
5064 parser->m_handlerArg, parser->m_declElementType->name,
5065 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5066 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5067 handleDefault = XML_FALSE;
5068 }
5069 }
5070 poolClear(&parser->m_tempPool);
5071 break;
5072 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5073 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5074 if (dtd->keepProcessing) {
5075 const XML_Char *attVal;
5076 enum XML_Error result = storeAttributeValue(
5077 parser, enc, parser->m_declAttributeIsCdata,
5078 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5079 XML_ACCOUNT_NONE);
5080 if (result)
5081 return result;
5082 attVal = poolStart(&dtd->pool);
5083 poolFinish(&dtd->pool);
5084 /* ID attributes aren't allowed to have a default */
5085 if (! defineAttribute(
5086 parser->m_declElementType, parser->m_declAttributeId,
5087 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5088 return XML_ERROR_NO_MEMORY;
5089 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5090 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5091 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5092 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5093 /* Enumerated or Notation type */
5094 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5095 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5096 return XML_ERROR_NO_MEMORY;
5097 parser->m_declAttributeType = parser->m_tempPool.start;
5098 poolFinish(&parser->m_tempPool);
5099 }
5100 *eventEndPP = s;
5101 parser->m_attlistDeclHandler(
5102 parser->m_handlerArg, parser->m_declElementType->name,
5103 parser->m_declAttributeId->name, parser->m_declAttributeType,
5104 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5105 poolClear(&parser->m_tempPool);
5106 handleDefault = XML_FALSE;
5107 }
5108 }
5109 break;
5110 case XML_ROLE_ENTITY_VALUE:
5111 if (dtd->keepProcessing) {
5112 #if XML_GE == 1
5113 // This will store the given replacement text in
5114 // parser->m_declEntity->textPtr.
5115 enum XML_Error result
5116 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5117 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5118 if (parser->m_declEntity) {
5119 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5120 parser->m_declEntity->textLen
5121 = (int)(poolLength(&dtd->entityValuePool));
5122 poolFinish(&dtd->entityValuePool);
5123 if (parser->m_entityDeclHandler) {
5124 *eventEndPP = s;
5125 parser->m_entityDeclHandler(
5126 parser->m_handlerArg, parser->m_declEntity->name,
5127 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5128 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5129 handleDefault = XML_FALSE;
5130 }
5131 } else
5132 poolDiscard(&dtd->entityValuePool);
5133 if (result != XML_ERROR_NONE)
5134 return result;
5135 #else
5136 // This will store "&entity123;" in parser->m_declEntity->textPtr
5137 // to end up as "&entity123;" in the handler.
5138 if (parser->m_declEntity != NULL) {
5139 const enum XML_Error result
5140 = storeSelfEntityValue(parser, parser->m_declEntity);
5141 if (result != XML_ERROR_NONE)
5142 return result;
5143
5144 if (parser->m_entityDeclHandler) {
5145 *eventEndPP = s;
5146 parser->m_entityDeclHandler(
5147 parser->m_handlerArg, parser->m_declEntity->name,
5148 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5149 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5150 handleDefault = XML_FALSE;
5151 }
5152 }
5153 #endif
5154 }
5155 break;
5156 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5157 #ifdef XML_DTD
5158 parser->m_useForeignDTD = XML_FALSE;
5159 #endif /* XML_DTD */
5160 dtd->hasParamEntityRefs = XML_TRUE;
5161 if (parser->m_startDoctypeDeclHandler) {
5162 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5163 s + enc->minBytesPerChar,
5164 next - enc->minBytesPerChar);
5165 if (parser->m_doctypeSysid == NULL)
5166 return XML_ERROR_NO_MEMORY;
5167 poolFinish(&parser->m_tempPool);
5168 handleDefault = XML_FALSE;
5169 }
5170 #ifdef XML_DTD
5171 else
5172 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5173 for the case where no parser->m_startDoctypeDeclHandler is set */
5174 parser->m_doctypeSysid = externalSubsetName;
5175 #endif /* XML_DTD */
5176 if (! dtd->standalone
5177 #ifdef XML_DTD
5178 && ! parser->m_paramEntityParsing
5179 #endif /* XML_DTD */
5180 && parser->m_notStandaloneHandler
5181 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5182 return XML_ERROR_NOT_STANDALONE;
5183 #ifndef XML_DTD
5184 break;
5185 #else /* XML_DTD */
5186 if (! parser->m_declEntity) {
5187 parser->m_declEntity = (ENTITY *)lookup(
5188 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5189 if (! parser->m_declEntity)
5190 return XML_ERROR_NO_MEMORY;
5191 parser->m_declEntity->publicId = NULL;
5192 }
5193 #endif /* XML_DTD */
5194 /* fall through */
5195 case XML_ROLE_ENTITY_SYSTEM_ID:
5196 if (dtd->keepProcessing && parser->m_declEntity) {
5197 parser->m_declEntity->systemId
5198 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5199 next - enc->minBytesPerChar);
5200 if (! parser->m_declEntity->systemId)
5201 return XML_ERROR_NO_MEMORY;
5202 parser->m_declEntity->base = parser->m_curBase;
5203 poolFinish(&dtd->pool);
5204 /* Don't suppress the default handler if we fell through from
5205 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5206 */
5207 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5208 handleDefault = XML_FALSE;
5209 }
5210 break;
5211 case XML_ROLE_ENTITY_COMPLETE:
5212 #if XML_GE == 0
5213 // This will store "&entity123;" in entity->textPtr
5214 // to end up as "&entity123;" in the handler.
5215 if (parser->m_declEntity != NULL) {
5216 const enum XML_Error result
5217 = storeSelfEntityValue(parser, parser->m_declEntity);
5218 if (result != XML_ERROR_NONE)
5219 return result;
5220 }
5221 #endif
5222 if (dtd->keepProcessing && parser->m_declEntity
5223 && parser->m_entityDeclHandler) {
5224 *eventEndPP = s;
5225 parser->m_entityDeclHandler(
5226 parser->m_handlerArg, parser->m_declEntity->name,
5227 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5228 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5229 handleDefault = XML_FALSE;
5230 }
5231 break;
5232 case XML_ROLE_ENTITY_NOTATION_NAME:
5233 if (dtd->keepProcessing && parser->m_declEntity) {
5234 parser->m_declEntity->notation
5235 = poolStoreString(&dtd->pool, enc, s, next);
5236 if (! parser->m_declEntity->notation)
5237 return XML_ERROR_NO_MEMORY;
5238 poolFinish(&dtd->pool);
5239 if (parser->m_unparsedEntityDeclHandler) {
5240 *eventEndPP = s;
5241 parser->m_unparsedEntityDeclHandler(
5242 parser->m_handlerArg, parser->m_declEntity->name,
5243 parser->m_declEntity->base, parser->m_declEntity->systemId,
5244 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5245 handleDefault = XML_FALSE;
5246 } else if (parser->m_entityDeclHandler) {
5247 *eventEndPP = s;
5248 parser->m_entityDeclHandler(
5249 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5250 parser->m_declEntity->base, parser->m_declEntity->systemId,
5251 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5252 handleDefault = XML_FALSE;
5253 }
5254 }
5255 break;
5256 case XML_ROLE_GENERAL_ENTITY_NAME: {
5257 if (XmlPredefinedEntityName(enc, s, next)) {
5258 parser->m_declEntity = NULL;
5259 break;
5260 }
5261 if (dtd->keepProcessing) {
5262 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5263 if (! name)
5264 return XML_ERROR_NO_MEMORY;
5265 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5266 name, sizeof(ENTITY));
5267 if (! parser->m_declEntity)
5268 return XML_ERROR_NO_MEMORY;
5269 if (parser->m_declEntity->name != name) {
5270 poolDiscard(&dtd->pool);
5271 parser->m_declEntity = NULL;
5272 } else {
5273 poolFinish(&dtd->pool);
5274 parser->m_declEntity->publicId = NULL;
5275 parser->m_declEntity->is_param = XML_FALSE;
5276 /* if we have a parent parser or are reading an internal parameter
5277 entity, then the entity declaration is not considered "internal"
5278 */
5279 parser->m_declEntity->is_internal
5280 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5281 if (parser->m_entityDeclHandler)
5282 handleDefault = XML_FALSE;
5283 }
5284 } else {
5285 poolDiscard(&dtd->pool);
5286 parser->m_declEntity = NULL;
5287 }
5288 } break;
5289 case XML_ROLE_PARAM_ENTITY_NAME:
5290 #ifdef XML_DTD
5291 if (dtd->keepProcessing) {
5292 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5293 if (! name)
5294 return XML_ERROR_NO_MEMORY;
5295 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5296 name, sizeof(ENTITY));
5297 if (! parser->m_declEntity)
5298 return XML_ERROR_NO_MEMORY;
5299 if (parser->m_declEntity->name != name) {
5300 poolDiscard(&dtd->pool);
5301 parser->m_declEntity = NULL;
5302 } else {
5303 poolFinish(&dtd->pool);
5304 parser->m_declEntity->publicId = NULL;
5305 parser->m_declEntity->is_param = XML_TRUE;
5306 /* if we have a parent parser or are reading an internal parameter
5307 entity, then the entity declaration is not considered "internal"
5308 */
5309 parser->m_declEntity->is_internal
5310 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5311 if (parser->m_entityDeclHandler)
5312 handleDefault = XML_FALSE;
5313 }
5314 } else {
5315 poolDiscard(&dtd->pool);
5316 parser->m_declEntity = NULL;
5317 }
5318 #else /* not XML_DTD */
5319 parser->m_declEntity = NULL;
5320 #endif /* XML_DTD */
5321 break;
5322 case XML_ROLE_NOTATION_NAME:
5323 parser->m_declNotationPublicId = NULL;
5324 parser->m_declNotationName = NULL;
5325 if (parser->m_notationDeclHandler) {
5326 parser->m_declNotationName
5327 = poolStoreString(&parser->m_tempPool, enc, s, next);
5328 if (! parser->m_declNotationName)
5329 return XML_ERROR_NO_MEMORY;
5330 poolFinish(&parser->m_tempPool);
5331 handleDefault = XML_FALSE;
5332 }
5333 break;
5334 case XML_ROLE_NOTATION_PUBLIC_ID:
5335 if (! XmlIsPublicId(enc, s, next, eventPP))
5336 return XML_ERROR_PUBLICID;
5337 if (parser
5338 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5339 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5340 s + enc->minBytesPerChar,
5341 next - enc->minBytesPerChar);
5342 if (! tem)
5343 return XML_ERROR_NO_MEMORY;
5344 normalizePublicId(tem);
5345 parser->m_declNotationPublicId = tem;
5346 poolFinish(&parser->m_tempPool);
5347 handleDefault = XML_FALSE;
5348 }
5349 break;
5350 case XML_ROLE_NOTATION_SYSTEM_ID:
5351 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5352 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5353 s + enc->minBytesPerChar,
5354 next - enc->minBytesPerChar);
5355 if (! systemId)
5356 return XML_ERROR_NO_MEMORY;
5357 *eventEndPP = s;
5358 parser->m_notationDeclHandler(
5359 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5360 systemId, parser->m_declNotationPublicId);
5361 handleDefault = XML_FALSE;
5362 }
5363 poolClear(&parser->m_tempPool);
5364 break;
5365 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5366 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5367 *eventEndPP = s;
5368 parser->m_notationDeclHandler(
5369 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5370 0, parser->m_declNotationPublicId);
5371 handleDefault = XML_FALSE;
5372 }
5373 poolClear(&parser->m_tempPool);
5374 break;
5375 case XML_ROLE_ERROR:
5376 switch (tok) {
5377 case XML_TOK_PARAM_ENTITY_REF:
5378 /* PE references in internal subset are
5379 not allowed within declarations. */
5380 return XML_ERROR_PARAM_ENTITY_REF;
5381 case XML_TOK_XML_DECL:
5382 return XML_ERROR_MISPLACED_XML_PI;
5383 default:
5384 return XML_ERROR_SYNTAX;
5385 }
5386 #ifdef XML_DTD
5387 case XML_ROLE_IGNORE_SECT: {
5388 enum XML_Error result;
5389 if (parser->m_defaultHandler)
5390 reportDefault(parser, enc, s, next);
5391 handleDefault = XML_FALSE;
5392 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5393 if (result != XML_ERROR_NONE)
5394 return result;
5395 else if (! next) {
5396 parser->m_processor = ignoreSectionProcessor;
5397 return result;
5398 }
5399 } break;
5400 #endif /* XML_DTD */
5401 case XML_ROLE_GROUP_OPEN:
5402 if (parser->m_prologState.level >= parser->m_groupSize) {
5403 if (parser->m_groupSize) {
5404 {
5405 /* Detect and prevent integer overflow */
5406 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5407 return XML_ERROR_NO_MEMORY;
5408 }
5409
5410 char *const new_connector = (char *)REALLOC(
5411 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5412 if (new_connector == NULL) {
5413 parser->m_groupSize /= 2;
5414 return XML_ERROR_NO_MEMORY;
5415 }
5416 parser->m_groupConnector = new_connector;
5417 }
5418
5419 if (dtd->scaffIndex) {
5420 /* Detect and prevent integer overflow.
5421 * The preprocessor guard addresses the "always false" warning
5422 * from -Wtype-limits on platforms where
5423 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5424 #if UINT_MAX >= SIZE_MAX
5425 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5426 return XML_ERROR_NO_MEMORY;
5427 }
5428 #endif
5429
5430 int *const new_scaff_index = (int *)REALLOC(
5431 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5432 if (new_scaff_index == NULL)
5433 return XML_ERROR_NO_MEMORY;
5434 dtd->scaffIndex = new_scaff_index;
5435 }
5436 } else {
5437 parser->m_groupConnector
5438 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5439 if (! parser->m_groupConnector) {
5440 parser->m_groupSize = 0;
5441 return XML_ERROR_NO_MEMORY;
5442 }
5443 }
5444 }
5445 parser->m_groupConnector[parser->m_prologState.level] = 0;
5446 if (dtd->in_eldecl) {
5447 int myindex = nextScaffoldPart(parser);
5448 if (myindex < 0)
5449 return XML_ERROR_NO_MEMORY;
5450 assert(dtd->scaffIndex != NULL);
5451 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5452 dtd->scaffLevel++;
5453 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5454 if (parser->m_elementDeclHandler)
5455 handleDefault = XML_FALSE;
5456 }
5457 break;
5458 case XML_ROLE_GROUP_SEQUENCE:
5459 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5460 return XML_ERROR_SYNTAX;
5461 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5462 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5463 handleDefault = XML_FALSE;
5464 break;
5465 case XML_ROLE_GROUP_CHOICE:
5466 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5467 return XML_ERROR_SYNTAX;
5468 if (dtd->in_eldecl
5469 && ! parser->m_groupConnector[parser->m_prologState.level]
5470 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5471 != XML_CTYPE_MIXED)) {
5472 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5473 = XML_CTYPE_CHOICE;
5474 if (parser->m_elementDeclHandler)
5475 handleDefault = XML_FALSE;
5476 }
5477 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5478 break;
5479 case XML_ROLE_PARAM_ENTITY_REF:
5480 #ifdef XML_DTD
5481 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5482 dtd->hasParamEntityRefs = XML_TRUE;
5483 if (! parser->m_paramEntityParsing)
5484 dtd->keepProcessing = dtd->standalone;
5485 else {
5486 const XML_Char *name;
5487 ENTITY *entity;
5488 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5489 next - enc->minBytesPerChar);
5490 if (! name)
5491 return XML_ERROR_NO_MEMORY;
5492 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5493 poolDiscard(&dtd->pool);
5494 /* first, determine if a check for an existing declaration is needed;
5495 if yes, check that the entity exists, and that it is internal,
5496 otherwise call the skipped entity handler
5497 */
5498 if (parser->m_prologState.documentEntity
5499 && (dtd->standalone ? ! parser->m_openInternalEntities
5500 : ! dtd->hasParamEntityRefs)) {
5501 if (! entity)
5502 return XML_ERROR_UNDEFINED_ENTITY;
5503 else if (! entity->is_internal) {
5504 /* It's hard to exhaustively search the code to be sure,
5505 * but there doesn't seem to be a way of executing the
5506 * following line. There are two cases:
5507 *
5508 * If 'standalone' is false, the DTD must have no
5509 * parameter entities or we wouldn't have passed the outer
5510 * 'if' statement. That means the only entity in the hash
5511 * table is the external subset name "#" which cannot be
5512 * given as a parameter entity name in XML syntax, so the
5513 * lookup must have returned NULL and we don't even reach
5514 * the test for an internal entity.
5515 *
5516 * If 'standalone' is true, it does not seem to be
5517 * possible to create entities taking this code path that
5518 * are not internal entities, so fail the test above.
5519 *
5520 * Because this analysis is very uncertain, the code is
5521 * being left in place and merely removed from the
5522 * coverage test statistics.
5523 */
5524 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5525 }
5526 } else if (! entity) {
5527 dtd->keepProcessing = dtd->standalone;
5528 /* cannot report skipped entities in declarations */
5529 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5530 && parser->m_skippedEntityHandler) {
5531 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5532 handleDefault = XML_FALSE;
5533 }
5534 break;
5535 }
5536 if (entity->open)
5537 return XML_ERROR_RECURSIVE_ENTITY_REF;
5538 if (entity->textPtr) {
5539 enum XML_Error result;
5540 XML_Bool betweenDecl
5541 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5542 result = processInternalEntity(parser, entity, betweenDecl);
5543 if (result != XML_ERROR_NONE)
5544 return result;
5545 handleDefault = XML_FALSE;
5546 break;
5547 }
5548 if (parser->m_externalEntityRefHandler) {
5549 dtd->paramEntityRead = XML_FALSE;
5550 entity->open = XML_TRUE;
5551 entityTrackingOnOpen(parser, entity, __LINE__);
5552 if (! parser->m_externalEntityRefHandler(
5553 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5554 entity->systemId, entity->publicId)) {
5555 entityTrackingOnClose(parser, entity, __LINE__);
5556 entity->open = XML_FALSE;
5557 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5558 }
5559 entityTrackingOnClose(parser, entity, __LINE__);
5560 entity->open = XML_FALSE;
5561 handleDefault = XML_FALSE;
5562 if (! dtd->paramEntityRead) {
5563 dtd->keepProcessing = dtd->standalone;
5564 break;
5565 }
5566 } else {
5567 dtd->keepProcessing = dtd->standalone;
5568 break;
5569 }
5570 }
5571 #endif /* XML_DTD */
5572 if (! dtd->standalone && parser->m_notStandaloneHandler
5573 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5574 return XML_ERROR_NOT_STANDALONE;
5575 break;
5576
5577 /* Element declaration stuff */
5578
5579 case XML_ROLE_ELEMENT_NAME:
5580 if (parser->m_elementDeclHandler) {
5581 parser->m_declElementType = getElementType(parser, enc, s, next);
5582 if (! parser->m_declElementType)
5583 return XML_ERROR_NO_MEMORY;
5584 dtd->scaffLevel = 0;
5585 dtd->scaffCount = 0;
5586 dtd->in_eldecl = XML_TRUE;
5587 handleDefault = XML_FALSE;
5588 }
5589 break;
5590
5591 case XML_ROLE_CONTENT_ANY:
5592 case XML_ROLE_CONTENT_EMPTY:
5593 if (dtd->in_eldecl) {
5594 if (parser->m_elementDeclHandler) {
5595 XML_Content *content
5596 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5597 if (! content)
5598 return XML_ERROR_NO_MEMORY;
5599 content->quant = XML_CQUANT_NONE;
5600 content->name = NULL;
5601 content->numchildren = 0;
5602 content->children = NULL;
5603 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5604 : XML_CTYPE_EMPTY);
5605 *eventEndPP = s;
5606 parser->m_elementDeclHandler(
5607 parser->m_handlerArg, parser->m_declElementType->name, content);
5608 handleDefault = XML_FALSE;
5609 }
5610 dtd->in_eldecl = XML_FALSE;
5611 }
5612 break;
5613
5614 case XML_ROLE_CONTENT_PCDATA:
5615 if (dtd->in_eldecl) {
5616 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5617 = XML_CTYPE_MIXED;
5618 if (parser->m_elementDeclHandler)
5619 handleDefault = XML_FALSE;
5620 }
5621 break;
5622
5623 case XML_ROLE_CONTENT_ELEMENT:
5624 quant = XML_CQUANT_NONE;
5625 goto elementContent;
5626 case XML_ROLE_CONTENT_ELEMENT_OPT:
5627 quant = XML_CQUANT_OPT;
5628 goto elementContent;
5629 case XML_ROLE_CONTENT_ELEMENT_REP:
5630 quant = XML_CQUANT_REP;
5631 goto elementContent;
5632 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5633 quant = XML_CQUANT_PLUS;
5634 elementContent:
5635 if (dtd->in_eldecl) {
5636 ELEMENT_TYPE *el;
5637 const XML_Char *name;
5638 size_t nameLen;
5639 const char *nxt
5640 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5641 int myindex = nextScaffoldPart(parser);
5642 if (myindex < 0)
5643 return XML_ERROR_NO_MEMORY;
5644 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5645 dtd->scaffold[myindex].quant = quant;
5646 el = getElementType(parser, enc, s, nxt);
5647 if (! el)
5648 return XML_ERROR_NO_MEMORY;
5649 name = el->name;
5650 dtd->scaffold[myindex].name = name;
5651 nameLen = 0;
5652 for (; name[nameLen++];)
5653 ;
5654
5655 /* Detect and prevent integer overflow */
5656 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5657 return XML_ERROR_NO_MEMORY;
5658 }
5659
5660 dtd->contentStringLen += (unsigned)nameLen;
5661 if (parser->m_elementDeclHandler)
5662 handleDefault = XML_FALSE;
5663 }
5664 break;
5665
5666 case XML_ROLE_GROUP_CLOSE:
5667 quant = XML_CQUANT_NONE;
5668 goto closeGroup;
5669 case XML_ROLE_GROUP_CLOSE_OPT:
5670 quant = XML_CQUANT_OPT;
5671 goto closeGroup;
5672 case XML_ROLE_GROUP_CLOSE_REP:
5673 quant = XML_CQUANT_REP;
5674 goto closeGroup;
5675 case XML_ROLE_GROUP_CLOSE_PLUS:
5676 quant = XML_CQUANT_PLUS;
5677 closeGroup:
5678 if (dtd->in_eldecl) {
5679 if (parser->m_elementDeclHandler)
5680 handleDefault = XML_FALSE;
5681 dtd->scaffLevel--;
5682 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5683 if (dtd->scaffLevel == 0) {
5684 if (! handleDefault) {
5685 XML_Content *model = build_model(parser);
5686 if (! model)
5687 return XML_ERROR_NO_MEMORY;
5688 *eventEndPP = s;
5689 parser->m_elementDeclHandler(
5690 parser->m_handlerArg, parser->m_declElementType->name, model);
5691 }
5692 dtd->in_eldecl = XML_FALSE;
5693 dtd->contentStringLen = 0;
5694 }
5695 }
5696 break;
5697 /* End element declaration stuff */
5698
5699 case XML_ROLE_PI:
5700 if (! reportProcessingInstruction(parser, enc, s, next))
5701 return XML_ERROR_NO_MEMORY;
5702 handleDefault = XML_FALSE;
5703 break;
5704 case XML_ROLE_COMMENT:
5705 if (! reportComment(parser, enc, s, next))
5706 return XML_ERROR_NO_MEMORY;
5707 handleDefault = XML_FALSE;
5708 break;
5709 case XML_ROLE_NONE:
5710 switch (tok) {
5711 case XML_TOK_BOM:
5712 handleDefault = XML_FALSE;
5713 break;
5714 }
5715 break;
5716 case XML_ROLE_DOCTYPE_NONE:
5717 if (parser->m_startDoctypeDeclHandler)
5718 handleDefault = XML_FALSE;
5719 break;
5720 case XML_ROLE_ENTITY_NONE:
5721 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5722 handleDefault = XML_FALSE;
5723 break;
5724 case XML_ROLE_NOTATION_NONE:
5725 if (parser->m_notationDeclHandler)
5726 handleDefault = XML_FALSE;
5727 break;
5728 case XML_ROLE_ATTLIST_NONE:
5729 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5730 handleDefault = XML_FALSE;
5731 break;
5732 case XML_ROLE_ELEMENT_NONE:
5733 if (parser->m_elementDeclHandler)
5734 handleDefault = XML_FALSE;
5735 break;
5736 } /* end of big switch */
5737
5738 if (handleDefault && parser->m_defaultHandler)
5739 reportDefault(parser, enc, s, next);
5740
5741 switch (parser->m_parsingStatus.parsing) {
5742 case XML_SUSPENDED:
5743 *nextPtr = next;
5744 return XML_ERROR_NONE;
5745 case XML_FINISHED:
5746 return XML_ERROR_ABORTED;
5747 default:
5748 s = next;
5749 tok = XmlPrologTok(enc, s, end, &next);
5750 }
5751 }
5752 /* not reached */
5753 }
5754
5755 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5756 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5757 const char **nextPtr) {
5758 parser->m_processor = epilogProcessor;
5759 parser->m_eventPtr = s;
5760 for (;;) {
5761 const char *next = NULL;
5762 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5763 #if XML_GE == 1
5764 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5765 XML_ACCOUNT_DIRECT)) {
5766 accountingOnAbort(parser);
5767 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5768 }
5769 #endif
5770 parser->m_eventEndPtr = next;
5771 switch (tok) {
5772 /* report partial linebreak - it might be the last token */
5773 case -XML_TOK_PROLOG_S:
5774 if (parser->m_defaultHandler) {
5775 reportDefault(parser, parser->m_encoding, s, next);
5776 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5777 return XML_ERROR_ABORTED;
5778 }
5779 *nextPtr = next;
5780 return XML_ERROR_NONE;
5781 case XML_TOK_NONE:
5782 *nextPtr = s;
5783 return XML_ERROR_NONE;
5784 case XML_TOK_PROLOG_S:
5785 if (parser->m_defaultHandler)
5786 reportDefault(parser, parser->m_encoding, s, next);
5787 break;
5788 case XML_TOK_PI:
5789 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5790 return XML_ERROR_NO_MEMORY;
5791 break;
5792 case XML_TOK_COMMENT:
5793 if (! reportComment(parser, parser->m_encoding, s, next))
5794 return XML_ERROR_NO_MEMORY;
5795 break;
5796 case XML_TOK_INVALID:
5797 parser->m_eventPtr = next;
5798 return XML_ERROR_INVALID_TOKEN;
5799 case XML_TOK_PARTIAL:
5800 if (! parser->m_parsingStatus.finalBuffer) {
5801 *nextPtr = s;
5802 return XML_ERROR_NONE;
5803 }
5804 return XML_ERROR_UNCLOSED_TOKEN;
5805 case XML_TOK_PARTIAL_CHAR:
5806 if (! parser->m_parsingStatus.finalBuffer) {
5807 *nextPtr = s;
5808 return XML_ERROR_NONE;
5809 }
5810 return XML_ERROR_PARTIAL_CHAR;
5811 default:
5812 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5813 }
5814 parser->m_eventPtr = s = next;
5815 switch (parser->m_parsingStatus.parsing) {
5816 case XML_SUSPENDED:
5817 *nextPtr = next;
5818 return XML_ERROR_NONE;
5819 case XML_FINISHED:
5820 return XML_ERROR_ABORTED;
5821 default:;
5822 }
5823 }
5824 }
5825
5826 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5827 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5828 const char *textStart, *textEnd;
5829 const char *next;
5830 enum XML_Error result;
5831 OPEN_INTERNAL_ENTITY *openEntity;
5832
5833 if (parser->m_freeInternalEntities) {
5834 openEntity = parser->m_freeInternalEntities;
5835 parser->m_freeInternalEntities = openEntity->next;
5836 } else {
5837 openEntity
5838 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5839 if (! openEntity)
5840 return XML_ERROR_NO_MEMORY;
5841 }
5842 entity->open = XML_TRUE;
5843 #if XML_GE == 1
5844 entityTrackingOnOpen(parser, entity, __LINE__);
5845 #endif
5846 entity->processed = 0;
5847 openEntity->next = parser->m_openInternalEntities;
5848 parser->m_openInternalEntities = openEntity;
5849 openEntity->entity = entity;
5850 openEntity->startTagLevel = parser->m_tagLevel;
5851 openEntity->betweenDecl = betweenDecl;
5852 openEntity->internalEventPtr = NULL;
5853 openEntity->internalEventEndPtr = NULL;
5854 textStart = (const char *)entity->textPtr;
5855 textEnd = (const char *)(entity->textPtr + entity->textLen);
5856 /* Set a safe default value in case 'next' does not get set */
5857 next = textStart;
5858
5859 if (entity->is_param) {
5860 int tok
5861 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5862 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5863 tok, next, &next, XML_FALSE, XML_FALSE,
5864 XML_ACCOUNT_ENTITY_EXPANSION);
5865 } else {
5866 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5867 textStart, textEnd, &next, XML_FALSE,
5868 XML_ACCOUNT_ENTITY_EXPANSION);
5869 }
5870
5871 if (result == XML_ERROR_NONE) {
5872 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5873 entity->processed = (int)(next - textStart);
5874 parser->m_processor = internalEntityProcessor;
5875 } else if (parser->m_openInternalEntities->entity == entity) {
5876 #if XML_GE == 1
5877 entityTrackingOnClose(parser, entity, __LINE__);
5878 #endif /* XML_GE == 1 */
5879 entity->open = XML_FALSE;
5880 parser->m_openInternalEntities = openEntity->next;
5881 /* put openEntity back in list of free instances */
5882 openEntity->next = parser->m_freeInternalEntities;
5883 parser->m_freeInternalEntities = openEntity;
5884 }
5885 }
5886 return result;
5887 }
5888
5889 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5890 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5891 const char **nextPtr) {
5892 ENTITY *entity;
5893 const char *textStart, *textEnd;
5894 const char *next;
5895 enum XML_Error result;
5896 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5897 if (! openEntity)
5898 return XML_ERROR_UNEXPECTED_STATE;
5899
5900 entity = openEntity->entity;
5901 textStart = ((const char *)entity->textPtr) + entity->processed;
5902 textEnd = (const char *)(entity->textPtr + entity->textLen);
5903 /* Set a safe default value in case 'next' does not get set */
5904 next = textStart;
5905
5906 if (entity->is_param) {
5907 int tok
5908 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5909 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5910 tok, next, &next, XML_FALSE, XML_TRUE,
5911 XML_ACCOUNT_ENTITY_EXPANSION);
5912 } else {
5913 result = doContent(parser, openEntity->startTagLevel,
5914 parser->m_internalEncoding, textStart, textEnd, &next,
5915 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5916 }
5917
5918 if (result != XML_ERROR_NONE)
5919 return result;
5920
5921 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5922 entity->processed = (int)(next - (const char *)entity->textPtr);
5923 return result;
5924 }
5925
5926 #if XML_GE == 1
5927 entityTrackingOnClose(parser, entity, __LINE__);
5928 #endif
5929 entity->open = XML_FALSE;
5930 parser->m_openInternalEntities = openEntity->next;
5931 /* put openEntity back in list of free instances */
5932 openEntity->next = parser->m_freeInternalEntities;
5933 parser->m_freeInternalEntities = openEntity;
5934
5935 // If there are more open entities we want to stop right here and have the
5936 // upcoming call to XML_ResumeParser continue with entity content, or it would
5937 // be ignored altogether.
5938 if (parser->m_openInternalEntities != NULL
5939 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5940 return XML_ERROR_NONE;
5941 }
5942
5943 if (entity->is_param) {
5944 int tok;
5945 parser->m_processor = prologProcessor;
5946 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5947 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5948 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5949 XML_ACCOUNT_DIRECT);
5950 } else {
5951 parser->m_processor = contentProcessor;
5952 /* see externalEntityContentProcessor vs contentProcessor */
5953 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5954 parser->m_encoding, s, end, nextPtr,
5955 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5956 XML_ACCOUNT_DIRECT);
5957 if (result == XML_ERROR_NONE) {
5958 if (! storeRawNames(parser))
5959 return XML_ERROR_NO_MEMORY;
5960 }
5961 return result;
5962 }
5963 }
5964
5965 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5966 errorProcessor(XML_Parser parser, const char *s, const char *end,
5967 const char **nextPtr) {
5968 UNUSED_P(s);
5969 UNUSED_P(end);
5970 UNUSED_P(nextPtr);
5971 return parser->m_errorCode;
5972 }
5973
5974 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5975 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5976 const char *ptr, const char *end, STRING_POOL *pool,
5977 enum XML_Account account) {
5978 enum XML_Error result
5979 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5980 if (result)
5981 return result;
5982 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5983 poolChop(pool);
5984 if (! poolAppendChar(pool, XML_T('\0')))
5985 return XML_ERROR_NO_MEMORY;
5986 return XML_ERROR_NONE;
5987 }
5988
5989 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5990 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5991 const char *ptr, const char *end, STRING_POOL *pool,
5992 enum XML_Account account) {
5993 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5994 #ifndef XML_DTD
5995 UNUSED_P(account);
5996 #endif
5997
5998 for (;;) {
5999 const char *next
6000 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6001 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6002 #if XML_GE == 1
6003 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6004 accountingOnAbort(parser);
6005 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6006 }
6007 #endif
6008 switch (tok) {
6009 case XML_TOK_NONE:
6010 return XML_ERROR_NONE;
6011 case XML_TOK_INVALID:
6012 if (enc == parser->m_encoding)
6013 parser->m_eventPtr = next;
6014 return XML_ERROR_INVALID_TOKEN;
6015 case XML_TOK_PARTIAL:
6016 if (enc == parser->m_encoding)
6017 parser->m_eventPtr = ptr;
6018 return XML_ERROR_INVALID_TOKEN;
6019 case XML_TOK_CHAR_REF: {
6020 XML_Char buf[XML_ENCODE_MAX];
6021 int i;
6022 int n = XmlCharRefNumber(enc, ptr);
6023 if (n < 0) {
6024 if (enc == parser->m_encoding)
6025 parser->m_eventPtr = ptr;
6026 return XML_ERROR_BAD_CHAR_REF;
6027 }
6028 if (! isCdata && n == 0x20 /* space */
6029 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6030 break;
6031 n = XmlEncode(n, (ICHAR *)buf);
6032 /* The XmlEncode() functions can never return 0 here. That
6033 * error return happens if the code point passed in is either
6034 * negative or greater than or equal to 0x110000. The
6035 * XmlCharRefNumber() functions will all return a number
6036 * strictly less than 0x110000 or a negative value if an error
6037 * occurred. The negative value is intercepted above, so
6038 * XmlEncode() is never passed a value it might return an
6039 * error for.
6040 */
6041 for (i = 0; i < n; i++) {
6042 if (! poolAppendChar(pool, buf[i]))
6043 return XML_ERROR_NO_MEMORY;
6044 }
6045 } break;
6046 case XML_TOK_DATA_CHARS:
6047 if (! poolAppend(pool, enc, ptr, next))
6048 return XML_ERROR_NO_MEMORY;
6049 break;
6050 case XML_TOK_TRAILING_CR:
6051 next = ptr + enc->minBytesPerChar;
6052 /* fall through */
6053 case XML_TOK_ATTRIBUTE_VALUE_S:
6054 case XML_TOK_DATA_NEWLINE:
6055 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6056 break;
6057 if (! poolAppendChar(pool, 0x20))
6058 return XML_ERROR_NO_MEMORY;
6059 break;
6060 case XML_TOK_ENTITY_REF: {
6061 const XML_Char *name;
6062 ENTITY *entity;
6063 char checkEntityDecl;
6064 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6065 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6066 if (ch) {
6067 #if XML_GE == 1
6068 /* NOTE: We are replacing 4-6 characters original input for 1 character
6069 * so there is no amplification and hence recording without
6070 * protection. */
6071 accountingDiffTolerated(parser, tok, (char *)&ch,
6072 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6073 XML_ACCOUNT_ENTITY_EXPANSION);
6074 #endif /* XML_GE == 1 */
6075 if (! poolAppendChar(pool, ch))
6076 return XML_ERROR_NO_MEMORY;
6077 break;
6078 }
6079 name = poolStoreString(&parser->m_temp2Pool, enc,
6080 ptr + enc->minBytesPerChar,
6081 next - enc->minBytesPerChar);
6082 if (! name)
6083 return XML_ERROR_NO_MEMORY;
6084 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6085 poolDiscard(&parser->m_temp2Pool);
6086 /* First, determine if a check for an existing declaration is needed;
6087 if yes, check that the entity exists, and that it is internal.
6088 */
6089 if (pool == &dtd->pool) /* are we called from prolog? */
6090 checkEntityDecl =
6091 #ifdef XML_DTD
6092 parser->m_prologState.documentEntity &&
6093 #endif /* XML_DTD */
6094 (dtd->standalone ? ! parser->m_openInternalEntities
6095 : ! dtd->hasParamEntityRefs);
6096 else /* if (pool == &parser->m_tempPool): we are called from content */
6097 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6098 if (checkEntityDecl) {
6099 if (! entity)
6100 return XML_ERROR_UNDEFINED_ENTITY;
6101 else if (! entity->is_internal)
6102 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6103 } else if (! entity) {
6104 /* Cannot report skipped entity here - see comments on
6105 parser->m_skippedEntityHandler.
6106 if (parser->m_skippedEntityHandler)
6107 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6108 */
6109 /* Cannot call the default handler because this would be
6110 out of sync with the call to the startElementHandler.
6111 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6112 reportDefault(parser, enc, ptr, next);
6113 */
6114 break;
6115 }
6116 if (entity->open) {
6117 if (enc == parser->m_encoding) {
6118 /* It does not appear that this line can be executed.
6119 *
6120 * The "if (entity->open)" check catches recursive entity
6121 * definitions. In order to be called with an open
6122 * entity, it must have gone through this code before and
6123 * been through the recursive call to
6124 * appendAttributeValue() some lines below. That call
6125 * sets the local encoding ("enc") to the parser's
6126 * internal encoding (internal_utf8 or internal_utf16),
6127 * which can never be the same as the principle encoding.
6128 * It doesn't appear there is another code path that gets
6129 * here with entity->open being TRUE.
6130 *
6131 * Since it is not certain that this logic is watertight,
6132 * we keep the line and merely exclude it from coverage
6133 * tests.
6134 */
6135 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6136 }
6137 return XML_ERROR_RECURSIVE_ENTITY_REF;
6138 }
6139 if (entity->notation) {
6140 if (enc == parser->m_encoding)
6141 parser->m_eventPtr = ptr;
6142 return XML_ERROR_BINARY_ENTITY_REF;
6143 }
6144 if (! entity->textPtr) {
6145 if (enc == parser->m_encoding)
6146 parser->m_eventPtr = ptr;
6147 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6148 } else {
6149 enum XML_Error result;
6150 const XML_Char *textEnd = entity->textPtr + entity->textLen;
6151 entity->open = XML_TRUE;
6152 #if XML_GE == 1
6153 entityTrackingOnOpen(parser, entity, __LINE__);
6154 #endif
6155 result = appendAttributeValue(parser, parser->m_internalEncoding,
6156 isCdata, (const char *)entity->textPtr,
6157 (const char *)textEnd, pool,
6158 XML_ACCOUNT_ENTITY_EXPANSION);
6159 #if XML_GE == 1
6160 entityTrackingOnClose(parser, entity, __LINE__);
6161 #endif
6162 entity->open = XML_FALSE;
6163 if (result)
6164 return result;
6165 }
6166 } break;
6167 default:
6168 /* The only token returned by XmlAttributeValueTok() that does
6169 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6170 * Getting that would require an entity name to contain an
6171 * incomplete XML character (e.g. \xE2\x82); however previous
6172 * tokenisers will have already recognised and rejected such
6173 * names before XmlAttributeValueTok() gets a look-in. This
6174 * default case should be retained as a safety net, but the code
6175 * excluded from coverage tests.
6176 *
6177 * LCOV_EXCL_START
6178 */
6179 if (enc == parser->m_encoding)
6180 parser->m_eventPtr = ptr;
6181 return XML_ERROR_UNEXPECTED_STATE;
6182 /* LCOV_EXCL_STOP */
6183 }
6184 ptr = next;
6185 }
6186 /* not reached */
6187 }
6188
6189 #if XML_GE == 1
6190 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6191 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6192 const char *entityTextPtr, const char *entityTextEnd,
6193 enum XML_Account account) {
6194 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6195 STRING_POOL *pool = &(dtd->entityValuePool);
6196 enum XML_Error result = XML_ERROR_NONE;
6197 # ifdef XML_DTD
6198 int oldInEntityValue = parser->m_prologState.inEntityValue;
6199 parser->m_prologState.inEntityValue = 1;
6200 # else
6201 UNUSED_P(account);
6202 # endif /* XML_DTD */
6203 /* never return Null for the value argument in EntityDeclHandler,
6204 since this would indicate an external entity; therefore we
6205 have to make sure that entityValuePool.start is not null */
6206 if (! pool->blocks) {
6207 if (! poolGrow(pool))
6208 return XML_ERROR_NO_MEMORY;
6209 }
6210
6211 for (;;) {
6212 const char *next
6213 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6214 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6215
6216 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6217 account)) {
6218 accountingOnAbort(parser);
6219 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6220 goto endEntityValue;
6221 }
6222
6223 switch (tok) {
6224 case XML_TOK_PARAM_ENTITY_REF:
6225 # ifdef XML_DTD
6226 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6227 const XML_Char *name;
6228 ENTITY *entity;
6229 name = poolStoreString(&parser->m_tempPool, enc,
6230 entityTextPtr + enc->minBytesPerChar,
6231 next - enc->minBytesPerChar);
6232 if (! name) {
6233 result = XML_ERROR_NO_MEMORY;
6234 goto endEntityValue;
6235 }
6236 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6237 poolDiscard(&parser->m_tempPool);
6238 if (! entity) {
6239 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6240 /* cannot report skipped entity here - see comments on
6241 parser->m_skippedEntityHandler
6242 if (parser->m_skippedEntityHandler)
6243 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6244 */
6245 dtd->keepProcessing = dtd->standalone;
6246 goto endEntityValue;
6247 }
6248 if (entity->open || (entity == parser->m_declEntity)) {
6249 if (enc == parser->m_encoding)
6250 parser->m_eventPtr = entityTextPtr;
6251 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6252 goto endEntityValue;
6253 }
6254 if (entity->systemId) {
6255 if (parser->m_externalEntityRefHandler) {
6256 dtd->paramEntityRead = XML_FALSE;
6257 entity->open = XML_TRUE;
6258 entityTrackingOnOpen(parser, entity, __LINE__);
6259 if (! parser->m_externalEntityRefHandler(
6260 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6261 entity->systemId, entity->publicId)) {
6262 entityTrackingOnClose(parser, entity, __LINE__);
6263 entity->open = XML_FALSE;
6264 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6265 goto endEntityValue;
6266 }
6267 entityTrackingOnClose(parser, entity, __LINE__);
6268 entity->open = XML_FALSE;
6269 if (! dtd->paramEntityRead)
6270 dtd->keepProcessing = dtd->standalone;
6271 } else
6272 dtd->keepProcessing = dtd->standalone;
6273 } else {
6274 entity->open = XML_TRUE;
6275 entityTrackingOnOpen(parser, entity, __LINE__);
6276 result = storeEntityValue(
6277 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6278 (const char *)(entity->textPtr + entity->textLen),
6279 XML_ACCOUNT_ENTITY_EXPANSION);
6280 entityTrackingOnClose(parser, entity, __LINE__);
6281 entity->open = XML_FALSE;
6282 if (result)
6283 goto endEntityValue;
6284 }
6285 break;
6286 }
6287 # endif /* XML_DTD */
6288 /* In the internal subset, PE references are not legal
6289 within markup declarations, e.g entity values in this case. */
6290 parser->m_eventPtr = entityTextPtr;
6291 result = XML_ERROR_PARAM_ENTITY_REF;
6292 goto endEntityValue;
6293 case XML_TOK_NONE:
6294 result = XML_ERROR_NONE;
6295 goto endEntityValue;
6296 case XML_TOK_ENTITY_REF:
6297 case XML_TOK_DATA_CHARS:
6298 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6299 result = XML_ERROR_NO_MEMORY;
6300 goto endEntityValue;
6301 }
6302 break;
6303 case XML_TOK_TRAILING_CR:
6304 next = entityTextPtr + enc->minBytesPerChar;
6305 /* fall through */
6306 case XML_TOK_DATA_NEWLINE:
6307 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6308 result = XML_ERROR_NO_MEMORY;
6309 goto endEntityValue;
6310 }
6311 *(pool->ptr)++ = 0xA;
6312 break;
6313 case XML_TOK_CHAR_REF: {
6314 XML_Char buf[XML_ENCODE_MAX];
6315 int i;
6316 int n = XmlCharRefNumber(enc, entityTextPtr);
6317 if (n < 0) {
6318 if (enc == parser->m_encoding)
6319 parser->m_eventPtr = entityTextPtr;
6320 result = XML_ERROR_BAD_CHAR_REF;
6321 goto endEntityValue;
6322 }
6323 n = XmlEncode(n, (ICHAR *)buf);
6324 /* The XmlEncode() functions can never return 0 here. That
6325 * error return happens if the code point passed in is either
6326 * negative or greater than or equal to 0x110000. The
6327 * XmlCharRefNumber() functions will all return a number
6328 * strictly less than 0x110000 or a negative value if an error
6329 * occurred. The negative value is intercepted above, so
6330 * XmlEncode() is never passed a value it might return an
6331 * error for.
6332 */
6333 for (i = 0; i < n; i++) {
6334 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6335 result = XML_ERROR_NO_MEMORY;
6336 goto endEntityValue;
6337 }
6338 *(pool->ptr)++ = buf[i];
6339 }
6340 } break;
6341 case XML_TOK_PARTIAL:
6342 if (enc == parser->m_encoding)
6343 parser->m_eventPtr = entityTextPtr;
6344 result = XML_ERROR_INVALID_TOKEN;
6345 goto endEntityValue;
6346 case XML_TOK_INVALID:
6347 if (enc == parser->m_encoding)
6348 parser->m_eventPtr = next;
6349 result = XML_ERROR_INVALID_TOKEN;
6350 goto endEntityValue;
6351 default:
6352 /* This default case should be unnecessary -- all the tokens
6353 * that XmlEntityValueTok() can return have their own explicit
6354 * cases -- but should be retained for safety. We do however
6355 * exclude it from the coverage statistics.
6356 *
6357 * LCOV_EXCL_START
6358 */
6359 if (enc == parser->m_encoding)
6360 parser->m_eventPtr = entityTextPtr;
6361 result = XML_ERROR_UNEXPECTED_STATE;
6362 goto endEntityValue;
6363 /* LCOV_EXCL_STOP */
6364 }
6365 entityTextPtr = next;
6366 }
6367 endEntityValue:
6368 # ifdef XML_DTD
6369 parser->m_prologState.inEntityValue = oldInEntityValue;
6370 # endif /* XML_DTD */
6371 return result;
6372 }
6373
6374 #else /* XML_GE == 0 */
6375
6376 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6377 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6378 // This will store "&entity123;" in entity->textPtr
6379 // to end up as "&entity123;" in the handler.
6380 const char *const entity_start = "&";
6381 const char *const entity_end = ";";
6382
6383 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6384 if (! poolAppendString(pool, entity_start)
6385 || ! poolAppendString(pool, entity->name)
6386 || ! poolAppendString(pool, entity_end)) {
6387 poolDiscard(pool);
6388 return XML_ERROR_NO_MEMORY;
6389 }
6390
6391 entity->textPtr = poolStart(pool);
6392 entity->textLen = (int)(poolLength(pool));
6393 poolFinish(pool);
6394
6395 return XML_ERROR_NONE;
6396 }
6397
6398 #endif /* XML_GE == 0 */
6399
6400 static void FASTCALL
normalizeLines(XML_Char * s)6401 normalizeLines(XML_Char *s) {
6402 XML_Char *p;
6403 for (;; s++) {
6404 if (*s == XML_T('\0'))
6405 return;
6406 if (*s == 0xD)
6407 break;
6408 }
6409 p = s;
6410 do {
6411 if (*s == 0xD) {
6412 *p++ = 0xA;
6413 if (*++s == 0xA)
6414 s++;
6415 } else
6416 *p++ = *s++;
6417 } while (*s);
6418 *p = XML_T('\0');
6419 }
6420
6421 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6422 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6423 const char *start, const char *end) {
6424 const XML_Char *target;
6425 XML_Char *data;
6426 const char *tem;
6427 if (! parser->m_processingInstructionHandler) {
6428 if (parser->m_defaultHandler)
6429 reportDefault(parser, enc, start, end);
6430 return 1;
6431 }
6432 start += enc->minBytesPerChar * 2;
6433 tem = start + XmlNameLength(enc, start);
6434 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6435 if (! target)
6436 return 0;
6437 poolFinish(&parser->m_tempPool);
6438 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6439 end - enc->minBytesPerChar * 2);
6440 if (! data)
6441 return 0;
6442 normalizeLines(data);
6443 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6444 poolClear(&parser->m_tempPool);
6445 return 1;
6446 }
6447
6448 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6449 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6450 const char *end) {
6451 XML_Char *data;
6452 if (! parser->m_commentHandler) {
6453 if (parser->m_defaultHandler)
6454 reportDefault(parser, enc, start, end);
6455 return 1;
6456 }
6457 data = poolStoreString(&parser->m_tempPool, enc,
6458 start + enc->minBytesPerChar * 4,
6459 end - enc->minBytesPerChar * 3);
6460 if (! data)
6461 return 0;
6462 normalizeLines(data);
6463 parser->m_commentHandler(parser->m_handlerArg, data);
6464 poolClear(&parser->m_tempPool);
6465 return 1;
6466 }
6467
6468 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6469 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6470 const char *end) {
6471 if (MUST_CONVERT(enc, s)) {
6472 enum XML_Convert_Result convert_res;
6473 const char **eventPP;
6474 const char **eventEndPP;
6475 if (enc == parser->m_encoding) {
6476 eventPP = &parser->m_eventPtr;
6477 eventEndPP = &parser->m_eventEndPtr;
6478 } else {
6479 /* To get here, two things must be true; the parser must be
6480 * using a character encoding that is not the same as the
6481 * encoding passed in, and the encoding passed in must need
6482 * conversion to the internal format (UTF-8 unless XML_UNICODE
6483 * is defined). The only occasions on which the encoding passed
6484 * in is not the same as the parser's encoding are when it is
6485 * the internal encoding (e.g. a previously defined parameter
6486 * entity, already converted to internal format). This by
6487 * definition doesn't need conversion, so the whole branch never
6488 * gets executed.
6489 *
6490 * For safety's sake we don't delete these lines and merely
6491 * exclude them from coverage statistics.
6492 *
6493 * LCOV_EXCL_START
6494 */
6495 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6496 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6497 /* LCOV_EXCL_STOP */
6498 }
6499 do {
6500 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6501 convert_res
6502 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6503 *eventEndPP = s;
6504 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6505 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6506 *eventPP = s;
6507 } while ((convert_res != XML_CONVERT_COMPLETED)
6508 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6509 } else
6510 parser->m_defaultHandler(
6511 parser->m_handlerArg, (const XML_Char *)s,
6512 (int)((const XML_Char *)end - (const XML_Char *)s));
6513 }
6514
6515 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6516 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6517 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6518 DEFAULT_ATTRIBUTE *att;
6519 if (value || isId) {
6520 /* The handling of default attributes gets messed up if we have
6521 a default which duplicates a non-default. */
6522 int i;
6523 for (i = 0; i < type->nDefaultAtts; i++)
6524 if (attId == type->defaultAtts[i].id)
6525 return 1;
6526 if (isId && ! type->idAtt && ! attId->xmlns)
6527 type->idAtt = attId;
6528 }
6529 if (type->nDefaultAtts == type->allocDefaultAtts) {
6530 if (type->allocDefaultAtts == 0) {
6531 type->allocDefaultAtts = 8;
6532 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6533 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6534 if (! type->defaultAtts) {
6535 type->allocDefaultAtts = 0;
6536 return 0;
6537 }
6538 } else {
6539 DEFAULT_ATTRIBUTE *temp;
6540
6541 /* Detect and prevent integer overflow */
6542 if (type->allocDefaultAtts > INT_MAX / 2) {
6543 return 0;
6544 }
6545
6546 int count = type->allocDefaultAtts * 2;
6547
6548 /* Detect and prevent integer overflow.
6549 * The preprocessor guard addresses the "always false" warning
6550 * from -Wtype-limits on platforms where
6551 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6552 #if UINT_MAX >= SIZE_MAX
6553 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6554 return 0;
6555 }
6556 #endif
6557
6558 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6559 (count * sizeof(DEFAULT_ATTRIBUTE)));
6560 if (temp == NULL)
6561 return 0;
6562 type->allocDefaultAtts = count;
6563 type->defaultAtts = temp;
6564 }
6565 }
6566 att = type->defaultAtts + type->nDefaultAtts;
6567 att->id = attId;
6568 att->value = value;
6569 att->isCdata = isCdata;
6570 if (! isCdata)
6571 attId->maybeTokenized = XML_TRUE;
6572 type->nDefaultAtts += 1;
6573 return 1;
6574 }
6575
6576 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6577 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6578 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6579 const XML_Char *name;
6580 for (name = elementType->name; *name; name++) {
6581 if (*name == XML_T(ASCII_COLON)) {
6582 PREFIX *prefix;
6583 const XML_Char *s;
6584 for (s = elementType->name; s != name; s++) {
6585 if (! poolAppendChar(&dtd->pool, *s))
6586 return 0;
6587 }
6588 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6589 return 0;
6590 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6591 sizeof(PREFIX));
6592 if (! prefix)
6593 return 0;
6594 if (prefix->name == poolStart(&dtd->pool))
6595 poolFinish(&dtd->pool);
6596 else
6597 poolDiscard(&dtd->pool);
6598 elementType->prefix = prefix;
6599 break;
6600 }
6601 }
6602 return 1;
6603 }
6604
6605 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6606 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6607 const char *end) {
6608 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6609 ATTRIBUTE_ID *id;
6610 const XML_Char *name;
6611 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6612 return NULL;
6613 name = poolStoreString(&dtd->pool, enc, start, end);
6614 if (! name)
6615 return NULL;
6616 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6617 ++name;
6618 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6619 sizeof(ATTRIBUTE_ID));
6620 if (! id)
6621 return NULL;
6622 if (id->name != name)
6623 poolDiscard(&dtd->pool);
6624 else {
6625 poolFinish(&dtd->pool);
6626 if (! parser->m_ns)
6627 ;
6628 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6629 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6630 && name[4] == XML_T(ASCII_s)
6631 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6632 if (name[5] == XML_T('\0'))
6633 id->prefix = &dtd->defaultPrefix;
6634 else
6635 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6636 sizeof(PREFIX));
6637 id->xmlns = XML_TRUE;
6638 } else {
6639 int i;
6640 for (i = 0; name[i]; i++) {
6641 /* attributes without prefix are *not* in the default namespace */
6642 if (name[i] == XML_T(ASCII_COLON)) {
6643 int j;
6644 for (j = 0; j < i; j++) {
6645 if (! poolAppendChar(&dtd->pool, name[j]))
6646 return NULL;
6647 }
6648 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6649 return NULL;
6650 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6651 poolStart(&dtd->pool), sizeof(PREFIX));
6652 if (! id->prefix)
6653 return NULL;
6654 if (id->prefix->name == poolStart(&dtd->pool))
6655 poolFinish(&dtd->pool);
6656 else
6657 poolDiscard(&dtd->pool);
6658 break;
6659 }
6660 }
6661 }
6662 }
6663 return id;
6664 }
6665
6666 #define CONTEXT_SEP XML_T(ASCII_FF)
6667
6668 static const XML_Char *
getContext(XML_Parser parser)6669 getContext(XML_Parser parser) {
6670 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6671 HASH_TABLE_ITER iter;
6672 XML_Bool needSep = XML_FALSE;
6673
6674 if (dtd->defaultPrefix.binding) {
6675 int i;
6676 int len;
6677 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6678 return NULL;
6679 len = dtd->defaultPrefix.binding->uriLen;
6680 if (parser->m_namespaceSeparator)
6681 len--;
6682 for (i = 0; i < len; i++) {
6683 if (! poolAppendChar(&parser->m_tempPool,
6684 dtd->defaultPrefix.binding->uri[i])) {
6685 /* Because of memory caching, I don't believe this line can be
6686 * executed.
6687 *
6688 * This is part of a loop copying the default prefix binding
6689 * URI into the parser's temporary string pool. Previously,
6690 * that URI was copied into the same string pool, with a
6691 * terminating NUL character, as part of setContext(). When
6692 * the pool was cleared, that leaves a block definitely big
6693 * enough to hold the URI on the free block list of the pool.
6694 * The URI copy in getContext() therefore cannot run out of
6695 * memory.
6696 *
6697 * If the pool is used between the setContext() and
6698 * getContext() calls, the worst it can do is leave a bigger
6699 * block on the front of the free list. Given that this is
6700 * all somewhat inobvious and program logic can be changed, we
6701 * don't delete the line but we do exclude it from the test
6702 * coverage statistics.
6703 */
6704 return NULL; /* LCOV_EXCL_LINE */
6705 }
6706 }
6707 needSep = XML_TRUE;
6708 }
6709
6710 hashTableIterInit(&iter, &(dtd->prefixes));
6711 for (;;) {
6712 int i;
6713 int len;
6714 const XML_Char *s;
6715 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6716 if (! prefix)
6717 break;
6718 if (! prefix->binding) {
6719 /* This test appears to be (justifiable) paranoia. There does
6720 * not seem to be a way of injecting a prefix without a binding
6721 * that doesn't get errored long before this function is called.
6722 * The test should remain for safety's sake, so we instead
6723 * exclude the following line from the coverage statistics.
6724 */
6725 continue; /* LCOV_EXCL_LINE */
6726 }
6727 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6728 return NULL;
6729 for (s = prefix->name; *s; s++)
6730 if (! poolAppendChar(&parser->m_tempPool, *s))
6731 return NULL;
6732 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6733 return NULL;
6734 len = prefix->binding->uriLen;
6735 if (parser->m_namespaceSeparator)
6736 len--;
6737 for (i = 0; i < len; i++)
6738 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6739 return NULL;
6740 needSep = XML_TRUE;
6741 }
6742
6743 hashTableIterInit(&iter, &(dtd->generalEntities));
6744 for (;;) {
6745 const XML_Char *s;
6746 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6747 if (! e)
6748 break;
6749 if (! e->open)
6750 continue;
6751 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6752 return NULL;
6753 for (s = e->name; *s; s++)
6754 if (! poolAppendChar(&parser->m_tempPool, *s))
6755 return 0;
6756 needSep = XML_TRUE;
6757 }
6758
6759 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6760 return NULL;
6761 return parser->m_tempPool.start;
6762 }
6763
/* Apply a context string to the parser.
 *
 * The string is a sequence of items separated by CONTEXT_SEP:
 *   - an item without '=' is looked up as a general entity name; if the
 *     entity exists it is marked open,
 *   - an item of the form "prefix=uri" (an empty prefix selects
 *     dtd->defaultPrefix) is added through addBinding() to
 *     parser->m_inheritedBindings.
 *
 * Returns XML_TRUE on success.  Returns XML_FALSE if `context` is NULL,
 * on allocation failure, or if addBinding() reports an error.
 */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* `s` scans ahead of `context`; the characters of the current item are
     accumulated in the temporary pool as it advances. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* End of an item without '=': the accumulated text is an entity
         name.  A create size of 0 means no new entity is created. */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* The accumulated text is a prefix name; nothing accumulated means
         the default prefix. */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix;
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        prefix
            = (PREFIX *)lookup(parser, &dtd->prefixes,
                               poolStart(&parser->m_tempPool), sizeof(PREFIX));
        if (! prefix)
          return XML_FALSE;
        /* The entry's name still points into the temporary pool, which is
           discarded below, so give it a copy in the DTD pool. */
        if (prefix->name == poolStart(&parser->m_tempPool)) {
          prefix->name = poolCopyString(&dtd->pool, prefix->name);
          if (! prefix->name)
            return XML_FALSE;
        }
        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI: everything up to the next separator or the end. */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: part of the current entity or prefix name. */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
6827
6828 static void FASTCALL
normalizePublicId(XML_Char * publicId)6829 normalizePublicId(XML_Char *publicId) {
6830 XML_Char *p = publicId;
6831 XML_Char *s;
6832 for (s = publicId; *s; s++) {
6833 switch (*s) {
6834 case 0x20:
6835 case 0xD:
6836 case 0xA:
6837 if (p != publicId && p[-1] != 0x20)
6838 *p++ = 0x20;
6839 break;
6840 default:
6841 *p++ = *s;
6842 }
6843 }
6844 if (p != publicId && p[-1] == 0x20)
6845 --p;
6846 *p = XML_T('\0');
6847 }
6848
6849 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6850 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6851 DTD *p = ms->malloc_fcn(sizeof(DTD));
6852 if (p == NULL)
6853 return p;
6854 poolInit(&(p->pool), ms);
6855 poolInit(&(p->entityValuePool), ms);
6856 hashTableInit(&(p->generalEntities), ms);
6857 hashTableInit(&(p->elementTypes), ms);
6858 hashTableInit(&(p->attributeIds), ms);
6859 hashTableInit(&(p->prefixes), ms);
6860 #ifdef XML_DTD
6861 p->paramEntityRead = XML_FALSE;
6862 hashTableInit(&(p->paramEntities), ms);
6863 #endif /* XML_DTD */
6864 p->defaultPrefix.name = NULL;
6865 p->defaultPrefix.binding = NULL;
6866
6867 p->in_eldecl = XML_FALSE;
6868 p->scaffIndex = NULL;
6869 p->scaffold = NULL;
6870 p->scaffLevel = 0;
6871 p->scaffSize = 0;
6872 p->scaffCount = 0;
6873 p->contentStringLen = 0;
6874
6875 p->keepProcessing = XML_TRUE;
6876 p->hasParamEntityRefs = XML_FALSE;
6877 p->standalone = XML_FALSE;
6878 return p;
6879 }
6880
6881 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6882 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6883 HASH_TABLE_ITER iter;
6884 hashTableIterInit(&iter, &(p->elementTypes));
6885 for (;;) {
6886 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6887 if (! e)
6888 break;
6889 if (e->allocDefaultAtts != 0)
6890 ms->free_fcn(e->defaultAtts);
6891 }
6892 hashTableClear(&(p->generalEntities));
6893 #ifdef XML_DTD
6894 p->paramEntityRead = XML_FALSE;
6895 hashTableClear(&(p->paramEntities));
6896 #endif /* XML_DTD */
6897 hashTableClear(&(p->elementTypes));
6898 hashTableClear(&(p->attributeIds));
6899 hashTableClear(&(p->prefixes));
6900 poolClear(&(p->pool));
6901 poolClear(&(p->entityValuePool));
6902 p->defaultPrefix.name = NULL;
6903 p->defaultPrefix.binding = NULL;
6904
6905 p->in_eldecl = XML_FALSE;
6906
6907 ms->free_fcn(p->scaffIndex);
6908 p->scaffIndex = NULL;
6909 ms->free_fcn(p->scaffold);
6910 p->scaffold = NULL;
6911
6912 p->scaffLevel = 0;
6913 p->scaffSize = 0;
6914 p->scaffCount = 0;
6915 p->contentStringLen = 0;
6916
6917 p->keepProcessing = XML_TRUE;
6918 p->hasParamEntityRefs = XML_FALSE;
6919 p->standalone = XML_FALSE;
6920 }
6921
6922 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6923 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6924 HASH_TABLE_ITER iter;
6925 hashTableIterInit(&iter, &(p->elementTypes));
6926 for (;;) {
6927 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6928 if (! e)
6929 break;
6930 if (e->allocDefaultAtts != 0)
6931 ms->free_fcn(e->defaultAtts);
6932 }
6933 hashTableDestroy(&(p->generalEntities));
6934 #ifdef XML_DTD
6935 hashTableDestroy(&(p->paramEntities));
6936 #endif /* XML_DTD */
6937 hashTableDestroy(&(p->elementTypes));
6938 hashTableDestroy(&(p->attributeIds));
6939 hashTableDestroy(&(p->prefixes));
6940 poolDestroy(&(p->pool));
6941 poolDestroy(&(p->entityValuePool));
6942 if (isDocEntity) {
6943 ms->free_fcn(p->scaffIndex);
6944 ms->free_fcn(p->scaffold);
6945 }
6946 ms->free_fcn(p);
6947 }
6948
/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
   The new DTD has already been initialized.

   Prefixes, attribute ids, element types (with their default attributes)
   and the entity tables are re-created in newDtd, with all strings copied
   into newDtd->pool.  References between objects are re-resolved by name
   in the new tables.  The scaffolding is the exception: its pointers are
   copied as-is, so both DTDs refer to the same scaffolding memory.

   oldParser is only passed through to lookup().  ms supplies the
   allocator for the default-attribute arrays.
*/
static int
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
        const XML_Memory_Handling_Suite *ms) {
  HASH_TABLE_ITER iter;

  /* Copy the prefix table. */

  hashTableIterInit(&iter, &(oldDtd->prefixes));
  for (;;) {
    const XML_Char *name;
    const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
    if (! oldP)
      break;
    name = poolCopyString(&(newDtd->pool), oldP->name);
    if (! name)
      return 0;
    if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
      return 0;
  }

  hashTableIterInit(&iter, &(oldDtd->attributeIds));

  /* Copy the attribute id table. */

  for (;;) {
    ATTRIBUTE_ID *newA;
    const XML_Char *name;
    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);

    if (! oldA)
      break;
    /* Remember to allocate the scratch byte before the name. */
    if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
      return 0;
    name = poolCopyString(&(newDtd->pool), oldA->name);
    if (! name)
      return 0;
    /* Step over the scratch character so that it sits at name[-1]. */
    ++name;
    newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
                                  sizeof(ATTRIBUTE_ID));
    if (! newA)
      return 0;
    newA->maybeTokenized = oldA->maybeTokenized;
    if (oldA->prefix) {
      newA->xmlns = oldA->xmlns;
      /* The default prefix is embedded in the DTD rather than stored in
         the prefix table, so it is mapped directly; every other prefix is
         found by name among the prefixes copied above. */
      if (oldA->prefix == &oldDtd->defaultPrefix)
        newA->prefix = &newDtd->defaultPrefix;
      else
        newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                        oldA->prefix->name, 0);
    }
  }

  /* Copy the element type table. */

  hashTableIterInit(&iter, &(oldDtd->elementTypes));

  for (;;) {
    int i;
    ELEMENT_TYPE *newE;
    const XML_Char *name;
    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(&(newDtd->pool), oldE->name);
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts
          > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
        return 0;
      }
#endif
      /* The copy is sized to the number of attributes in use, not to the
         old array's capacity. */
      newE->defaultAtts
          = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
      newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
                                           oldE->idAtt->name, 0);
    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
    if (oldE->prefix)
      newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                      oldE->prefix->name, 0);
    for (i = 0; i < newE->nDefaultAtts; i++) {
      /* Re-resolve the attribute id by name in the new table and give the
         default value, if any, its own copy in the new pool. */
      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
          oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
      if (oldE->defaultAtts[i].value) {
        newE->defaultAtts[i].value
            = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
        if (! newE->defaultAtts[i].value)
          return 0;
      } else
        newE->defaultAtts[i].value = NULL;
    }
  }

  /* Copy the entity tables. */
  if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
                        &(oldDtd->generalEntities)))
    return 0;

#ifdef XML_DTD
  if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
                        &(oldDtd->paramEntities)))
    return 0;
  newDtd->paramEntityRead = oldDtd->paramEntityRead;
#endif /* XML_DTD */

  newDtd->keepProcessing = oldDtd->keepProcessing;
  newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
  newDtd->standalone = oldDtd->standalone;

  /* Don't want deep copying for scaffolding */
  /* NOTE(review): scaffCount is not copied here and keeps whatever value
     the already-initialized newDtd holds -- confirm this is intended. */
  newDtd->in_eldecl = oldDtd->in_eldecl;
  newDtd->scaffold = oldDtd->scaffold;
  newDtd->contentStringLen = oldDtd->contentStringLen;
  newDtd->scaffSize = oldDtd->scaffSize;
  newDtd->scaffLevel = oldDtd->scaffLevel;
  newDtd->scaffIndex = oldDtd->scaffIndex;

  return 1;
} /* End dtdCopy */
7087
7088 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7089 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7090 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7091 HASH_TABLE_ITER iter;
7092 const XML_Char *cachedOldBase = NULL;
7093 const XML_Char *cachedNewBase = NULL;
7094
7095 hashTableIterInit(&iter, oldTable);
7096
7097 for (;;) {
7098 ENTITY *newE;
7099 const XML_Char *name;
7100 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7101 if (! oldE)
7102 break;
7103 name = poolCopyString(newPool, oldE->name);
7104 if (! name)
7105 return 0;
7106 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7107 if (! newE)
7108 return 0;
7109 if (oldE->systemId) {
7110 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7111 if (! tem)
7112 return 0;
7113 newE->systemId = tem;
7114 if (oldE->base) {
7115 if (oldE->base == cachedOldBase)
7116 newE->base = cachedNewBase;
7117 else {
7118 cachedOldBase = oldE->base;
7119 tem = poolCopyString(newPool, cachedOldBase);
7120 if (! tem)
7121 return 0;
7122 cachedNewBase = newE->base = tem;
7123 }
7124 }
7125 if (oldE->publicId) {
7126 tem = poolCopyString(newPool, oldE->publicId);
7127 if (! tem)
7128 return 0;
7129 newE->publicId = tem;
7130 }
7131 } else {
7132 const XML_Char *tem
7133 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7134 if (! tem)
7135 return 0;
7136 newE->textPtr = tem;
7137 newE->textLen = oldE->textLen;
7138 }
7139 if (oldE->notation) {
7140 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7141 if (! tem)
7142 return 0;
7143 newE->notation = tem;
7144 }
7145 newE->is_param = oldE->is_param;
7146 newE->is_internal = oldE->is_internal;
7147 }
7148 return 1;
7149 }
7150
/* log2 of the slot count lookup() allocates for a hash table on its first
 * insertion (the initial size is 1 << INIT_POWER). */
#define INIT_POWER 6
7152
7153 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7154 keyeq(KEY s1, KEY s2) {
7155 for (; *s1 == *s2; s1++, s2++)
7156 if (*s1 == 0)
7157 return XML_TRUE;
7158 return XML_FALSE;
7159 }
7160
7161 static size_t
keylen(KEY s)7162 keylen(KEY s) {
7163 size_t len = 0;
7164 for (; *s; s++, len++)
7165 ;
7166 return len;
7167 }
7168
7169 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7170 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7171 key->k[0] = 0;
7172 key->k[1] = get_hash_secret_salt(parser);
7173 }
7174
7175 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7176 hash(XML_Parser parser, KEY s) {
7177 struct siphash state;
7178 struct sipkey key;
7179 (void)sip24_valid;
7180 copy_salt_to_sipkey(parser, &key);
7181 sip24_init(&state, &key);
7182 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7183 return (unsigned long)sip24_final(&state);
7184 }
7185
7186 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)7187 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7188 size_t i;
7189 if (table->size == 0) {
7190 size_t tsize;
7191 if (! createSize)
7192 return NULL;
7193 table->power = INIT_POWER;
7194 /* table->size is a power of 2 */
7195 table->size = (size_t)1 << INIT_POWER;
7196 tsize = table->size * sizeof(NAMED *);
7197 table->v = table->mem->malloc_fcn(tsize);
7198 if (! table->v) {
7199 table->size = 0;
7200 return NULL;
7201 }
7202 memset(table->v, 0, tsize);
7203 i = hash(parser, name) & ((unsigned long)table->size - 1);
7204 } else {
7205 unsigned long h = hash(parser, name);
7206 unsigned long mask = (unsigned long)table->size - 1;
7207 unsigned char step = 0;
7208 i = h & mask;
7209 while (table->v[i]) {
7210 if (keyeq(name, table->v[i]->name))
7211 return table->v[i];
7212 if (! step)
7213 step = PROBE_STEP(h, mask, table->power);
7214 i < step ? (i += table->size - step) : (i -= step);
7215 }
7216 if (! createSize)
7217 return NULL;
7218
7219 /* check for overflow (table is half full) */
7220 if (table->used >> (table->power - 1)) {
7221 unsigned char newPower = table->power + 1;
7222
7223 /* Detect and prevent invalid shift */
7224 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7225 return NULL;
7226 }
7227
7228 size_t newSize = (size_t)1 << newPower;
7229 unsigned long newMask = (unsigned long)newSize - 1;
7230
7231 /* Detect and prevent integer overflow */
7232 if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7233 return NULL;
7234 }
7235
7236 size_t tsize = newSize * sizeof(NAMED *);
7237 NAMED **newV = table->mem->malloc_fcn(tsize);
7238 if (! newV)
7239 return NULL;
7240 memset(newV, 0, tsize);
7241 for (i = 0; i < table->size; i++)
7242 if (table->v[i]) {
7243 unsigned long newHash = hash(parser, table->v[i]->name);
7244 size_t j = newHash & newMask;
7245 step = 0;
7246 while (newV[j]) {
7247 if (! step)
7248 step = PROBE_STEP(newHash, newMask, newPower);
7249 j < step ? (j += newSize - step) : (j -= step);
7250 }
7251 newV[j] = table->v[i];
7252 }
7253 table->mem->free_fcn(table->v);
7254 table->v = newV;
7255 table->power = newPower;
7256 table->size = newSize;
7257 i = h & newMask;
7258 step = 0;
7259 while (table->v[i]) {
7260 if (! step)
7261 step = PROBE_STEP(h, newMask, newPower);
7262 i < step ? (i += newSize - step) : (i -= step);
7263 }
7264 }
7265 }
7266 table->v[i] = table->mem->malloc_fcn(createSize);
7267 if (! table->v[i])
7268 return NULL;
7269 memset(table->v[i], 0, createSize);
7270 table->v[i]->name = name;
7271 (table->used)++;
7272 return table->v[i];
7273 }
7274
7275 static void FASTCALL
hashTableClear(HASH_TABLE * table)7276 hashTableClear(HASH_TABLE *table) {
7277 size_t i;
7278 for (i = 0; i < table->size; i++) {
7279 table->mem->free_fcn(table->v[i]);
7280 table->v[i] = NULL;
7281 }
7282 table->used = 0;
7283 }
7284
7285 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7286 hashTableDestroy(HASH_TABLE *table) {
7287 size_t i;
7288 for (i = 0; i < table->size; i++)
7289 table->mem->free_fcn(table->v[i]);
7290 table->mem->free_fcn(table->v);
7291 }
7292
7293 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7294 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7295 p->power = 0;
7296 p->size = 0;
7297 p->used = 0;
7298 p->v = NULL;
7299 p->mem = ms;
7300 }
7301
7302 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7303 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7304 iter->p = table->v;
7305 iter->end = iter->p ? iter->p + table->size : NULL;
7306 }
7307
7308 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7309 hashTableIterNext(HASH_TABLE_ITER *iter) {
7310 while (iter->p != iter->end) {
7311 NAMED *tem = *(iter->p)++;
7312 if (tem)
7313 return tem;
7314 }
7315 return NULL;
7316 }
7317
7318 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7319 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7320 pool->blocks = NULL;
7321 pool->freeBlocks = NULL;
7322 pool->start = NULL;
7323 pool->ptr = NULL;
7324 pool->end = NULL;
7325 pool->mem = ms;
7326 }
7327
7328 static void FASTCALL
poolClear(STRING_POOL * pool)7329 poolClear(STRING_POOL *pool) {
7330 if (! pool->freeBlocks)
7331 pool->freeBlocks = pool->blocks;
7332 else {
7333 BLOCK *p = pool->blocks;
7334 while (p) {
7335 BLOCK *tem = p->next;
7336 p->next = pool->freeBlocks;
7337 pool->freeBlocks = p;
7338 p = tem;
7339 }
7340 }
7341 pool->blocks = NULL;
7342 pool->start = NULL;
7343 pool->ptr = NULL;
7344 pool->end = NULL;
7345 }
7346
7347 static void FASTCALL
poolDestroy(STRING_POOL * pool)7348 poolDestroy(STRING_POOL *pool) {
7349 BLOCK *p = pool->blocks;
7350 while (p) {
7351 BLOCK *tem = p->next;
7352 pool->mem->free_fcn(p);
7353 p = tem;
7354 }
7355 p = pool->freeBlocks;
7356 while (p) {
7357 BLOCK *tem = p->next;
7358 pool->mem->free_fcn(p);
7359 p = tem;
7360 }
7361 }
7362
7363 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7364 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7365 const char *end) {
7366 if (! pool->ptr && ! poolGrow(pool))
7367 return NULL;
7368 for (;;) {
7369 const enum XML_Convert_Result convert_res = XmlConvert(
7370 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7371 if ((convert_res == XML_CONVERT_COMPLETED)
7372 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7373 break;
7374 if (! poolGrow(pool))
7375 return NULL;
7376 }
7377 return pool->start;
7378 }
7379
7380 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7381 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7382 do {
7383 if (! poolAppendChar(pool, *s))
7384 return NULL;
7385 } while (*s++);
7386 s = pool->start;
7387 poolFinish(pool);
7388 return s;
7389 }
7390
7391 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7392 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7393 if (! pool->ptr && ! poolGrow(pool)) {
7394 /* The following line is unreachable given the current usage of
7395 * poolCopyStringN(). Currently it is called from exactly one
7396 * place to copy the text of a simple general entity. By that
7397 * point, the name of the entity is already stored in the pool, so
7398 * pool->ptr cannot be NULL.
7399 *
7400 * If poolCopyStringN() is used elsewhere as it well might be,
7401 * this line may well become executable again. Regardless, this
7402 * sort of check shouldn't be removed lightly, so we just exclude
7403 * it from the coverage statistics.
7404 */
7405 return NULL; /* LCOV_EXCL_LINE */
7406 }
7407 for (; n > 0; --n, s++) {
7408 if (! poolAppendChar(pool, *s))
7409 return NULL;
7410 }
7411 s = pool->start;
7412 poolFinish(pool);
7413 return s;
7414 }
7415
7416 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7417 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7418 while (*s) {
7419 if (! poolAppendChar(pool, *s))
7420 return NULL;
7421 s++;
7422 }
7423 return pool->start;
7424 }
7425
7426 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7427 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7428 const char *end) {
7429 if (! poolAppend(pool, enc, ptr, end))
7430 return NULL;
7431 if (pool->ptr == pool->end && ! poolGrow(pool))
7432 return NULL;
7433 *(pool->ptr)++ = 0;
7434 return pool->start;
7435 }
7436
7437 static size_t
poolBytesToAllocateFor(int blockSize)7438 poolBytesToAllocateFor(int blockSize) {
7439 /* Unprotected math would be:
7440 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7441 **
7442 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7443 ** For a + b * c we check b * c in isolation first, so that addition of a
7444 ** on top has no chance of making us accept a small non-negative number
7445 */
7446 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7447
7448 if (blockSize <= 0)
7449 return 0;
7450
7451 if (blockSize > (int)(INT_MAX / stretch))
7452 return 0;
7453
7454 {
7455 const int stretchedBlockSize = blockSize * (int)stretch;
7456 const int bytesToAllocate
7457 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7458 if (bytesToAllocate < 0)
7459 return 0;
7460
7461 return (size_t)bytesToAllocate;
7462 }
7463 }
7464
7465 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7466 poolGrow(STRING_POOL *pool) {
7467 if (pool->freeBlocks) {
7468 if (pool->start == 0) {
7469 pool->blocks = pool->freeBlocks;
7470 pool->freeBlocks = pool->freeBlocks->next;
7471 pool->blocks->next = NULL;
7472 pool->start = pool->blocks->s;
7473 pool->end = pool->start + pool->blocks->size;
7474 pool->ptr = pool->start;
7475 return XML_TRUE;
7476 }
7477 if (pool->end - pool->start < pool->freeBlocks->size) {
7478 BLOCK *tem = pool->freeBlocks->next;
7479 pool->freeBlocks->next = pool->blocks;
7480 pool->blocks = pool->freeBlocks;
7481 pool->freeBlocks = tem;
7482 memcpy(pool->blocks->s, pool->start,
7483 (pool->end - pool->start) * sizeof(XML_Char));
7484 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7485 pool->start = pool->blocks->s;
7486 pool->end = pool->start + pool->blocks->size;
7487 return XML_TRUE;
7488 }
7489 }
7490 if (pool->blocks && pool->start == pool->blocks->s) {
7491 BLOCK *temp;
7492 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7493 size_t bytesToAllocate;
7494
7495 /* NOTE: Needs to be calculated prior to calling `realloc`
7496 to avoid dangling pointers: */
7497 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7498
7499 if (blockSize < 0) {
7500 /* This condition traps a situation where either more than
7501 * INT_MAX/2 bytes have already been allocated. This isn't
7502 * readily testable, since it is unlikely that an average
7503 * machine will have that much memory, so we exclude it from the
7504 * coverage statistics.
7505 */
7506 return XML_FALSE; /* LCOV_EXCL_LINE */
7507 }
7508
7509 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7510 if (bytesToAllocate == 0)
7511 return XML_FALSE;
7512
7513 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7514 (unsigned)bytesToAllocate);
7515 if (temp == NULL)
7516 return XML_FALSE;
7517 pool->blocks = temp;
7518 pool->blocks->size = blockSize;
7519 pool->ptr = pool->blocks->s + offsetInsideBlock;
7520 pool->start = pool->blocks->s;
7521 pool->end = pool->start + blockSize;
7522 } else {
7523 BLOCK *tem;
7524 int blockSize = (int)(pool->end - pool->start);
7525 size_t bytesToAllocate;
7526
7527 if (blockSize < 0) {
7528 /* This condition traps a situation where either more than
7529 * INT_MAX bytes have already been allocated (which is prevented
7530 * by various pieces of program logic, not least this one, never
7531 * mind the unlikelihood of actually having that much memory) or
7532 * the pool control fields have been corrupted (which could
7533 * conceivably happen in an extremely buggy user handler
7534 * function). Either way it isn't readily testable, so we
7535 * exclude it from the coverage statistics.
7536 */
7537 return XML_FALSE; /* LCOV_EXCL_LINE */
7538 }
7539
7540 if (blockSize < INIT_BLOCK_SIZE)
7541 blockSize = INIT_BLOCK_SIZE;
7542 else {
7543 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7544 if ((int)((unsigned)blockSize * 2U) < 0) {
7545 return XML_FALSE;
7546 }
7547 blockSize *= 2;
7548 }
7549
7550 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7551 if (bytesToAllocate == 0)
7552 return XML_FALSE;
7553
7554 tem = pool->mem->malloc_fcn(bytesToAllocate);
7555 if (! tem)
7556 return XML_FALSE;
7557 tem->size = blockSize;
7558 tem->next = pool->blocks;
7559 pool->blocks = tem;
7560 if (pool->ptr != pool->start)
7561 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7562 pool->ptr = tem->s + (pool->ptr - pool->start);
7563 pool->start = tem->s;
7564 pool->end = tem->s + blockSize;
7565 }
7566 return XML_TRUE;
7567 }
7568
7569 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7570 nextScaffoldPart(XML_Parser parser) {
7571 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7572 CONTENT_SCAFFOLD *me;
7573 int next;
7574
7575 if (! dtd->scaffIndex) {
7576 /* Detect and prevent integer overflow.
7577 * The preprocessor guard addresses the "always false" warning
7578 * from -Wtype-limits on platforms where
7579 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7580 #if UINT_MAX >= SIZE_MAX
7581 if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
7582 return -1;
7583 }
7584 #endif
7585 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7586 if (! dtd->scaffIndex)
7587 return -1;
7588 dtd->scaffIndex[0] = 0;
7589 }
7590
7591 if (dtd->scaffCount >= dtd->scaffSize) {
7592 CONTENT_SCAFFOLD *temp;
7593 if (dtd->scaffold) {
7594 /* Detect and prevent integer overflow */
7595 if (dtd->scaffSize > UINT_MAX / 2u) {
7596 return -1;
7597 }
7598 /* Detect and prevent integer overflow.
7599 * The preprocessor guard addresses the "always false" warning
7600 * from -Wtype-limits on platforms where
7601 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7602 #if UINT_MAX >= SIZE_MAX
7603 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7604 return -1;
7605 }
7606 #endif
7607
7608 temp = (CONTENT_SCAFFOLD *)REALLOC(
7609 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7610 if (temp == NULL)
7611 return -1;
7612 dtd->scaffSize *= 2;
7613 } else {
7614 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7615 * sizeof(CONTENT_SCAFFOLD));
7616 if (temp == NULL)
7617 return -1;
7618 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7619 }
7620 dtd->scaffold = temp;
7621 }
7622 next = dtd->scaffCount++;
7623 me = &dtd->scaffold[next];
7624 if (dtd->scaffLevel) {
7625 CONTENT_SCAFFOLD *parent
7626 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7627 if (parent->lastchild) {
7628 dtd->scaffold[parent->lastchild].nextsib = next;
7629 }
7630 if (! parent->childcnt)
7631 parent->firstchild = next;
7632 parent->lastchild = next;
7633 parent->childcnt++;
7634 }
7635 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7636 return next;
7637 }
7638
7639 static XML_Content *
build_model(XML_Parser parser)7640 build_model(XML_Parser parser) {
7641 /* Function build_model transforms the existing parser->m_dtd->scaffold
7642 * array of CONTENT_SCAFFOLD tree nodes into a new array of
7643 * XML_Content tree nodes followed by a gapless list of zero-terminated
7644 * strings. */
7645 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7646 XML_Content *ret;
7647 XML_Char *str; /* the current string writing location */
7648
7649 /* Detect and prevent integer overflow.
7650 * The preprocessor guard addresses the "always false" warning
7651 * from -Wtype-limits on platforms where
7652 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7653 #if UINT_MAX >= SIZE_MAX
7654 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7655 return NULL;
7656 }
7657 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7658 return NULL;
7659 }
7660 #endif
7661 if (dtd->scaffCount * sizeof(XML_Content)
7662 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7663 return NULL;
7664 }
7665
7666 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7667 + (dtd->contentStringLen * sizeof(XML_Char)));
7668
7669 ret = (XML_Content *)MALLOC(parser, allocsize);
7670 if (! ret)
7671 return NULL;
7672
7673 /* What follows is an iterative implementation (of what was previously done
7674 * recursively in a dedicated function called "build_node". The old recursive
7675 * build_node could be forced into stack exhaustion from input as small as a
7676 * few megabyte, and so that was a security issue. Hence, a function call
7677 * stack is avoided now by resolving recursion.)
7678 *
7679 * The iterative approach works as follows:
7680 *
7681 * - We have two writing pointers, both walking up the result array; one does
7682 * the work, the other creates "jobs" for its colleague to do, and leads
7683 * the way:
7684 *
7685 * - The faster one, pointer jobDest, always leads and writes "what job
7686 * to do" by the other, once they reach that place in the
7687 * array: leader "jobDest" stores the source node array index (relative
7688 * to array dtd->scaffold) in field "numchildren".
7689 *
7690 * - The slower one, pointer dest, looks at the value stored in the
7691 * "numchildren" field (which actually holds a source node array index
7692 * at that time) and puts the real data from dtd->scaffold in.
7693 *
7694 * - Before the loop starts, jobDest writes source array index 0
7695 * (where the root node is located) so that dest will have something to do
7696 * when it starts operation.
7697 *
7698 * - Whenever nodes with children are encountered, jobDest appends
7699 * them as new jobs, in order. As a result, tree node siblings are
7700 * adjacent in the resulting array, for example:
7701 *
7702 * [0] root, has two children
7703 * [1] first child of 0, has three children
7704 * [3] first child of 1, does not have children
7705 * [4] second child of 1, does not have children
7706 * [5] third child of 1, does not have children
7707 * [2] second child of 0, does not have children
7708 *
7709 * Or (the same data) presented in flat array view:
7710 *
7711 * [0] root, has two children
7712 *
7713 * [1] first child of 0, has three children
7714 * [2] second child of 0, does not have children
7715 *
7716 * [3] first child of 1, does not have children
7717 * [4] second child of 1, does not have children
7718 * [5] third child of 1, does not have children
7719 *
7720 * - The algorithm repeats until all target array indices have been processed.
7721 */
7722 XML_Content *dest = ret; /* tree node writing location, moves upwards */
7723 XML_Content *const destLimit = &ret[dtd->scaffCount];
7724 XML_Content *jobDest = ret; /* next free writing location in target array */
7725 str = (XML_Char *)&ret[dtd->scaffCount];
7726
7727 /* Add the starting job, the root node (index 0) of the source tree */
7728 (jobDest++)->numchildren = 0;
7729
7730 for (; dest < destLimit; dest++) {
7731 /* Retrieve source tree array index from job storage */
7732 const int src_node = (int)dest->numchildren;
7733
7734 /* Convert item */
7735 dest->type = dtd->scaffold[src_node].type;
7736 dest->quant = dtd->scaffold[src_node].quant;
7737 if (dest->type == XML_CTYPE_NAME) {
7738 const XML_Char *src;
7739 dest->name = str;
7740 src = dtd->scaffold[src_node].name;
7741 for (;;) {
7742 *str++ = *src;
7743 if (! *src)
7744 break;
7745 src++;
7746 }
7747 dest->numchildren = 0;
7748 dest->children = NULL;
7749 } else {
7750 unsigned int i;
7751 int cn;
7752 dest->name = NULL;
7753 dest->numchildren = dtd->scaffold[src_node].childcnt;
7754 dest->children = jobDest;
7755
7756 /* Append scaffold indices of children to array */
7757 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7758 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7759 (jobDest++)->numchildren = (unsigned int)cn;
7760 }
7761 }
7762
7763 return ret;
7764 }
7765
7766 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7767 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7768 const char *end) {
7769 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7770 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7771 ELEMENT_TYPE *ret;
7772
7773 if (! name)
7774 return NULL;
7775 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7776 sizeof(ELEMENT_TYPE));
7777 if (! ret)
7778 return NULL;
7779 if (ret->name != name)
7780 poolDiscard(&dtd->pool);
7781 else {
7782 poolFinish(&dtd->pool);
7783 if (! setElementTypePrefix(parser, ret))
7784 return NULL;
7785 }
7786 return ret;
7787 }
7788
7789 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7790 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7791 size_t charsRequired = 0;
7792 XML_Char *result;
7793
7794 /* First determine how long the string is */
7795 while (s[charsRequired] != 0) {
7796 charsRequired++;
7797 }
7798 /* Include the terminator */
7799 charsRequired++;
7800
7801 /* Now allocate space for the copy */
7802 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7803 if (result == NULL)
7804 return NULL;
7805 /* Copy the original into place */
7806 memcpy(result, s, charsRequired * sizeof(XML_Char));
7807 return result;
7808 }
7809
7810 #if XML_GE == 1
7811
7812 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7813 accountingGetCurrentAmplification(XML_Parser rootParser) {
7814 // 1.........1.........12 => 22
7815 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7816 const XmlBigCount countBytesOutput
7817 = rootParser->m_accounting.countBytesDirect
7818 + rootParser->m_accounting.countBytesIndirect;
7819 const float amplificationFactor
7820 = rootParser->m_accounting.countBytesDirect
7821 ? (countBytesOutput
7822 / (float)(rootParser->m_accounting.countBytesDirect))
7823 : ((lenOfShortestInclude
7824 + rootParser->m_accounting.countBytesIndirect)
7825 / (float)lenOfShortestInclude);
7826 assert(! rootParser->m_parentParser);
7827 return amplificationFactor;
7828 }
7829
7830 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7831 accountingReportStats(XML_Parser originParser, const char *epilog) {
7832 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7833 assert(! rootParser->m_parentParser);
7834
7835 if (rootParser->m_accounting.debugLevel == 0u) {
7836 return;
7837 }
7838
7839 const float amplificationFactor
7840 = accountingGetCurrentAmplification(rootParser);
7841 fprintf(stderr,
7842 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7843 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7844 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7845 rootParser->m_accounting.countBytesIndirect,
7846 (double)amplificationFactor, epilog);
7847 }
7848
7849 static void
accountingOnAbort(XML_Parser originParser)7850 accountingOnAbort(XML_Parser originParser) {
7851 accountingReportStats(originParser, " ABORTING\n");
7852 }
7853
7854 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7855 accountingReportDiff(XML_Parser rootParser,
7856 unsigned int levelsAwayFromRootParser, const char *before,
7857 const char *after, ptrdiff_t bytesMore, int source_line,
7858 enum XML_Account account) {
7859 assert(! rootParser->m_parentParser);
7860
7861 fprintf(stderr,
7862 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7863 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7864 levelsAwayFromRootParser, source_line, 10, "");
7865
7866 const char ellipis[] = "[..]";
7867 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7868 const unsigned int contextLength = 10;
7869
7870 /* Note: Performance is of no concern here */
7871 const char *walker = before;
7872 if ((rootParser->m_accounting.debugLevel >= 3u)
7873 || (after - before)
7874 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7875 for (; walker < after; walker++) {
7876 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7877 }
7878 } else {
7879 for (; walker < before + contextLength; walker++) {
7880 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7881 }
7882 fprintf(stderr, ellipis);
7883 walker = after - contextLength;
7884 for (; walker < after; walker++) {
7885 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7886 }
7887 }
7888 fprintf(stderr, "\"\n");
7889 }
7890
7891 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7892 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7893 const char *after, int source_line,
7894 enum XML_Account account) {
7895 /* Note: We need to check the token type *first* to be sure that
7896 * we can even access variable <after>, safely.
7897 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7898 switch (tok) {
7899 case XML_TOK_INVALID:
7900 case XML_TOK_PARTIAL:
7901 case XML_TOK_PARTIAL_CHAR:
7902 case XML_TOK_NONE:
7903 return XML_TRUE;
7904 }
7905
7906 if (account == XML_ACCOUNT_NONE)
7907 return XML_TRUE; /* because these bytes have been accounted for, already */
7908
7909 unsigned int levelsAwayFromRootParser;
7910 const XML_Parser rootParser
7911 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7912 assert(! rootParser->m_parentParser);
7913
7914 const int isDirect
7915 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7916 const ptrdiff_t bytesMore = after - before;
7917
7918 XmlBigCount *const additionTarget
7919 = isDirect ? &rootParser->m_accounting.countBytesDirect
7920 : &rootParser->m_accounting.countBytesIndirect;
7921
7922 /* Detect and avoid integer overflow */
7923 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7924 return XML_FALSE;
7925 *additionTarget += bytesMore;
7926
7927 const XmlBigCount countBytesOutput
7928 = rootParser->m_accounting.countBytesDirect
7929 + rootParser->m_accounting.countBytesIndirect;
7930 const float amplificationFactor
7931 = accountingGetCurrentAmplification(rootParser);
7932 const XML_Bool tolerated
7933 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7934 || (amplificationFactor
7935 <= rootParser->m_accounting.maximumAmplificationFactor);
7936
7937 if (rootParser->m_accounting.debugLevel >= 2u) {
7938 accountingReportStats(rootParser, "");
7939 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7940 bytesMore, source_line, account);
7941 }
7942
7943 return tolerated;
7944 }
7945
7946 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7947 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7948 if (! parser)
7949 return 0;
7950 return parser->m_accounting.countBytesDirect;
7951 }
7952
7953 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7954 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7955 if (! parser)
7956 return 0;
7957 return parser->m_accounting.countBytesIndirect;
7958 }
7959
7960 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7961 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7962 const char *action, int sourceLine) {
7963 assert(! rootParser->m_parentParser);
7964 if (rootParser->m_entity_stats.debugLevel == 0u)
7965 return;
7966
7967 # if defined(XML_UNICODE)
7968 const char *const entityName = "[..]";
7969 # else
7970 const char *const entityName = entity->name;
7971 # endif
7972
7973 fprintf(
7974 stderr,
7975 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7976 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7977 rootParser->m_entity_stats.currentDepth,
7978 rootParser->m_entity_stats.maximumDepthSeen,
7979 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7980 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7981 sourceLine);
7982 }
7983
7984 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7985 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7986 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7987 assert(! rootParser->m_parentParser);
7988
7989 rootParser->m_entity_stats.countEverOpened++;
7990 rootParser->m_entity_stats.currentDepth++;
7991 if (rootParser->m_entity_stats.currentDepth
7992 > rootParser->m_entity_stats.maximumDepthSeen) {
7993 rootParser->m_entity_stats.maximumDepthSeen++;
7994 }
7995
7996 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7997 }
7998
7999 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)8000 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8001 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8002 assert(! rootParser->m_parentParser);
8003
8004 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8005 rootParser->m_entity_stats.currentDepth--;
8006 }
8007
8008 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)8009 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8010 XML_Parser rootParser = parser;
8011 unsigned int stepsTakenUpwards = 0;
8012 while (rootParser->m_parentParser) {
8013 rootParser = rootParser->m_parentParser;
8014 stepsTakenUpwards++;
8015 }
8016 assert(! rootParser->m_parentParser);
8017 if (outLevelDiff != NULL) {
8018 *outLevelDiff = stepsTakenUpwards;
8019 }
8020 return rootParser;
8021 }
8022
/* Returns a NUL-terminated, statically allocated textual representation
   of the byte c:

   - 0 is rendered as "\0", 9 as "\t", 10 as "\n" and 13 as "\r"
   - the double quote and the backslash are rendered with a leading backslash
   - all other bytes in the range 32..126 are rendered as themselves
   - every remaining byte is rendered as "\x" followed by its value in
     uppercase hexadecimal without zero padding (e.g. "\x1", "\x7F", "\xFF")

   The returned string must not be modified or freed by the caller. */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* One entry per byte value, indexed by the byte itself */
  static const char *const printable[] = {
      /* 0x00 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /* 0x04 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /* 0x08 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /* 0x0C */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /* 0x10 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /* 0x14 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /* 0x18 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /* 0x1C */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 */ " ",     "!",     "\\\"",  "#",
      /* 0x24 */ "$",     "%",     "&",     "'",
      /* 0x28 */ "(",     ")",     "*",     "+",
      /* 0x2C */ ",",     "-",     ".",     "/",
      /* 0x30 */ "0",     "1",     "2",     "3",
      /* 0x34 */ "4",     "5",     "6",     "7",
      /* 0x38 */ "8",     "9",     ":",     ";",
      /* 0x3C */ "<",     "=",     ">",     "?",
      /* 0x40 */ "@",     "A",     "B",     "C",
      /* 0x44 */ "D",     "E",     "F",     "G",
      /* 0x48 */ "H",     "I",     "J",     "K",
      /* 0x4C */ "L",     "M",     "N",     "O",
      /* 0x50 */ "P",     "Q",     "R",     "S",
      /* 0x54 */ "T",     "U",     "V",     "W",
      /* 0x58 */ "X",     "Y",     "Z",     "[",
      /* 0x5C */ "\\\\",  "]",     "^",     "_",
      /* 0x60 */ "`",     "a",     "b",     "c",
      /* 0x64 */ "d",     "e",     "f",     "g",
      /* 0x68 */ "h",     "i",     "j",     "k",
      /* 0x6C */ "l",     "m",     "n",     "o",
      /* 0x70 */ "p",     "q",     "r",     "s",
      /* 0x74 */ "t",     "u",     "v",     "w",
      /* 0x78 */ "x",     "y",     "z",     "{",
      /* 0x7C */ "|",     "}",     "~",     "\\x7F",
      /* 0x80 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 0x84 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 0x88 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 0x8C */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 0x94 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 0x98 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 0x9C */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 0xA4 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 0xA8 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 0xAC */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 0xB4 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 0xB8 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 0xBC */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 0xC4 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 0xC8 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 0xCC */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 0xD4 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 0xD8 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 0xDC */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 0xE4 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 0xE8 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 0xEC */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 0xF4 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 0xF8 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 0xFC */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  const size_t index = c;
  if (index >= sizeof(printable) / sizeof(printable[0])) {
    assert(0); /* never gets here */
    return "dead code";
  }
  return printable[index];
}
8544
8545 #endif /* XML_GE == 1 */
8546
/* Reads a debug level from the environment variable named variableName.

   Returns defaultDebugLevel when the variable is not set, or when its
   value is not accepted in full by strtoul in base 10 (conversion error,
   nothing converted, or characters left over after the number).
   Otherwise returns the converted value.

   Whenever the variable is set, errno is 0 on return: it is cleared
   before the conversion and cleared again if the conversion is rejected. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul reports range errors through errno only, so start clean */
  char *end = NULL;
  errno = 0;
  const unsigned long parsed = strtoul(text, &end, 10);

  /* Accept only an error-free conversion that consumed the whole value */
  if ((errno == 0) && (end != text) && (end[0] == '\0')) {
    return parsed;
  }

  errno = 0;
  return defaultDebugLevel;
}
8565
8566 #endif /* LV_USE_XML */
8567
8568