1 /*
2 * Copyright (c) 2020 - 2024 the ThorVG project. All rights reserved.
3
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
13
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "../../lv_conf_internal.h"
24 #if LV_USE_THORVG_INTERNAL
25
26 #include <cstring>
27 #include <ctype.h>
28 #include <string>
29
30 #ifdef _WIN32
31 #include <malloc.h>
32 #elif defined(__linux__)
33 #include <alloca.h>
34 #else
35 #include <stdlib.h>
36 #endif
37
38 #include "tvgXmlParser.h"
39 #include "tvgStr.h"
40
41 /************************************************************************/
42 /* Internal Class Implementation */
43 /************************************************************************/
44
_isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char * tagAttribute,TVG_UNUSED const char * tagValue)45 bool _isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char* tagAttribute, TVG_UNUSED const char* tagValue)
46 {
47 #ifdef THORVG_LOG_ENABLED
48 const auto attributesNum = 6;
49 const struct
50 {
51 const char* tag;
52 bool tagWildcard; //If true, it is assumed that a wildcard is used after the tag. (ex: tagName*)
53 const char* value;
54 } attributes[] = {
55 {"id", false, nullptr},
56 {"data-name", false, nullptr},
57 {"overflow", false, "visible"},
58 {"version", false, nullptr},
59 {"xmlns", true, nullptr},
60 {"xml:space", false, nullptr},
61 };
62
63 for (unsigned int i = 0; i < attributesNum; ++i) {
64 if (!strncmp(tagAttribute, attributes[i].tag, attributes[i].tagWildcard ? strlen(attributes[i].tag) : strlen(tagAttribute))) {
65 if (attributes[i].value && tagValue) {
66 if (!strncmp(tagValue, attributes[i].value, strlen(tagValue))) {
67 return true;
68 } else continue;
69 }
70 return true;
71 }
72 }
73 return false;
74 #endif
75 return true;
76 }
77
78
/*
 * Returns the position of the first whitespace character in [itr, itrEnd),
 * or itrEnd when the range contains none.
 */
static const char* _simpleXmlFindWhiteSpace(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd && !isspace((unsigned char)*itr)) ++itr;
    return itr;
}
86
87
/*
 * Returns the position of the first non-whitespace character in [itr, itrEnd),
 * or itrEnd when the range holds only whitespace.
 */
static const char* _simpleXmlSkipWhiteSpace(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd && isspace((unsigned char)*itr)) ++itr;
    return itr;
}
95
96
/*
 * Walks backwards from itr (one past the last character) over trailing
 * whitespace and returns the new one-past-the-end position.
 *
 * The character at itrStart itself is never examined, so for a non-empty range
 * the result is never smaller than itrStart + 1. An empty range is returned
 * unchanged.
 */
static const char* _simpleXmlUnskipWhiteSpace(const char* itr, const char* itrStart)
{
    //(itr - itrStart) > 1 keeps itr[-1] strictly after itrStart
    while ((itr - itrStart) > 1 && isspace((unsigned char)itr[-1])) --itr;
    return itr;
}
104
105
_simpleXmlSkipXmlEntities(const char * itr,const char * itrEnd)106 static const char* _simpleXmlSkipXmlEntities(const char* itr, const char* itrEnd)
107 {
108 auto p = itr;
109 while (itr < itrEnd && *itr == '&') {
110 for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
111 if (strncmp(itr, xmlEntity[i], xmlEntityLength[i]) == 0) {
112 itr += xmlEntityLength[i];
113 break;
114 }
115 }
116 if (itr == p) break;
117 p = itr;
118 }
119 return itr;
120 }
121
122
_simpleXmlUnskipXmlEntities(const char * itr,const char * itrStart)123 static const char* _simpleXmlUnskipXmlEntities(const char* itr, const char* itrStart)
124 {
125 auto p = itr;
126 while (itr > itrStart && *(itr - 1) == ';') {
127 for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
128 if (itr - xmlEntityLength[i] > itrStart &&
129 strncmp(itr - xmlEntityLength[i], xmlEntity[i], xmlEntityLength[i]) == 0) {
130 itr -= xmlEntityLength[i];
131 break;
132 }
133 }
134 if (itr == p) break;
135 p = itr;
136 }
137 return itr;
138 }
139
140
_skipWhiteSpacesAndXmlEntities(const char * itr,const char * itrEnd)141 static const char* _skipWhiteSpacesAndXmlEntities(const char* itr, const char* itrEnd)
142 {
143 itr = _simpleXmlSkipWhiteSpace(itr, itrEnd);
144 auto p = itr;
145 while (true) {
146 if (p != (itr = _simpleXmlSkipXmlEntities(itr, itrEnd))) p = itr;
147 else break;
148 if (p != (itr = _simpleXmlSkipWhiteSpace(itr, itrEnd))) p = itr;
149 else break;
150 }
151 return itr;
152 }
153
154
_unskipWhiteSpacesAndXmlEntities(const char * itr,const char * itrStart)155 static const char* _unskipWhiteSpacesAndXmlEntities(const char* itr, const char* itrStart)
156 {
157 itr = _simpleXmlUnskipWhiteSpace(itr, itrStart);
158 auto p = itr;
159 while (true) {
160 if (p != (itr = _simpleXmlUnskipXmlEntities(itr, itrStart))) p = itr;
161 else break;
162 if (p != (itr = _simpleXmlUnskipWhiteSpace(itr, itrStart))) p = itr;
163 else break;
164 }
165 return itr;
166 }
167
168
/*
 * Returns the position of the first '<' in [itr, itrEnd), or nullptr when the
 * range contains none.
 */
static const char* _simpleXmlFindStartTag(const char* itr, const char* itrEnd)
{
    const void* found = memchr(itr, '<', itrEnd - itr);
    return (const char*)found;
}
173
174
/*
 * Returns the position of the first '>' or '<' in [itr, itrEnd) that is not
 * enclosed in a single or double quoted string, or nullptr when there is none.
 * A quote character appearing inside the other kind of quotes is plain text.
 */
static const char* _simpleXmlFindEndTag(const char* itr, const char* itrEnd)
{
    bool inDoubleQuote = false;
    bool inSingleQuote = false;

    for (; itr < itrEnd; itr++) {
        switch (*itr) {
            case '"': {
                if (!inSingleQuote) inDoubleQuote = !inDoubleQuote;
                break;
            }
            case '\'': {
                if (!inDoubleQuote) inSingleQuote = !inSingleQuote;
                break;
            }
            case '>':
            case '<': {
                if (!inDoubleQuote && !inSingleQuote) return itr;
                break;
            }
            default: {
                break;
            }
        }
    }
    return nullptr;
}
188
189
/*
 * Looks for the first "-->" lying completely inside [itr, itrEnd).
 * Returns the position of its '>' or nullptr when there is none.
 */
static const char* _simpleXmlFindEndCommentTag(const char* itr, const char* itrEnd)
{
    //At least three characters must remain for the terminator to fit
    for (; (itrEnd - itr) > 2; ++itr) {
        if (itr[0] == '-' && itr[1] == '-' && itr[2] == '>') return itr + 2;
    }
    return nullptr;
}
197
198
/*
 * Looks for the first "]]>" lying completely inside [itr, itrEnd).
 * Returns the position of its '>' or nullptr when there is none.
 */
static const char* _simpleXmlFindEndCdataTag(const char* itr, const char* itrEnd)
{
    //At least three characters must remain for the terminator to fit
    for (; (itrEnd - itr) > 2; ++itr) {
        if (itr[0] == ']' && itr[1] == ']' && itr[2] == '>') return itr + 2;
    }
    return nullptr;
}
206
207
/*
 * Returns the position of the first '>' in [itr, itrEnd), quoted or not,
 * or nullptr when the range is empty or contains none.
 */
static const char* _simpleXmlFindDoctypeChildEndTag(const char* itr, const char* itrEnd)
{
    if (itr >= itrEnd) return nullptr;
    return (const char*)memchr(itr, '>', itrEnd - itr);
}
215
216
_getXMLType(const char * itr,const char * itrEnd,size_t & toff)217 static SimpleXMLType _getXMLType(const char* itr, const char* itrEnd, size_t &toff)
218 {
219 toff = 0;
220 if (itr[1] == '/') {
221 toff = 1;
222 return SimpleXMLType::Close;
223 } else if (itr[1] == '?') {
224 toff = 1;
225 return SimpleXMLType::Processing;
226 } else if (itr[1] == '!') {
227 if ((itr + sizeof("<!DOCTYPE>") - 1 < itrEnd) && (!memcmp(itr + 2, "DOCTYPE", sizeof("DOCTYPE") - 1)) && ((itr[2 + sizeof("DOCTYPE") - 1] == '>') || (isspace((unsigned char)itr[2 + sizeof("DOCTYPE") - 1])))) {
228 toff = sizeof("!DOCTYPE") - 1;
229 return SimpleXMLType::Doctype;
230 } else if ((itr + sizeof("<![CDATA[]]>") - 1 < itrEnd) && (!memcmp(itr + 2, "[CDATA[", sizeof("[CDATA[") - 1))) {
231 toff = sizeof("![CDATA[") - 1;
232 return SimpleXMLType::CData;
233 } else if ((itr + sizeof("<!---->") - 1 < itrEnd) && (!memcmp(itr + 2, "--", sizeof("--") - 1))) {
234 toff = sizeof("!--") - 1;
235 return SimpleXMLType::Comment;
236 } else if (itr + sizeof("<!>") - 1 < itrEnd) {
237 toff = sizeof("!") - 1;
238 return SimpleXMLType::DoctypeChild;
239 }
240 return SimpleXMLType::Open;
241 }
242 return SimpleXMLType::Open;
243 }
244
245
246 /************************************************************************/
247 /* External Class Implementation */
248 /************************************************************************/
249
simpleXmlNodeTypeToString(TVG_UNUSED SvgNodeType type)250 const char* simpleXmlNodeTypeToString(TVG_UNUSED SvgNodeType type)
251 {
252 #ifdef THORVG_LOG_ENABLED
253 static const char* TYPE_NAMES[] = {
254 "Svg",
255 "G",
256 "Defs",
257 "Animation",
258 "Arc",
259 "Circle",
260 "Ellipse",
261 "Image",
262 "Line",
263 "Path",
264 "Polygon",
265 "Polyline",
266 "Rect",
267 "Text",
268 "TextArea",
269 "Tspan",
270 "Use",
271 "Video",
272 "ClipPath",
273 "Mask",
274 "Symbol",
275 "Unknown",
276 };
277 return TYPE_NAMES[(int) type];
278 #endif
279 return nullptr;
280 }
281
282
isIgnoreUnsupportedLogElements(TVG_UNUSED const char * tagName)283 bool isIgnoreUnsupportedLogElements(TVG_UNUSED const char* tagName)
284 {
285 #ifdef THORVG_LOG_ENABLED
286 const auto elementsNum = 1;
287 const char* const elements[] = { "title" };
288
289 for (unsigned int i = 0; i < elementsNum; ++i) {
290 if (!strncmp(tagName, elements[i], strlen(tagName))) {
291 return true;
292 }
293 }
294 return false;
295 #else
296 return true;
297 #endif
298 }
299
300
simpleXmlParseAttributes(const char * buf,unsigned bufLength,simpleXMLAttributeCb func,const void * data)301 bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
302 {
303 const char *itr = buf, *itrEnd = buf + bufLength;
304 char* tmpBuf = (char*)malloc(bufLength + 1);
305
306 if (!buf || !func || !tmpBuf) goto error;
307
308 while (itr < itrEnd) {
309 const char* p = _skipWhiteSpacesAndXmlEntities(itr, itrEnd);
310 const char *key, *keyEnd, *value, *valueEnd;
311 char* tval;
312
313 if (p == itrEnd) goto success;
314
315 key = p;
316 for (keyEnd = key; keyEnd < itrEnd; keyEnd++) {
317 if ((*keyEnd == '=') || (isspace((unsigned char)*keyEnd))) break;
318 }
319 if (keyEnd == itrEnd) goto error;
320 if (keyEnd == key) { // There is no key. This case is invalid, but explores the following syntax.
321 itr = keyEnd + 1;
322 continue;
323 }
324
325 if (*keyEnd == '=') value = keyEnd + 1;
326 else {
327 value = (const char*)memchr(keyEnd, '=', itrEnd - keyEnd);
328 if (!value) goto error;
329 value++;
330 }
331 keyEnd = _simpleXmlUnskipXmlEntities(keyEnd, key);
332
333 value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
334 if (value == itrEnd) goto error;
335
336 if ((*value == '"') || (*value == '\'')) {
337 valueEnd = (const char*)memchr(value + 1, *value, itrEnd - value);
338 if (!valueEnd) goto error;
339 value++;
340 } else {
341 valueEnd = _simpleXmlFindWhiteSpace(value, itrEnd);
342 }
343
344 itr = valueEnd + 1;
345
346 value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
347 valueEnd = _unskipWhiteSpacesAndXmlEntities(valueEnd, value);
348
349 memcpy(tmpBuf, key, keyEnd - key);
350 tmpBuf[keyEnd - key] = '\0';
351
352 tval = tmpBuf + (keyEnd - key) + 1;
353 int i = 0;
354 while (value < valueEnd) {
355 value = _simpleXmlSkipXmlEntities(value, valueEnd);
356 tval[i++] = *value;
357 value++;
358 }
359 tval[i] = '\0';
360
361 if (!func((void*)data, tmpBuf, tval)) {
362 if (!_isIgnoreUnsupportedLogAttributes(tmpBuf, tval)) {
363 TVGLOG("SVG", "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID", tmpBuf, tval ? tval : "NONE");
364 }
365 }
366 }
367
368 success:
369 free(tmpBuf);
370 return true;
371
372 error:
373 free(tmpBuf);
374 return false;
375 }
376
377
simpleXmlParse(const char * buf,unsigned bufLength,bool strip,simpleXMLCb func,const void * data)378 bool simpleXmlParse(const char* buf, unsigned bufLength, bool strip, simpleXMLCb func, const void* data)
379 {
380 const char *itr = buf, *itrEnd = buf + bufLength;
381
382 if (!buf || !func) return false;
383
384 while (itr < itrEnd) {
385 if (itr[0] == '<') {
386 //Invalid case
387 if (itr + 1 >= itrEnd) return false;
388
389 size_t toff = 0;
390 SimpleXMLType type = _getXMLType(itr, itrEnd, toff);
391
392 const char* p;
393 if (type == SimpleXMLType::CData) p = _simpleXmlFindEndCdataTag(itr + 1 + toff, itrEnd);
394 else if (type == SimpleXMLType::DoctypeChild) p = _simpleXmlFindDoctypeChildEndTag(itr + 1 + toff, itrEnd);
395 else if (type == SimpleXMLType::Comment) p = _simpleXmlFindEndCommentTag(itr + 1 + toff, itrEnd);
396 else p = _simpleXmlFindEndTag(itr + 1 + toff, itrEnd);
397
398 if (p) {
399 //Invalid case: '<' nested
400 if (*p == '<' && type != SimpleXMLType::Doctype) return false;
401 const char *start, *end;
402
403 start = itr + 1 + toff;
404 end = p;
405
406 switch (type) {
407 case SimpleXMLType::Open: {
408 if (p[-1] == '/') {
409 type = SimpleXMLType::OpenEmpty;
410 end--;
411 }
412 break;
413 }
414 case SimpleXMLType::CData: {
415 if (!memcmp(p - 2, "]]", 2)) end -= 2;
416 break;
417 }
418 case SimpleXMLType::Processing: {
419 if (p[-1] == '?') end--;
420 break;
421 }
422 case SimpleXMLType::Comment: {
423 if (!memcmp(p - 2, "--", 2)) end -= 2;
424 break;
425 }
426 default: {
427 break;
428 }
429 }
430
431 if (strip && (type != SimpleXMLType::CData)) {
432 start = _skipWhiteSpacesAndXmlEntities(start, end);
433 end = _unskipWhiteSpacesAndXmlEntities(end, start);
434 }
435
436 if (!func((void*)data, type, start, (unsigned int)(end - start))) return false;
437
438 itr = p + 1;
439 } else {
440 return false;
441 }
442 } else {
443 const char *p, *end;
444
445 if (strip) {
446 p = itr;
447 p = _skipWhiteSpacesAndXmlEntities(p, itrEnd);
448 if (p) {
449 if (!func((void*)data, SimpleXMLType::Ignored, itr, (unsigned int)(p - itr))) return false;
450 itr = p;
451 }
452 }
453
454 p = _simpleXmlFindStartTag(itr, itrEnd);
455 if (!p) p = itrEnd;
456
457 end = p;
458 if (strip) end = _unskipWhiteSpacesAndXmlEntities(end, itr);
459
460 if (itr != end && !func((void*)data, SimpleXMLType::Data, itr, (unsigned int)(end - itr))) return false;
461
462 if (strip && (end < p) && !func((void*)data, SimpleXMLType::Ignored, end, (unsigned int)(p - end))) return false;
463
464 itr = p;
465 }
466 }
467 return true;
468 }
469
470
simpleXmlParseW3CAttribute(const char * buf,unsigned bufLength,simpleXMLAttributeCb func,const void * data)471 bool simpleXmlParseW3CAttribute(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
472 {
473 const char* end;
474 char* key;
475 char* val;
476 char* next;
477
478 if (!buf) return false;
479
480 end = buf + bufLength;
481 key = (char*)alloca(end - buf + 1);
482 val = (char*)alloca(end - buf + 1);
483
484 if (buf == end) return true;
485
486 do {
487 char* sep = (char*)strchr(buf, ':');
488 next = (char*)strchr(buf, ';');
489 if (sep >= end) {
490 next = nullptr;
491 sep = nullptr;
492 }
493 if (next >= end) next = nullptr;
494
495 key[0] = '\0';
496 val[0] = '\0';
497
498 if (sep != nullptr && next == nullptr) {
499 memcpy(key, buf, sep - buf);
500 key[sep - buf] = '\0';
501
502 memcpy(val, sep + 1, end - sep - 1);
503 val[end - sep - 1] = '\0';
504 } else if (sep != nullptr && sep < next) {
505 memcpy(key, buf, sep - buf);
506 key[sep - buf] = '\0';
507
508 memcpy(val, sep + 1, next - sep - 1);
509 val[next - sep - 1] = '\0';
510 } else if (next) {
511 memcpy(key, buf, next - buf);
512 key[next - buf] = '\0';
513 }
514
515 if (key[0]) {
516 key = const_cast<char*>(_simpleXmlSkipWhiteSpace(key, key + strlen(key)));
517 key[_simpleXmlUnskipWhiteSpace(key + strlen(key) , key) - key] = '\0';
518 val = const_cast<char*>(_simpleXmlSkipWhiteSpace(val, val + strlen(val)));
519 val[_simpleXmlUnskipWhiteSpace(val + strlen(val) , val) - val] = '\0';
520
521 if (!func((void*)data, key, val)) {
522 if (!_isIgnoreUnsupportedLogAttributes(key, val)) {
523 TVGLOG("SVG", "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID", key, val ? val : "NONE");
524 }
525 }
526 }
527
528 if (!next) break;
529 buf = next + 1;
530 } while (true);
531
532 return true;
533 }
534
535
536 /*
537 * Supported formats:
538 * tag {}, .name {}, tag.name{}
539 */
simpleXmlParseCSSAttribute(const char * buf,unsigned bufLength,char ** tag,char ** name,const char ** attrs,unsigned * attrsLength)540 const char* simpleXmlParseCSSAttribute(const char* buf, unsigned bufLength, char** tag, char** name, const char** attrs, unsigned* attrsLength)
541 {
542 if (!buf) return nullptr;
543
544 *tag = *name = nullptr;
545 *attrsLength = 0;
546
547 auto itr = _simpleXmlSkipWhiteSpace(buf, buf + bufLength);
548 auto itrEnd = (const char*)memchr(buf, '{', bufLength);
549
550 if (!itrEnd || itr == itrEnd) return nullptr;
551
552 auto nextElement = (const char*)memchr(itrEnd, '}', bufLength - (itrEnd - buf));
553 if (!nextElement) return nullptr;
554
555 *attrs = itrEnd + 1;
556 *attrsLength = nextElement - *attrs;
557
558 const char *p;
559
560 itrEnd = _simpleXmlUnskipWhiteSpace(itrEnd, itr);
561 if (*(itrEnd - 1) == '.') return nullptr;
562
563 for (p = itr; p < itrEnd; p++) {
564 if (*p == '.') break;
565 }
566
567 if (p == itr) *tag = strdup("all");
568 else *tag = strDuplicate(itr, p - itr);
569
570 if (p == itrEnd) *name = nullptr;
571 else *name = strDuplicate(p + 1, itrEnd - p - 1);
572
573 return (nextElement ? nextElement + 1 : nullptr);
574 }
575
576
simpleXmlFindAttributesTag(const char * buf,unsigned bufLength)577 const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength)
578 {
579 const char *itr = buf, *itrEnd = buf + bufLength;
580
581 for (; itr < itrEnd; itr++) {
582 if (!isspace((unsigned char)*itr)) {
583 //User skip tagname and already gave it the attributes.
584 if (*itr == '=') return buf;
585 } else {
586 itr = _simpleXmlUnskipXmlEntities(itr, buf);
587 if (itr == itrEnd) return nullptr;
588 return itr;
589 }
590 }
591
592 return nullptr;
593 }
594
595 #endif /* LV_USE_THORVG_INTERNAL */
596
597