1 /*
2  * Copyright (c) 2020 - 2024 the ThorVG project. All rights reserved.
3 
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10 
11  * The above copyright notice and this permission notice shall be included in all
12  * copies or substantial portions of the Software.
13 
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 #include "../../lv_conf_internal.h"
24 #if LV_USE_THORVG_INTERNAL
25 
26 #include <cstring>
27 #include <ctype.h>
28 #include <string>
29 
30 #ifdef _WIN32
31     #include <malloc.h>
32 #elif defined(__linux__)
33     #include <alloca.h>
34 #else
35     #include <stdlib.h>
36 #endif
37 
38 #include "tvgXmlParser.h"
39 #include "tvgStr.h"
40 
41 /************************************************************************/
42 /* Internal Class Implementation                                        */
43 /************************************************************************/
44 
_isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char * tagAttribute,TVG_UNUSED const char * tagValue)45 bool _isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char* tagAttribute, TVG_UNUSED const char* tagValue)
46 {
47 #ifdef THORVG_LOG_ENABLED
48     const auto attributesNum = 6;
49     const struct
50     {
51         const char* tag;
52         bool tagWildcard; //If true, it is assumed that a wildcard is used after the tag. (ex: tagName*)
53         const char* value;
54     } attributes[] = {
55         {"id", false, nullptr},
56         {"data-name", false, nullptr},
57         {"overflow", false, "visible"},
58         {"version", false, nullptr},
59         {"xmlns", true, nullptr},
60         {"xml:space", false, nullptr},
61     };
62 
63     for (unsigned int i = 0; i < attributesNum; ++i) {
64         if (!strncmp(tagAttribute, attributes[i].tag, attributes[i].tagWildcard ? strlen(attributes[i].tag) : strlen(tagAttribute))) {
65             if (attributes[i].value && tagValue) {
66                 if (!strncmp(tagValue, attributes[i].value, strlen(tagValue))) {
67                     return true;
68                 } else continue;
69             }
70             return true;
71         }
72     }
73     return false;
74 #endif
75     return true;
76 }
77 
78 
/*
 * Returns the first whitespace character in [itr, itrEnd), or itrEnd when there is none.
 */
static const char* _simpleXmlFindWhiteSpace(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd && !isspace((unsigned char)*itr)) ++itr;
    return itr;
}
86 
87 
/*
 * Returns the first non-whitespace character in [itr, itrEnd), or itrEnd when the
 * whole range is whitespace.
 */
static const char* _simpleXmlSkipWhiteSpace(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd && isspace((unsigned char)*itr)) ++itr;
    return itr;
}
95 
96 
/*
 * Walks backwards from itr (one past the last character of the span) over trailing
 * whitespace and returns the new one-past-the-end position.
 *
 * The character at itrStart itself is never examined, so the result is never smaller
 * than itrStart + 1 when itr > itrStart.
 */
static const char* _simpleXmlUnskipWhiteSpace(const char* itr, const char* itrStart)
{
    const char* last = itr - 1;
    while (last > itrStart && isspace((unsigned char)*last)) --last;
    return last + 1;
}
104 
105 
_simpleXmlSkipXmlEntities(const char * itr,const char * itrEnd)106 static const char* _simpleXmlSkipXmlEntities(const char* itr, const char* itrEnd)
107 {
108     auto p = itr;
109     while (itr < itrEnd && *itr == '&') {
110         for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
111             if (strncmp(itr, xmlEntity[i], xmlEntityLength[i]) == 0) {
112                 itr += xmlEntityLength[i];
113                 break;
114             }
115         }
116         if (itr == p) break;
117         p = itr;
118     }
119     return itr;
120 }
121 
122 
_simpleXmlUnskipXmlEntities(const char * itr,const char * itrStart)123 static const char* _simpleXmlUnskipXmlEntities(const char* itr, const char* itrStart)
124 {
125     auto p = itr;
126     while (itr > itrStart && *(itr - 1) == ';') {
127         for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
128             if (itr - xmlEntityLength[i] > itrStart &&
129                 strncmp(itr - xmlEntityLength[i], xmlEntity[i], xmlEntityLength[i]) == 0) {
130                 itr -= xmlEntityLength[i];
131                 break;
132             }
133         }
134         if (itr == p) break;
135         p = itr;
136     }
137     return itr;
138 }
139 
140 
_skipWhiteSpacesAndXmlEntities(const char * itr,const char * itrEnd)141 static const char* _skipWhiteSpacesAndXmlEntities(const char* itr, const char* itrEnd)
142 {
143     itr = _simpleXmlSkipWhiteSpace(itr, itrEnd);
144     auto p = itr;
145     while (true) {
146         if (p != (itr = _simpleXmlSkipXmlEntities(itr, itrEnd))) p = itr;
147         else break;
148         if (p != (itr = _simpleXmlSkipWhiteSpace(itr, itrEnd))) p = itr;
149         else break;
150     }
151     return itr;
152 }
153 
154 
_unskipWhiteSpacesAndXmlEntities(const char * itr,const char * itrStart)155 static const char* _unskipWhiteSpacesAndXmlEntities(const char* itr, const char* itrStart)
156 {
157     itr = _simpleXmlUnskipWhiteSpace(itr, itrStart);
158     auto p = itr;
159     while (true) {
160         if (p != (itr = _simpleXmlUnskipXmlEntities(itr, itrStart))) p = itr;
161         else break;
162         if (p != (itr = _simpleXmlUnskipWhiteSpace(itr, itrStart))) p = itr;
163         else break;
164     }
165     return itr;
166 }
167 
168 
/*
 * Returns the first '<' in [itr, itrEnd), or null when the range holds none.
 */
static const char* _simpleXmlFindStartTag(const char* itr, const char* itrEnd)
{
    const size_t span = itrEnd - itr;
    const void* hit = memchr(itr, '<', span);
    return (const char*)hit;
}
173 
174 
/*
 * Returns the first '>' or '<' in [itr, itrEnd) that is not enclosed in a single- or
 * double-quoted section, or null when there is none.
 *
 * A quote character of the other kind inside a quoted section is plain content.
 */
static const char* _simpleXmlFindEndTag(const char* itr, const char* itrEnd)
{
    char openQuote = '\0'; //Quote character of the section being scanned, '\0' outside quotes.

    for (const char* cur = itr; cur < itrEnd; ++cur) {
        const char ch = *cur;

        if (openQuote != '\0') {
            if (ch == openQuote) openQuote = '\0';
            continue;
        }
        if (ch == '"' || ch == '\'') {
            openQuote = ch;
            continue;
        }
        if (ch == '>' || ch == '<') return cur;
    }
    return nullptr;
}
188 
189 
/*
 * Looks for the first "-->" lying completely inside [itr, itrEnd).
 * Returns a pointer to its '>' or null when the sequence is not found.
 */
static const char* _simpleXmlFindEndCommentTag(const char* itr, const char* itrEnd)
{
    //Three characters are needed for a match, so stop once fewer remain.
    for (const char* cur = itr; itrEnd - cur > 2; ++cur) {
        if (cur[0] == '-' && cur[1] == '-' && cur[2] == '>') return cur + 2;
    }
    return nullptr;
}
197 
198 
/*
 * Looks for the first "]]>" lying completely inside [itr, itrEnd).
 * Returns a pointer to its '>' or null when the sequence is not found.
 */
static const char* _simpleXmlFindEndCdataTag(const char* itr, const char* itrEnd)
{
    //Three characters are needed for a match, so stop once fewer remain.
    for (const char* cur = itr; itrEnd - cur > 2; ++cur) {
        if (cur[0] == ']' && cur[1] == ']' && cur[2] == '>') return cur + 2;
    }
    return nullptr;
}
206 
207 
/*
 * Returns the first '>' in [itr, itrEnd), or null when the range is empty or holds none.
 */
static const char* _simpleXmlFindDoctypeChildEndTag(const char* itr, const char* itrEnd)
{
    if (itr >= itrEnd) return nullptr;
    return (const char*)memchr(itr, '>', (size_t)(itrEnd - itr));
}
215 
216 
_getXMLType(const char * itr,const char * itrEnd,size_t & toff)217 static SimpleXMLType _getXMLType(const char* itr, const char* itrEnd, size_t &toff)
218 {
219     toff = 0;
220     if (itr[1] == '/') {
221         toff = 1;
222         return SimpleXMLType::Close;
223     } else if (itr[1] == '?') {
224         toff = 1;
225         return SimpleXMLType::Processing;
226     } else if (itr[1] == '!') {
227         if ((itr + sizeof("<!DOCTYPE>") - 1 < itrEnd) && (!memcmp(itr + 2, "DOCTYPE", sizeof("DOCTYPE") - 1)) && ((itr[2 + sizeof("DOCTYPE") - 1] == '>') || (isspace((unsigned char)itr[2 + sizeof("DOCTYPE") - 1])))) {
228             toff = sizeof("!DOCTYPE") - 1;
229             return SimpleXMLType::Doctype;
230         } else if ((itr + sizeof("<![CDATA[]]>") - 1 < itrEnd) && (!memcmp(itr + 2, "[CDATA[", sizeof("[CDATA[") - 1))) {
231             toff = sizeof("![CDATA[") - 1;
232             return SimpleXMLType::CData;
233         } else if ((itr + sizeof("<!---->") - 1 < itrEnd) && (!memcmp(itr + 2, "--", sizeof("--") - 1))) {
234             toff = sizeof("!--") - 1;
235             return SimpleXMLType::Comment;
236         } else if (itr + sizeof("<!>") - 1 < itrEnd) {
237             toff = sizeof("!") - 1;
238             return SimpleXMLType::DoctypeChild;
239         }
240         return SimpleXMLType::Open;
241     }
242     return SimpleXMLType::Open;
243 }
244 
245 
246 /************************************************************************/
247 /* External Class Implementation                                        */
248 /************************************************************************/
249 
simpleXmlNodeTypeToString(TVG_UNUSED SvgNodeType type)250 const char* simpleXmlNodeTypeToString(TVG_UNUSED SvgNodeType type)
251 {
252 #ifdef THORVG_LOG_ENABLED
253     static const char* TYPE_NAMES[] = {
254         "Svg",
255         "G",
256         "Defs",
257         "Animation",
258         "Arc",
259         "Circle",
260         "Ellipse",
261         "Image",
262         "Line",
263         "Path",
264         "Polygon",
265         "Polyline",
266         "Rect",
267         "Text",
268         "TextArea",
269         "Tspan",
270         "Use",
271         "Video",
272         "ClipPath",
273         "Mask",
274         "Symbol",
275         "Unknown",
276     };
277     return TYPE_NAMES[(int) type];
278 #endif
279     return nullptr;
280 }
281 
282 
isIgnoreUnsupportedLogElements(TVG_UNUSED const char * tagName)283 bool isIgnoreUnsupportedLogElements(TVG_UNUSED const char* tagName)
284 {
285 #ifdef THORVG_LOG_ENABLED
286     const auto elementsNum = 1;
287     const char* const elements[] = { "title" };
288 
289     for (unsigned int i = 0; i < elementsNum; ++i) {
290         if (!strncmp(tagName, elements[i], strlen(tagName))) {
291             return true;
292         }
293     }
294     return false;
295 #else
296     return true;
297 #endif
298 }
299 
300 
simpleXmlParseAttributes(const char * buf,unsigned bufLength,simpleXMLAttributeCb func,const void * data)301 bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
302 {
303     const char *itr = buf, *itrEnd = buf + bufLength;
304     char* tmpBuf = (char*)malloc(bufLength + 1);
305 
306     if (!buf || !func || !tmpBuf) goto error;
307 
308     while (itr < itrEnd) {
309         const char* p = _skipWhiteSpacesAndXmlEntities(itr, itrEnd);
310         const char *key, *keyEnd, *value, *valueEnd;
311         char* tval;
312 
313         if (p == itrEnd) goto success;
314 
315         key = p;
316         for (keyEnd = key; keyEnd < itrEnd; keyEnd++) {
317             if ((*keyEnd == '=') || (isspace((unsigned char)*keyEnd))) break;
318         }
319         if (keyEnd == itrEnd) goto error;
320         if (keyEnd == key) {  // There is no key. This case is invalid, but explores the following syntax.
321             itr = keyEnd + 1;
322             continue;
323         }
324 
325         if (*keyEnd == '=') value = keyEnd + 1;
326         else {
327             value = (const char*)memchr(keyEnd, '=', itrEnd - keyEnd);
328             if (!value) goto error;
329             value++;
330         }
331         keyEnd = _simpleXmlUnskipXmlEntities(keyEnd, key);
332 
333         value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
334         if (value == itrEnd) goto error;
335 
336         if ((*value == '"') || (*value == '\'')) {
337             valueEnd = (const char*)memchr(value + 1, *value, itrEnd - value);
338             if (!valueEnd) goto error;
339             value++;
340         } else {
341             valueEnd = _simpleXmlFindWhiteSpace(value, itrEnd);
342         }
343 
344         itr = valueEnd + 1;
345 
346         value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
347         valueEnd = _unskipWhiteSpacesAndXmlEntities(valueEnd, value);
348 
349         memcpy(tmpBuf, key, keyEnd - key);
350         tmpBuf[keyEnd - key] = '\0';
351 
352         tval = tmpBuf + (keyEnd - key) + 1;
353         int i = 0;
354         while (value < valueEnd) {
355             value = _simpleXmlSkipXmlEntities(value, valueEnd);
356             tval[i++] = *value;
357             value++;
358         }
359         tval[i] = '\0';
360 
361         if (!func((void*)data, tmpBuf, tval)) {
362             if (!_isIgnoreUnsupportedLogAttributes(tmpBuf, tval)) {
363                 TVGLOG("SVG", "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID", tmpBuf, tval ? tval : "NONE");
364             }
365         }
366     }
367 
368 success:
369     free(tmpBuf);
370     return true;
371 
372 error:
373     free(tmpBuf);
374     return false;
375 }
376 
377 
simpleXmlParse(const char * buf,unsigned bufLength,bool strip,simpleXMLCb func,const void * data)378 bool simpleXmlParse(const char* buf, unsigned bufLength, bool strip, simpleXMLCb func, const void* data)
379 {
380     const char *itr = buf, *itrEnd = buf + bufLength;
381 
382     if (!buf || !func) return false;
383 
384     while (itr < itrEnd) {
385         if (itr[0] == '<') {
386             //Invalid case
387             if (itr + 1 >= itrEnd) return false;
388 
389             size_t toff = 0;
390             SimpleXMLType type = _getXMLType(itr, itrEnd, toff);
391 
392             const char* p;
393             if (type == SimpleXMLType::CData) p = _simpleXmlFindEndCdataTag(itr + 1 + toff, itrEnd);
394             else if (type == SimpleXMLType::DoctypeChild) p = _simpleXmlFindDoctypeChildEndTag(itr + 1 + toff, itrEnd);
395             else if (type == SimpleXMLType::Comment) p = _simpleXmlFindEndCommentTag(itr + 1 + toff, itrEnd);
396             else p = _simpleXmlFindEndTag(itr + 1 + toff, itrEnd);
397 
398             if (p) {
399                 //Invalid case: '<' nested
400                 if (*p == '<' && type != SimpleXMLType::Doctype) return false;
401                 const char *start, *end;
402 
403                 start = itr + 1 + toff;
404                 end = p;
405 
406                 switch (type) {
407                     case SimpleXMLType::Open: {
408                         if (p[-1] == '/') {
409                             type = SimpleXMLType::OpenEmpty;
410                             end--;
411                         }
412                         break;
413                     }
414                     case SimpleXMLType::CData: {
415                         if (!memcmp(p - 2, "]]", 2)) end -= 2;
416                         break;
417                     }
418                     case SimpleXMLType::Processing: {
419                         if (p[-1] == '?') end--;
420                         break;
421                     }
422                     case SimpleXMLType::Comment: {
423                         if (!memcmp(p - 2, "--", 2)) end -= 2;
424                         break;
425                     }
426                     default: {
427                         break;
428                     }
429                 }
430 
431                 if (strip && (type != SimpleXMLType::CData)) {
432                     start = _skipWhiteSpacesAndXmlEntities(start, end);
433                     end = _unskipWhiteSpacesAndXmlEntities(end, start);
434                 }
435 
436                 if (!func((void*)data, type, start, (unsigned int)(end - start))) return false;
437 
438                 itr = p + 1;
439             } else {
440                 return false;
441             }
442         } else {
443             const char *p, *end;
444 
445             if (strip) {
446                 p = itr;
447                 p = _skipWhiteSpacesAndXmlEntities(p, itrEnd);
448                 if (p) {
449                     if (!func((void*)data, SimpleXMLType::Ignored, itr, (unsigned int)(p - itr))) return false;
450                     itr = p;
451                 }
452             }
453 
454             p = _simpleXmlFindStartTag(itr, itrEnd);
455             if (!p) p = itrEnd;
456 
457             end = p;
458             if (strip) end = _unskipWhiteSpacesAndXmlEntities(end, itr);
459 
460             if (itr != end && !func((void*)data, SimpleXMLType::Data, itr, (unsigned int)(end - itr))) return false;
461 
462             if (strip && (end < p) && !func((void*)data, SimpleXMLType::Ignored, end, (unsigned int)(p - end))) return false;
463 
464             itr = p;
465         }
466     }
467     return true;
468 }
469 
470 
simpleXmlParseW3CAttribute(const char * buf,unsigned bufLength,simpleXMLAttributeCb func,const void * data)471 bool simpleXmlParseW3CAttribute(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
472 {
473     const char* end;
474     char* key;
475     char* val;
476     char* next;
477 
478     if (!buf) return false;
479 
480     end = buf + bufLength;
481     key = (char*)alloca(end - buf + 1);
482     val = (char*)alloca(end - buf + 1);
483 
484     if (buf == end) return true;
485 
486     do {
487         char* sep = (char*)strchr(buf, ':');
488         next = (char*)strchr(buf, ';');
489         if (sep >= end) {
490             next = nullptr;
491             sep = nullptr;
492         }
493         if (next >= end) next = nullptr;
494 
495         key[0] = '\0';
496         val[0] = '\0';
497 
498         if (sep != nullptr && next == nullptr) {
499             memcpy(key, buf, sep - buf);
500             key[sep - buf] = '\0';
501 
502             memcpy(val, sep + 1, end - sep - 1);
503             val[end - sep - 1] = '\0';
504         } else if (sep != nullptr && sep < next) {
505             memcpy(key, buf, sep - buf);
506             key[sep - buf] = '\0';
507 
508             memcpy(val, sep + 1, next - sep - 1);
509             val[next - sep - 1] = '\0';
510         } else if (next) {
511             memcpy(key, buf, next - buf);
512             key[next - buf] = '\0';
513         }
514 
515         if (key[0]) {
516             key = const_cast<char*>(_simpleXmlSkipWhiteSpace(key, key + strlen(key)));
517             key[_simpleXmlUnskipWhiteSpace(key + strlen(key) , key) - key] = '\0';
518             val = const_cast<char*>(_simpleXmlSkipWhiteSpace(val, val + strlen(val)));
519             val[_simpleXmlUnskipWhiteSpace(val + strlen(val) , val) - val] = '\0';
520 
521             if (!func((void*)data, key, val)) {
522                 if (!_isIgnoreUnsupportedLogAttributes(key, val)) {
523                     TVGLOG("SVG", "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID", key, val ? val : "NONE");
524                 }
525             }
526         }
527 
528         if (!next) break;
529         buf = next + 1;
530     } while (true);
531 
532     return true;
533 }
534 
535 
536 /*
537  * Supported formats:
538  * tag {}, .name {}, tag.name{}
539  */
simpleXmlParseCSSAttribute(const char * buf,unsigned bufLength,char ** tag,char ** name,const char ** attrs,unsigned * attrsLength)540 const char* simpleXmlParseCSSAttribute(const char* buf, unsigned bufLength, char** tag, char** name, const char** attrs, unsigned* attrsLength)
541 {
542     if (!buf) return nullptr;
543 
544     *tag = *name = nullptr;
545     *attrsLength = 0;
546 
547     auto itr = _simpleXmlSkipWhiteSpace(buf, buf + bufLength);
548     auto itrEnd = (const char*)memchr(buf, '{', bufLength);
549 
550     if (!itrEnd || itr == itrEnd) return nullptr;
551 
552     auto nextElement = (const char*)memchr(itrEnd, '}', bufLength - (itrEnd - buf));
553     if (!nextElement) return nullptr;
554 
555     *attrs = itrEnd + 1;
556     *attrsLength = nextElement - *attrs;
557 
558     const char *p;
559 
560     itrEnd = _simpleXmlUnskipWhiteSpace(itrEnd, itr);
561     if (*(itrEnd - 1) == '.') return nullptr;
562 
563     for (p = itr; p < itrEnd; p++) {
564         if (*p == '.') break;
565     }
566 
567     if (p == itr) *tag = strdup("all");
568     else *tag = strDuplicate(itr, p - itr);
569 
570     if (p == itrEnd) *name = nullptr;
571     else *name = strDuplicate(p + 1, itrEnd - p - 1);
572 
573     return (nextElement ? nextElement + 1 : nullptr);
574 }
575 
576 
simpleXmlFindAttributesTag(const char * buf,unsigned bufLength)577 const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength)
578 {
579     const char *itr = buf, *itrEnd = buf + bufLength;
580 
581     for (; itr < itrEnd; itr++) {
582         if (!isspace((unsigned char)*itr)) {
583             //User skip tagname and already gave it the attributes.
584             if (*itr == '=') return buf;
585         } else {
586             itr = _simpleXmlUnskipXmlEntities(itr, buf);
587             if (itr == itrEnd) return nullptr;
588             return itr;
589         }
590     }
591 
592     return nullptr;
593 }
594 
595 #endif /* LV_USE_THORVG_INTERNAL */
596 
597