Lines Matching +full:- +full:c

9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
17 Copyright (c) 2016 Don Lewis <truckman@apache.org>
18 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
19 Copyright (c) 2017 Alexander Bluhm <alexander.bluhm@gmx.net>
20 Copyright (c) 2017 Benbuck Nason <bnason@netflix.com>
21 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
22 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
23 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
24 Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
25 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
26 Copyright (c) 2023 Hanno Böck <hanno@gentoo.org>
86 /* A 2 byte UTF-8 representation splits the character's 11 bits between
95 /* A 3 byte UTF-8 representation splits the character's 16 bits between
107 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
244 # define MINBPC(enc) ((enc)->minBytesPerChar)
251 (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
258 # define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
264 # define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
274 #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
275 #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
278 (AS_NORMAL_ENCODING(enc)->isInvalid##n \
279 && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
282 (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
287 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
289 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
296 # define CHAR_MATCHES(enc, p, c) \ argument
297 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
299 sb_charMatches(const ENCODING *enc, const char *p, int c) { in sb_charMatches() argument
301 return *p == c; in sb_charMatches()
304 /* c is an ASCII character */
305 # define CHAR_MATCHES(enc, p, c) (*(p) == (c)) argument
310 #include "xmltok_impl.c"
335 for (; fromLim > from; fromLim--, walked++) { in _INTERNAL_trim_to_complete_utf8_characters()
336 const unsigned char prev = (unsigned char)fromLim[-1]; in _INTERNAL_trim_to_complete_utf8_characters()
338 == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ in _INTERNAL_trim_to_complete_utf8_characters()
340 fromLim += 4 - 1; in _INTERNAL_trim_to_complete_utf8_characters()
346 == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ in _INTERNAL_trim_to_complete_utf8_characters()
348 fromLim += 3 - 1; in _INTERNAL_trim_to_complete_utf8_characters()
354 == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ in _INTERNAL_trim_to_complete_utf8_characters()
356 fromLim += 2 - 1; in _INTERNAL_trim_to_complete_utf8_characters()
362 == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ in _INTERNAL_trim_to_complete_utf8_characters()
376 const ptrdiff_t bytesAvailable = fromLim - *fromP; in utf8_toUtf8()
377 const ptrdiff_t bytesStorable = toLim - *toP; in utf8_toUtf8()
394 const ptrdiff_t bytesToCopy = fromLim - *fromP; in utf8_toUtf8()
417 if (fromLim - from < 2) { in utf8_toUtf16()
425 if (fromLim - from < 3) { in utf8_toUtf16()
435 if (toLim - to < 2) { in utf8_toUtf16()
439 if (fromLim - from < 4) { in utf8_toUtf16()
445 n -= 0x10000; in utf8_toUtf16()
511 unsigned char c; in latin1_toUtf8() local
514 c = (unsigned char)**fromP; in latin1_toUtf8()
515 if (c & 0x80) { in latin1_toUtf8()
516 if (toLim - *toP < 2) in latin1_toUtf8()
518 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); in latin1_toUtf8()
519 *(*toP)++ = (char)((c & 0x3f) | 0x80); in latin1_toUtf8()
602 /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ in unicode_byte_type()
608 /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ in unicode_byte_type()
616 case 0xFF: /* noncharacter-FFFF */ in unicode_byte_type()
617 case 0xFE: /* noncharacter-FFFE */ in unicode_byte_type()
631 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
655 if (toLim - *toP < 2) { \
663 if (toLim - *toP < 3) { \
676 if (toLim - *toP < 4) { \
680 if (fromLim - from < 4) { \
708 fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
710 if (fromLim - *fromP > ((toLim - *toP) << 1) \
711 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
712 fromLim -= 2; \
743 #define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) in DEFINE_UTF16_TO_UTF16()
744 #define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c)) in DEFINE_UTF16_TO_UTF16() argument
764 little2_charMatches(const ENCODING *enc, const char *p, int c) { in little2_charMatches() argument
766 return LITTLE2_CHAR_MATCHES(p, c); in little2_charMatches()
792 # define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
799 # include "xmltok_impl.c"
876 #define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
877 #define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c)) argument
897 big2_charMatches(const ENCODING *enc, const char *p, int c) { in big2_charMatches() argument
899 return BIG2_CHAR_MATCHES(p, c); in big2_charMatches()
925 # define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) argument
932 # include "xmltok_impl.c"
1015 c1 += ASCII_A - ASCII_a; in streqci()
1019 * upper-case strings into s2. in streqci()
1021 c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ in streqci()
1043 return -1; in toAscii()
1049 isSpace(int c) { in isSpace() argument
1050 switch (c) { in isSpace()
1067 int c; in parsePseudoAttribute() local
1078 ptr += enc->minBytesPerChar; in parsePseudoAttribute()
1086 c = toAscii(enc, ptr, end); in parsePseudoAttribute()
1087 if (c == -1) { in parsePseudoAttribute()
1091 if (c == ASCII_EQUALS) { in parsePseudoAttribute()
1095 if (isSpace(c)) { in parsePseudoAttribute()
1098 ptr += enc->minBytesPerChar; in parsePseudoAttribute()
1099 } while (isSpace(c = toAscii(enc, ptr, end))); in parsePseudoAttribute()
1100 if (c != ASCII_EQUALS) { in parsePseudoAttribute()
1106 ptr += enc->minBytesPerChar; in parsePseudoAttribute()
1112 ptr += enc->minBytesPerChar; in parsePseudoAttribute()
1113 c = toAscii(enc, ptr, end); in parsePseudoAttribute()
1114 while (isSpace(c)) { in parsePseudoAttribute()
1115 ptr += enc->minBytesPerChar; in parsePseudoAttribute()
1116 c = toAscii(enc, ptr, end); in parsePseudoAttribute()
1118 if (c != ASCII_QUOT && c != ASCII_APOS) { in parsePseudoAttribute()
1122 open = (char)c; in parsePseudoAttribute()
1123 ptr += enc->minBytesPerChar; in parsePseudoAttribute()
1125 for (;; ptr += enc->minBytesPerChar) { in parsePseudoAttribute()
1126 c = toAscii(enc, ptr, end); in parsePseudoAttribute()
1127 if (c == open) in parsePseudoAttribute()
1129 if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z) in parsePseudoAttribute()
1130 && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD in parsePseudoAttribute()
1131 && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { in parsePseudoAttribute()
1136 *nextTokPtr = ptr + enc->minBytesPerChar; in parsePseudoAttribute()
1164 ptr += 5 * enc->minBytesPerChar; in doParseXmlDecl()
1165 end -= 2 * enc->minBytesPerChar; in doParseXmlDecl()
1195 int c = toAscii(enc, val, end); in doParseXmlDecl() local
1196 if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) { in doParseXmlDecl()
1203 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); in doParseXmlDecl()
1216 if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { in doParseXmlDecl()
1219 } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { in doParseXmlDecl()
1227 ptr += enc->minBytesPerChar; in doParseXmlDecl()
1246 return -1; in checkCharRefNumber()
1249 return -1; in checkCharRefNumber()
1253 return -1; in checkCharRefNumber()
1260 XmlUtf8Encode(int c, char *buf) { in XmlUtf8Encode() argument
1268 if (c < 0) in XmlUtf8Encode()
1270 if (c < min2) { in XmlUtf8Encode()
1271 buf[0] = (char)(c | UTF8_cval1); in XmlUtf8Encode()
1274 if (c < min3) { in XmlUtf8Encode()
1275 buf[0] = (char)((c >> 6) | UTF8_cval2); in XmlUtf8Encode()
1276 buf[1] = (char)((c & 0x3f) | 0x80); in XmlUtf8Encode()
1279 if (c < min4) { in XmlUtf8Encode()
1280 buf[0] = (char)((c >> 12) | UTF8_cval3); in XmlUtf8Encode()
1281 buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); in XmlUtf8Encode()
1282 buf[2] = (char)((c & 0x3f) | 0x80); in XmlUtf8Encode()
1285 if (c < 0x110000) { in XmlUtf8Encode()
1286 buf[0] = (char)((c >> 18) | UTF8_cval4); in XmlUtf8Encode()
1287 buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); in XmlUtf8Encode()
1288 buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); in XmlUtf8Encode()
1289 buf[3] = (char)((c & 0x3f) | 0x80); in XmlUtf8Encode()
1304 charNum -= 0x10000; in XmlUtf16Encode()
1330 int c = uenc->convert(uenc->userData, p); in unknown_isName() local
1331 if (c & ~0xFFFF) in unknown_isName()
1333 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); in unknown_isName()
1339 int c = uenc->convert(uenc->userData, p); in unknown_isNmstrt() local
1340 if (c & ~0xFFFF) in unknown_isNmstrt()
1342 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); in unknown_isNmstrt()
1348 int c = uenc->convert(uenc->userData, p); in unknown_isInvalid() local
1349 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; in unknown_isInvalid()
1362 utf8 = uenc->utf8[(unsigned char)**fromP]; in unknown_toUtf8()
1365 int c = uenc->convert(uenc->userData, *fromP); in unknown_toUtf8() local
1366 n = XmlUtf8Encode(c, buf); in unknown_toUtf8()
1367 if (n > toLim - *toP) in unknown_toUtf8()
1370 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] in unknown_toUtf8()
1371 - (BT_LEAD2 - 2)); in unknown_toUtf8()
1373 if (n > toLim - *toP) in unknown_toUtf8()
1387 unsigned short c = uenc->utf16[(unsigned char)**fromP]; in unknown_toUtf16() local
1388 if (c == 0) { in unknown_toUtf16()
1389 c = (unsigned short)uenc->convert(uenc->userData, *fromP); in unknown_toUtf16()
1390 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] in unknown_toUtf16()
1391 - (BT_LEAD2 - 2)); in unknown_toUtf16()
1394 *(*toP)++ = c; in unknown_toUtf16()
1414 int c = table[i]; in XmlInitUnknownEncoding() local
1415 if (c == -1) { in XmlInitUnknownEncoding()
1416 e->normal.type[i] = BT_MALFORM; in XmlInitUnknownEncoding()
1418 e->utf16[i] = 0xFFFF; in XmlInitUnknownEncoding()
1419 e->utf8[i][0] = 1; in XmlInitUnknownEncoding()
1420 e->utf8[i][1] = 0; in XmlInitUnknownEncoding()
1421 } else if (c < 0) { in XmlInitUnknownEncoding()
1422 if (c < -4) in XmlInitUnknownEncoding()
1424 /* Multi-byte sequences need a converter function */ in XmlInitUnknownEncoding()
1427 e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); in XmlInitUnknownEncoding()
1428 e->utf8[i][0] = 0; in XmlInitUnknownEncoding()
1429 e->utf16[i] = 0; in XmlInitUnknownEncoding()
1430 } else if (c < 0x80) { in XmlInitUnknownEncoding()
1431 if (latin1_encoding.type[c] != BT_OTHER in XmlInitUnknownEncoding()
1432 && latin1_encoding.type[c] != BT_NONXML && c != i) in XmlInitUnknownEncoding()
1434 e->normal.type[i] = latin1_encoding.type[c]; in XmlInitUnknownEncoding()
1435 e->utf8[i][0] = 1; in XmlInitUnknownEncoding()
1436 e->utf8[i][1] = (char)c; in XmlInitUnknownEncoding()
1437 e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); in XmlInitUnknownEncoding()
1438 } else if (checkCharRefNumber(c) < 0) { in XmlInitUnknownEncoding()
1439 e->normal.type[i] = BT_NONXML; in XmlInitUnknownEncoding()
1441 e->utf16[i] = 0xFFFF; in XmlInitUnknownEncoding()
1442 e->utf8[i][0] = 1; in XmlInitUnknownEncoding()
1443 e->utf8[i][1] = 0; in XmlInitUnknownEncoding()
1445 if (c > 0xFFFF) in XmlInitUnknownEncoding()
1447 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) in XmlInitUnknownEncoding()
1448 e->normal.type[i] = BT_NMSTRT; in XmlInitUnknownEncoding()
1449 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) in XmlInitUnknownEncoding()
1450 e->normal.type[i] = BT_NAME; in XmlInitUnknownEncoding()
1452 e->normal.type[i] = BT_OTHER; in XmlInitUnknownEncoding()
1453 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); in XmlInitUnknownEncoding()
1454 e->utf16[i] = (unsigned short)c; in XmlInitUnknownEncoding()
1457 e->userData = userData; in XmlInitUnknownEncoding()
1458 e->convert = convert; in XmlInitUnknownEncoding()
1460 e->normal.isName2 = unknown_isName; in XmlInitUnknownEncoding()
1461 e->normal.isName3 = unknown_isName; in XmlInitUnknownEncoding()
1462 e->normal.isName4 = unknown_isName; in XmlInitUnknownEncoding()
1463 e->normal.isNmstrt2 = unknown_isNmstrt; in XmlInitUnknownEncoding()
1464 e->normal.isNmstrt3 = unknown_isNmstrt; in XmlInitUnknownEncoding()
1465 e->normal.isNmstrt4 = unknown_isNmstrt; in XmlInitUnknownEncoding()
1466 e->normal.isInvalid2 = unknown_isInvalid; in XmlInitUnknownEncoding()
1467 e->normal.isInvalid3 = unknown_isInvalid; in XmlInitUnknownEncoding()
1468 e->normal.isInvalid4 = unknown_isInvalid; in XmlInitUnknownEncoding()
1470 e->normal.enc.utf8Convert = unknown_toUtf8; in XmlInitUnknownEncoding()
1471 e->normal.enc.utf16Convert = unknown_toUtf16; in XmlInitUnknownEncoding()
1472 return &(e->normal.enc); in XmlInitUnknownEncoding()
1478 UNKNOWN_ENC = -1,
1524 #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
1525 #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i)
1541 encPtr = enc->encPtr; in initScan()
1543 /* only a single byte available for auto-detection */ in initScan()
1545 /* a well-formed document entity must have more than one byte */ in initScan()
1550 /* if UTF-16 was externally specified, then we need at least 2 bytes */ in initScan()
1560 case 0xEF: /* possibly first byte of UTF-8 BOM */ in initScan()
1576 /* 00 3C is handled in the default case */ in initScan()
1591 /* Maybe a UTF-8 BOM (EF BB BF) */ in initScan()
1593 of ISO-8859-1 or some flavour of UTF-16 in initScan()
1616 way this can fail to be big-endian UTF-16 if it's an in initScan()
1618 UTF-16LE. in initScan()
1626 - parsing an external entity in initScan()
1627 - second byte is 0 in initScan()
1628 - no externally specified encoding in initScan()
1629 - no encoding declaration in initScan()
1630 by assuming UTF-16LE. But we don't, because this would mean when in initScan()
1649 #include "xmltok_ns.c"
1660 # include "xmltok_ns.c"
1671 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; in XmlInitUnknownEncodingNS()