1 /*************************************************************************** 2 * Copyright (c) 2024 Microsoft Corporation 3 * 4 * This program and the accompanying materials are made available under the 5 * terms of the MIT License which is available at 6 * https://opensource.org/licenses/MIT. 7 * 8 * SPDX-License-Identifier: MIT 9 **************************************************************************/ 10 11 12 /**************************************************************************/ 13 /**************************************************************************/ 14 /** */ 15 /** GUIX Component */ 16 /** */ 17 /** Utility (Utility) */ 18 /** */ 19 /**************************************************************************/ 20 21 #define GX_SOURCE_CODE 22 23 /* Include necessary system files. */ 24 25 #include "gx_api.h" 26 #include "gx_utility.h" 27 28 29 /**************************************************************************/ 30 /* */ 31 /* FUNCTION RELEASE */ 32 /* */ 33 /* _gx_utility_utf8_string_character_get PORTABLE C */ 34 /* 6.1 */ 35 /* AUTHOR */ 36 /* */ 37 /* Kenneth Maxwell, Microsoft Corporation */ 38 /* */ 39 /* DESCRIPTION */ 40 /* */ 41 /* This function parses utf8 string to multibyte glyph. */ 42 /* */ 43 /* INPUT */ 44 /* */ 45 /* utf8_str UTF-8 string */ 46 /* glyph_value Multibyte value of glyph */ 47 /* byte_count Length of UTF-8 string in byte*/ 48 /* glyph_len Length of glyph value in byte */ 49 /* */ 50 /* OUTPUT */ 51 /* */ 52 /* status Completion status */ 53 /* */ 54 /* CALLS */ 55 /* */ 56 /* */ 57 /* CALLED BY */ 58 /* */ 59 /* _gx_system_string_width_get */ 60 /* _gx_display_driver_indexed_color_text_draw */ 61 /* */ 62 /* RELEASE HISTORY */ 63 /* */ 64 /* DATE NAME DESCRIPTION */ 65 /* */ 66 /* 05-19-2020 Kenneth Maxwell Initial Version 6.0 */ 67 /* 09-30-2020 Kenneth Maxwell Modified comment(s), */ 68 /* resulting in version 6.1 */ 69 /* */ 70 /**************************************************************************/ 71 #ifdef GX_UTF8_SUPPORT _gx_utility_utf8_string_character_get(GX_STRING * utf8_str,GX_CHAR_CODE * glyph_value,UINT * glyph_len)72UINT _gx_utility_utf8_string_character_get(GX_STRING *utf8_str, GX_CHAR_CODE *glyph_value, UINT *glyph_len) 73 { 74 GX_CONST GX_CHAR *ch; 75 UINT bytes; 76 GX_CHAR_CODE value = 0; 77 UINT len; 78 UINT byte_count; 79 80 ch = utf8_str -> gx_string_ptr; 81 byte_count = utf8_str -> gx_string_length; 82 83 /* BOM check. */ 84 if ((byte_count >= 3) && (*ch == (char)0xEF) && (*(ch + 1) == (char)0xBB) && (*(ch + 2) == (char)0xBF)) 85 { 86 /* It is BOM. Skip it. */ 87 ch += 3; 88 byte_count -= 3; 89 } 90 91 if (byte_count == 0) 92 { 93 /* Zero length string. Return error. */ 94 if (glyph_value) 95 { 96 *glyph_value = 0; 97 } 98 99 if (glyph_len) 100 { 101 *glyph_len = 0; 102 } 103 return GX_INVALID_VALUE; 104 } 105 106 /* Reset glyph length to 1. */ 107 len = 1; 108 109 /* Check the first byte */ 110 if ((*ch & 0x80) == 0) 111 { 112 113 utf8_str -> gx_string_ptr = ch + 1; 114 utf8_str -> gx_string_length -= 1; 115 116 /* One byte glyph. */ 117 if (glyph_value) 118 { 119 *glyph_value = (*ch & ~0x80) & 0xFF; 120 } 121 122 if (glyph_len) 123 { 124 *glyph_len = 1; 125 } 126 127 return GX_SUCCESS; 128 } 129 else if ((*ch & 0xE0) == 0xC0) 130 { 131 132 /* Two bytes glyph. */ 133 bytes = 2; 134 value = (*ch & ~0xE0) & 0xFF; 135 } 136 else if ((*ch & 0xF0) == 0xE0) 137 { 138 139 /* Three bytes glyph. */ 140 bytes = 3; 141 value = (*ch & ~0xF0) & 0xFF; 142 } 143 else if ((*ch & 0xF8) == 0xF0) 144 { 145 146 /* Four bytes glyph. */ 147 bytes = 4; 148 value = (*ch & ~0xF8) & 0xFF; 149 } 150 else if ((*ch & 0xFC) == 0xF8) 151 { 152 153 /* Five bytes glyph. */ 154 bytes = 5; 155 value = (*ch & ~0xFC) & 0xFF; 156 } 157 else if ((*ch & 0xFE) == 0xFC) 158 { 159 160 /* Six bytes glyph. */ 161 bytes = 6; 162 value = (*ch & ~0xFE) & 0xFF; 163 } 164 else 165 { 166 /* Not a valid utf8 glyph. */ 167 utf8_str -> gx_string_ptr = ch + 1; 168 utf8_str -> gx_string_length -= 1; 169 170 if (glyph_value) 171 { 172 *glyph_value = 0; 173 } 174 175 if (glyph_len) 176 { 177 *glyph_len = 1; 178 } 179 180 return GX_INVALID_VALUE; 181 } 182 183 if (byte_count < bytes) 184 { 185 /* Not a valid utf8 glyph. */ 186 if (glyph_value) 187 { 188 *glyph_value = 0; 189 } 190 191 if (glyph_len) 192 { 193 *glyph_len = bytes; 194 } 195 utf8_str -> gx_string_length -= bytes; 196 197 return GX_INVALID_VALUE; 198 } 199 200 while (len < bytes) 201 { 202 ch++; 203 len++; 204 205 if ((*ch & 0xC0) != 0x80) 206 { 207 208 /* Not a valid utf8 glyph. */ 209 if (glyph_len) 210 { 211 *glyph_len = len; 212 } 213 214 utf8_str -> gx_string_ptr = ch; 215 utf8_str -> gx_string_length -= len; 216 217 return GX_INVALID_VALUE; 218 } 219 220 value = (GX_CHAR_CODE)(value << 6); 221 value = (GX_CHAR_CODE)(value + ((*ch & ~0xC0) & 0xFF)); 222 } 223 224 if (glyph_value) 225 { 226 *glyph_value = value; 227 } 228 229 if (glyph_len) 230 { 231 *glyph_len = len; 232 } 233 234 utf8_str -> gx_string_ptr = ch + 1; 235 utf8_str -> gx_string_length -= len; 236 237 return GX_SUCCESS; 238 } 239 240 #endif /* GX_UTF8_SUPPORT */ 241 242