1 /***************************************************************************
2  * Copyright (c) 2024 Microsoft Corporation
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the MIT License which is available at
6  * https://opensource.org/licenses/MIT.
7  *
8  * SPDX-License-Identifier: MIT
9  **************************************************************************/
10 
11 
12 /**************************************************************************/
13 /**************************************************************************/
14 /**                                                                       */
15 /** GUIX Component                                                        */
16 /**                                                                       */
17 /**   Utility (Utility)                                                   */
18 /**                                                                       */
19 /**************************************************************************/
20 
21 #define GX_SOURCE_CODE
22 
23 /* Include necessary system files.  */
24 
25 #include "gx_api.h"
26 #include "gx_utility.h"
27 
28 
29 /**************************************************************************/
30 /*                                                                        */
31 /*  FUNCTION                                               RELEASE        */
32 /*                                                                        */
33 /*    _gx_utility_utf8_string_character_get               PORTABLE C      */
34 /*                                                           6.1          */
35 /*  AUTHOR                                                                */
36 /*                                                                        */
37 /*    Kenneth Maxwell, Microsoft Corporation                              */
38 /*                                                                        */
39 /*  DESCRIPTION                                                           */
40 /*                                                                        */
41 /*    This function parses utf8 string to multibyte glyph.                */
42 /*                                                                        */
43 /*  INPUT                                                                 */
44 /*                                                                        */
/*    utf8_str                              UTF-8 string, advanced past   */
/*                                            the parsed character        */
/*    glyph_value                           Multibyte value of glyph      */
/*    glyph_len                             Length of glyph value in byte */
49 /*                                                                        */
50 /*  OUTPUT                                                                */
51 /*                                                                        */
52 /*    status                                Completion status             */
53 /*                                                                        */
54 /*  CALLS                                                                 */
55 /*                                                                        */
56 /*                                                                        */
57 /*  CALLED BY                                                             */
58 /*                                                                        */
59 /*    _gx_system_string_width_get                                         */
60 /*    _gx_display_driver_indexed_color_text_draw                          */
61 /*                                                                        */
62 /*  RELEASE HISTORY                                                       */
63 /*                                                                        */
64 /*    DATE              NAME                      DESCRIPTION             */
65 /*                                                                        */
66 /*  05-19-2020     Kenneth Maxwell          Initial Version 6.0           */
67 /*  09-30-2020     Kenneth Maxwell          Modified comment(s),          */
68 /*                                            resulting in version 6.1    */
69 /*                                                                        */
70 /**************************************************************************/
71 #ifdef GX_UTF8_SUPPORT
/* Decode one UTF-8 character from the front of utf8_str and advance the    */
/* string past every byte this call consumed.                               */
/*                                                                          */
/* A leading byte order mark (EF BB BF) is skipped before decoding. It is   */
/* removed from the string but is not counted in glyph_len.                 */
/*                                                                          */
/*   utf8_str     In/out. On return gx_string_ptr / gx_string_length        */
/*                describe the bytes that follow the consumed ones.         */
/*   glyph_value  Optional (may be null). Receives the decoded value, or 0  */
/*                when GX_INVALID_VALUE is returned.                        */
/*   glyph_len    Optional (may be null). Receives the number of bytes      */
/*                consumed for the character, BOM excluded.                 */
/*                                                                          */
/* Returns GX_SUCCESS when a complete sequence was decoded, otherwise       */
/* GX_INVALID_VALUE (nothing to decode, invalid lead byte, truncated        */
/* sequence, or a byte that is not a continuation byte). Overlong forms     */
/* and surrogate values are not rejected here.                              */
UINT  _gx_utility_utf8_string_character_get(GX_STRING *utf8_str, GX_CHAR_CODE *glyph_value, UINT *glyph_len)
{
GX_CONST GX_CHAR *ch;
UINT              bytes;
GX_CHAR_CODE      value = 0;
UINT              len;
UINT              byte_count;
UINT              status;

    ch = utf8_str -> gx_string_ptr;
    byte_count = utf8_str -> gx_string_length;

    /* BOM check. */
    if ((byte_count >= 3) && (*ch == (char)0xEF) && (*(ch + 1) == (char)0xBB) && (*(ch + 2) == (char)0xBF))
    {
        /* It is BOM. Skip it. From here on ch / byte_count describe the
           string without the BOM; every exit path below writes them back
           so the pointer and the length stay in step.  */
        ch += 3;
        byte_count -= 3;
    }

    if (byte_count == 0)
    {
        /* Nothing to decode. Commit the BOM skip (if any) so that a string
           holding only a BOM is reported as empty instead of being left
           unchanged, which would stall a caller looping on the length.  */
        utf8_str -> gx_string_ptr = ch;
        utf8_str -> gx_string_length = 0;

        if (glyph_value)
        {
            *glyph_value = 0;
        }

        if (glyph_len)
        {
            *glyph_len = 0;
        }
        return GX_INVALID_VALUE;
    }

    /* Classify the lead byte: sequence length and the payload bits it carries. */
    if ((*ch & 0x80) == 0)
    {
        /* One byte glyph. */
        bytes = 1;
        value = (GX_CHAR_CODE)(*ch & 0x7F);
    }
    else if ((*ch & 0xE0) == 0xC0)
    {
        /* Two bytes glyph. */
        bytes = 2;
        value = (GX_CHAR_CODE)(*ch & 0x1F);
    }
    else if ((*ch & 0xF0) == 0xE0)
    {
        /* Three bytes glyph. */
        bytes = 3;
        value = (GX_CHAR_CODE)(*ch & 0x0F);
    }
    else if ((*ch & 0xF8) == 0xF0)
    {
        /* Four bytes glyph. */
        bytes = 4;
        value = (GX_CHAR_CODE)(*ch & 0x07);
    }
    else if ((*ch & 0xFC) == 0xF8)
    {
        /* Five bytes glyph. */
        bytes = 5;
        value = (GX_CHAR_CODE)(*ch & 0x03);
    }
    else if ((*ch & 0xFE) == 0xFC)
    {
        /* Six bytes glyph. */
        bytes = 6;
        value = (GX_CHAR_CODE)(*ch & 0x01);
    }
    else
    {
        /* Not a valid lead byte (stray continuation byte, 0xFE or 0xFF). */
        bytes = 0;
    }

    /* The lead byte is always consumed. */
    len = 1;

    if (bytes == 0)
    {
        status = GX_INVALID_VALUE;
    }
    else
    {
        status = GX_SUCCESS;

        /* Fold in the continuation bytes. len is both the number of bytes
           consumed so far and the index of the next byte to examine.  */
        while (len < bytes)
        {
            if ((len >= byte_count) || ((ch[len] & 0xC0) != 0x80))
            {
                /* Sequence is truncated or interrupted. Stop in front of
                   the offending byte so the next call can decode it as a
                   new lead byte, and never step beyond the string end.  */
                status = GX_INVALID_VALUE;
                value = 0;
                break;
            }

            value = (GX_CHAR_CODE)(value << 6);
            value = (GX_CHAR_CODE)(value + (GX_CHAR_CODE)(ch[len] & 0x3F));
            len++;
        }
    }

    /* Single commit point: pointer advance and remaining length are derived
       from the same count, so they cannot drift apart. len <= byte_count
       holds here, so the subtraction cannot wrap.  */
    utf8_str -> gx_string_ptr = ch + len;
    utf8_str -> gx_string_length = byte_count - len;

    if (glyph_value)
    {
        *glyph_value = value;
    }

    if (glyph_len)
    {
        *glyph_len = len;
    }

    return status;
}
239 
240 #endif /* GX_UTF8_SUPPORT */
241 
242