1 /*
2 Copyright (c) 1990 Regents of the University of California.
3 All rights reserved.
4 */
5 #include <stdlib.h>
6 #include <locale.h>
7 #include "mbctype.h"
8 #include <wchar.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include "local.h"
13
/*
 * Single-byte ("C"/ASCII) converter: every input byte maps to the wide
 * character with the same numeric value.
 *
 *   pwc    where to store the result; may be NULL to discard it
 *   s      input bytes; NULL asks "is this encoding state-dependent?"
 *   n      number of bytes available at s
 *   state  unused, present only to match the common converter signature
 *
 * Returns 0 if s is NULL or points at a NUL byte, -2 if n is 0,
 * and 1 otherwise (one byte consumed).
 */
int
__ascii_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t discard;
  wchar_t *out = (pwc != NULL) ? pwc : &discard;
  const unsigned char *src = (const unsigned char *) s;

  (void) state;

  /* Not state-dependent. */
  if (src == NULL)
    return 0;

  /* Nothing to look at yet. */
  if (n == 0)
    return -2;

  *out = (wchar_t) *src;

  /* 0 for the terminator, 1 for any other byte. */
  return *src != '\0';
}
41
42 #ifdef _MB_CAPABLE
43 typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
44 NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
45 typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
46 INV, JIS_S_NUM } JIS_STATE;
47 typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
48
/**************************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with following JIS characters and switches
 * to ASCII are grouped with preceding JIS characters.  Thus, maximum returned length
 * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
 *************************************************************************************/
55
56 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
57 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
58 /* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
59 /* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
60 /* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
61 /* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
62 /* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
63 /* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
64 /* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
65 };
66
67 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
68 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
69 /* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
70 /* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
71 /* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
72 /* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
73 /* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
74 /* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
75 /* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
76 };
77
/* For the more complex encodings we reuse the mbstate_t __count field
   and use it to store a decoder state value instead of a byte count. */
#define __state __count
80
81 #ifdef _MB_EXTENDED_CHARSETS_ISO
82 static int
___iso_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int iso_idx,mbstate_t * state)83 ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
84 int iso_idx, mbstate_t *state)
85 {
86 wchar_t dummy;
87 unsigned char *t = (unsigned char *)s;
88
89 if (pwc == NULL)
90 pwc = &dummy;
91
92 if (s == NULL)
93 return 0;
94
95 if (n == 0)
96 return -2;
97
98 if (*t >= 0xa0)
99 {
100 if (iso_idx >= 0)
101 {
102 *pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
103 if (*pwc == 0) /* Invalid character */
104 {
105 _REENT_ERRNO(r) = EILSEQ;
106 return -1;
107 }
108 return 1;
109 }
110 }
111
112 *pwc = (wchar_t) *t;
113
114 if (*t == '\0')
115 return 0;
116
117 return 1;
118 }
119
/*
 * Per-charset entry points for ISO-8859-1 .. ISO-8859-16.  Each one simply
 * forwards to ___iso_mbtowc with a fixed table index, so they are generated
 * from one template.  The index is the row of __iso_8859_conv to use;
 * ISO-8859-1 passes -1 because it needs no table.  There is no
 * ISO-8859-12.
 *
 * NOTE(review): these take a leading struct _reent * while the public
 * converters in this file do not -- confirm this matches mbtowc_p.
 */
#define ISO_8859_MBTOWC(part, idx)					\
  static int								\
  __iso_8859_##part##_mbtowc (struct _reent *r, wchar_t *pwc,		\
			      const char *s, size_t n,			\
			      mbstate_t *state)				\
  {									\
    return ___iso_mbtowc (r, pwc, s, n, (idx), state);			\
  }

ISO_8859_MBTOWC (1, -1)
ISO_8859_MBTOWC (2, 0)
ISO_8859_MBTOWC (3, 1)
ISO_8859_MBTOWC (4, 2)
ISO_8859_MBTOWC (5, 3)
ISO_8859_MBTOWC (6, 4)
ISO_8859_MBTOWC (7, 5)
ISO_8859_MBTOWC (8, 6)
ISO_8859_MBTOWC (9, 7)
ISO_8859_MBTOWC (10, 8)
ISO_8859_MBTOWC (11, 9)
ISO_8859_MBTOWC (13, 10)
ISO_8859_MBTOWC (14, 11)
ISO_8859_MBTOWC (15, 12)
ISO_8859_MBTOWC (16, 13)

#undef ISO_8859_MBTOWC
224
225 static mbtowc_p __iso_8859_mbtowc[17] = {
226 NULL,
227 __iso_8859_1_mbtowc,
228 __iso_8859_2_mbtowc,
229 __iso_8859_3_mbtowc,
230 __iso_8859_4_mbtowc,
231 __iso_8859_5_mbtowc,
232 __iso_8859_6_mbtowc,
233 __iso_8859_7_mbtowc,
234 __iso_8859_8_mbtowc,
235 __iso_8859_9_mbtowc,
236 __iso_8859_10_mbtowc,
237 __iso_8859_11_mbtowc,
238 NULL, /* No ISO 8859-12 */
239 __iso_8859_13_mbtowc,
240 __iso_8859_14_mbtowc,
241 __iso_8859_15_mbtowc,
242 __iso_8859_16_mbtowc
243 };
244
245 /* val *MUST* be valid! All checks for validity are supposed to be
246 performed before calling this function. */
247 mbtowc_p
__iso_mbtowc(int val)248 __iso_mbtowc (int val)
249 {
250 return __iso_8859_mbtowc[val];
251 }
252 #endif /* _MB_EXTENDED_CHARSETS_ISO */
253
254 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
255 static int
___cp_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int cp_idx,mbstate_t * state)256 ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
257 int cp_idx, mbstate_t *state)
258 {
259 wchar_t dummy;
260 unsigned char *t = (unsigned char *)s;
261
262 if (pwc == NULL)
263 pwc = &dummy;
264
265 if (s == NULL)
266 return 0;
267
268 if (n == 0)
269 return -2;
270
271 if (*t >= 0x80)
272 {
273 if (cp_idx >= 0)
274 {
275 *pwc = __cp_conv[cp_idx][*t - 0x80];
276 if (*pwc == 0) /* Invalid character */
277 {
278 _REENT_ERRNO(r) = EILSEQ;
279 return -1;
280 }
281 return 1;
282 }
283 }
284
285 *pwc = (wchar_t)*t;
286
287 if (*t == '\0')
288 return 0;
289
290 return 1;
291 }
292
/*
 * Per-code-page entry points.  Each one simply forwards to ___cp_mbtowc
 * with a fixed row index into __cp_conv, so they are generated from one
 * template.  The indices run 0..26 in the order listed below.
 *
 * NOTE(review): these take a leading struct _reent * while the public
 * converters in this file do not -- confirm this matches mbtowc_p.
 */
#define CP_MBTOWC(cp, idx)						\
  static int								\
  __cp_##cp##_mbtowc (struct _reent *r, wchar_t *pwc,			\
		      const char *s, size_t n,				\
		      mbstate_t *state)					\
  {									\
    return ___cp_mbtowc (r, pwc, s, n, (idx), state);			\
  }

CP_MBTOWC (437, 0)
CP_MBTOWC (720, 1)
CP_MBTOWC (737, 2)
CP_MBTOWC (775, 3)
CP_MBTOWC (850, 4)
CP_MBTOWC (852, 5)
CP_MBTOWC (855, 6)
CP_MBTOWC (857, 7)
CP_MBTOWC (858, 8)
CP_MBTOWC (862, 9)
CP_MBTOWC (866, 10)
CP_MBTOWC (874, 11)
CP_MBTOWC (1125, 12)
CP_MBTOWC (1250, 13)
CP_MBTOWC (1251, 14)
CP_MBTOWC (1252, 15)
CP_MBTOWC (1253, 16)
CP_MBTOWC (1254, 17)
CP_MBTOWC (1255, 18)
CP_MBTOWC (1256, 19)
CP_MBTOWC (1257, 20)
CP_MBTOWC (1258, 21)
CP_MBTOWC (20866, 22)
CP_MBTOWC (21866, 23)
CP_MBTOWC (101, 24)
CP_MBTOWC (102, 25)
CP_MBTOWC (103, 26)

#undef CP_MBTOWC
481
482 static mbtowc_p __cp_xxx_mbtowc[27] = {
483 __cp_437_mbtowc,
484 __cp_720_mbtowc,
485 __cp_737_mbtowc,
486 __cp_775_mbtowc,
487 __cp_850_mbtowc,
488 __cp_852_mbtowc,
489 __cp_855_mbtowc,
490 __cp_857_mbtowc,
491 __cp_858_mbtowc,
492 __cp_862_mbtowc,
493 __cp_866_mbtowc,
494 __cp_874_mbtowc,
495 __cp_1125_mbtowc,
496 __cp_1250_mbtowc,
497 __cp_1251_mbtowc,
498 __cp_1252_mbtowc,
499 __cp_1253_mbtowc,
500 __cp_1254_mbtowc,
501 __cp_1255_mbtowc,
502 __cp_1256_mbtowc,
503 __cp_1257_mbtowc,
504 __cp_1258_mbtowc,
505 __cp_20866_mbtowc,
506 __cp_21866_mbtowc,
507 __cp_101_mbtowc,
508 __cp_102_mbtowc,
509 __cp_103_mbtowc,
510 };
511
512 /* val *MUST* be valid! All checks for validity are supposed to be
513 performed before calling this function. */
514 mbtowc_p
__cp_mbtowc(int val)515 __cp_mbtowc (int val)
516 {
517 return __cp_xxx_mbtowc[__cp_val_index (val)];
518 }
519 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
520
/*
 * Restartable UTF-8 decoder.
 *
 *   pwc    where to store the result; may be NULL to discard it
 *   s      input bytes; NULL means "query state dependence" (returns 0)
 *   n      number of bytes available at s
 *   state  conversion state; __count is the number of bytes of an
 *          unfinished sequence saved in __value.__wchb (the special value
 *          4 is used between the two halves of a surrogate pair)
 *
 * Returns
 *    0   s is NULL, or the next character is NUL
 *   -2   n is 0, or the n bytes are a valid but incomplete prefix; the
 *        bytes seen so far are kept in *state so the call can be resumed
 *   -1   invalid, overlong or out-of-range sequence (errno = EILSEQ)
 *   >0   number of bytes consumed from s by this call
 *
 * When wchar_t is 16 bits wide, a four-byte sequence is returned as a
 * UTF-16 surrogate pair over two calls (see the comment in the four-byte
 * branch).
 */
int
__utf8_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *) s;
  unsigned char *seq;	/* saved bytes of the sequence, viewed as unsigned */
  int ch;
  int i = 0;		/* bytes consumed from s in this call */

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  /* Always look at the saved bytes as unsigned values so the range checks
     below do not depend on the signedness of the storage type. */
  seq = (unsigned char *) state->__value.__wchb;

  /* Either start a new sequence or pick up the saved lead byte. */
  if (state->__count == 0)
    ch = t[i++];
  else
    ch = seq[0];

  if (ch == '\0')
    {
      *pwc = 0;
      state->__count = 0;
      return 0; /* s points to the null character */
    }

  if (ch <= 0x7f)
    {
      /* single-byte sequence */
      state->__count = 0;
      *pwc = ch;
      return 1;
    }
  if (ch >= 0xc0 && ch <= 0xdf)
    {
      /* two-byte sequence */
      seq[0] = ch;
      /* When resuming, the byte already held in state counts toward the
	 length checks, so credit n for it (without wrapping around). */
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      if (seq[0] < 0xc2)
	{
	  /* overlong UTF-8 sequence */
	  errno = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      *pwc = (wchar_t)((seq[0] & 0x1f) << 6)
	|    (wchar_t)(ch & 0x3f);
      return i;
    }
  if (ch >= 0xe0 && ch <= 0xef)
    {
      /* three-byte sequence */
      wchar_t tmp;
      seq[0] = ch;
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      /* Second byte: from the input, or from state when resuming. */
      ch = (state->__count == 1) ? t[i++] : seq[1];
      if (seq[0] == 0xe0 && ch < 0xa0)
	{
	  /* overlong UTF-8 sequence */
	  errno = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      seq[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      tmp = (wchar_t)((seq[0] & 0x0f) << 12)
	|    (wchar_t)((seq[1] & 0x3f) << 6)
	|     (wchar_t)(ch & 0x3f);
      *pwc = tmp;
      return i;
    }
  if (ch >= 0xf0 && ch <= 0xf4)
    {
      /* four-byte sequence */
      uint32_t tmp;
      seq[0] = ch;
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = (state->__count == 1) ? t[i++] : seq[1];
      if ((seq[0] == 0xf0 && ch < 0x90)
	  || (seq[0] == 0xf4 && ch >= 0x90))
	{
	  /* overlong UTF-8 sequence or result is > 0x10ffff */
	  errno = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      seq[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = (state->__count == 2) ? t[i++] : seq[2];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      seq[2] = ch;
      if (state->__count == 2)
	state->__count = 3;
      else if (n < (size_t)-1)
	++n;
      if (state->__count == 3 && sizeof(wchar_t) == 2)
	{
	  /* On systems which have wchar_t being UTF-16 values, the value
	     doesn't fit into a single wchar_t in this case.  So what we
	     do here is to store the state with a special value of __count
	     and return the first half of a surrogate pair.  The first
	     three bytes of a UTF-8 sequence are enough to generate the
	     first half of a UTF-16 surrogate pair.  As return value we
	     choose to return the number of bytes actually read up to
	     here.
	     The second half of the surrogate pair is returned in case we
	     recognize the special __count value of four, and the next
	     byte is actually a valid value.  See below. */
	  tmp = (uint32_t)((seq[0] & (uint32_t) 0x07) << 18)
	    |   (uint32_t)((seq[1] & (uint32_t) 0x3f) << 12)
	    |   (uint32_t)((seq[2] & (uint32_t) 0x3f) << 6);
	  state->__count = 4;
	  *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
	  return i;
	}
      if (n < 4)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      tmp = (((uint32_t)seq[0] & 0x07) << 18)
	|   (((uint32_t)seq[1] & 0x3f) << 12)
	|   (((uint32_t)seq[2] & 0x3f) << 6)
	|   ((uint32_t)ch & 0x3f);
      if (state->__count == 4 && sizeof(wchar_t) == 2)
	/* Create the second half of the surrogate pair for systems with
	   wchar_t == UTF-16 . */
	*pwc = 0xdc00 | (tmp & 0x3ff);
      else
	*pwc = tmp;
      state->__count = 0;
      return i;
    }

  /* Stray continuation byte, or a lead byte outside 0xc0..0xf4. */
  errno = EILSEQ;
  return -1;
}
718
/* Cygwin defines its own doublebyte charset conversion functions
   because the underlying OS requires wchar_t == UTF-16. */

/*
 * Restartable Shift-JIS decoder.  A double-byte character is returned as
 * (lead byte << 8) + trail byte; any other byte is returned unchanged.
 *
 *   pwc    where to store the result; may be NULL to discard it
 *   s      input bytes; NULL means "query state dependence" (returns 0)
 *   n      number of bytes available at s
 *   state  __count is 1 while a lead byte is saved in __value.__wchb[0]
 *
 * Returns 0 if s is NULL or the character is NUL, -2 if n is 0 or only
 * the lead byte is available, -1 (errno = EILSEQ) if the byte after a
 * lead byte is not a valid trail byte, otherwise the number of bytes
 * consumed from s by this call.
 */
int
__sjis_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *) s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_issjis1 (ch))
	{
	  /* Save the lead byte so the call can be resumed if the trail
	     byte has not arrived yet. */
	  state->__value.__wchb[0] = ch;
	  state->__count = 1;
	  if (n <= 1)
	    return -2;
	  ch = t[i++];
	}
    }
  if (state->__count == 1)
    {
      if (_issjis2 (ch))
	{
	  *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
	  state->__count = 0;
	  return i;
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }

  /* Plain single-byte character. */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
776
/*
 * Restartable EUC-JP decoder.
 *
 * Two-byte characters are returned as (first << 8) + second.  Three-byte
 * characters (introduced by 0x8f) are returned as
 * (second << 8) + (third & 0x7f).  Any other byte is returned unchanged.
 *
 *   pwc    where to store the result; may be NULL to discard it
 *   s      input bytes; NULL means "query state dependence" (returns 0)
 *   n      number of bytes available at s
 *   state  __count is the number of bytes of an unfinished character
 *          saved in __value.__wchb
 *
 * Returns 0 if s is NULL or the character is NUL, -2 if n is 0 or the
 * character is incomplete, -1 (errno = EILSEQ) on an invalid trailing
 * byte, otherwise the number of bytes consumed from s by this call.
 */
int
__eucjp_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *) s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_iseucjp1 (ch))
	{
	  /* Save the first byte so the call can be resumed later. */
	  state->__value.__wchb[0] = ch;
	  state->__count = 1;
	  if (n <= 1)
	    return -2;
	  ch = t[i++];
	}
    }
  if (state->__count == 1)
    {
      if (_iseucjp2 (ch))
	{
	  if (state->__value.__wchb[0] == 0x8f)
	    {
	      /* 0x8f introduces a three-byte character: save the second
		 byte and go on to the third. */
	      state->__value.__wchb[1] = ch;
	      state->__count = 2;
	      if (n <= (size_t) i)
		return -2;
	      ch = t[i++];
	    }
	  else
	    {
	      *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
	      state->__count = 0;
	      return i;
	    }
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }
  if (state->__count == 2)
    {
      if (_iseucjp2 (ch))
	{
	  *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
	    + (wchar_t)(ch & 0x7f);
	  state->__count = 0;
	  return i;
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }

  /* Plain single-byte character. */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
858
859 int
__jis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)860 __jis_mbtowc (
861 wchar_t *pwc,
862 const char *s,
863 size_t n,
864 mbstate_t *state)
865 {
866 wchar_t dummy;
867 unsigned char *t = (unsigned char *)s;
868 JIS_STATE curr_state;
869 JIS_ACTION action;
870 JIS_CHAR_TYPE ch;
871 unsigned char *ptr;
872 unsigned int i;
873 int curr_ch;
874
875 if (pwc == NULL)
876 pwc = &dummy;
877
878 if (s == NULL)
879 {
880 state->__state = ASCII;
881 return 1; /* state-dependent */
882 }
883
884 if (n == 0)
885 return -2;
886
887 curr_state = state->__state;
888 ptr = t;
889
890 for (i = 0; i < n; ++i)
891 {
892 curr_ch = t[i];
893 switch (curr_ch)
894 {
895 case ESC_CHAR:
896 ch = ESCAPE;
897 break;
898 case '$':
899 ch = DOLLAR;
900 break;
901 case '@':
902 ch = AT;
903 break;
904 case '(':
905 ch = BRACKET;
906 break;
907 case 'B':
908 ch = B;
909 break;
910 case 'J':
911 ch = J;
912 break;
913 case '\0':
914 ch = NUL;
915 break;
916 default:
917 if (_isjis (curr_ch))
918 ch = JIS_CHAR;
919 else
920 ch = OTHER;
921 }
922
923 action = JIS_action_table[curr_state][ch];
924 curr_state = JIS_state_table[curr_state][ch];
925
926 switch (action)
927 {
928 case NOOP:
929 break;
930 case EMPTY:
931 state->__state = ASCII;
932 *pwc = (wchar_t)0;
933 return 0;
934 case COPY_A:
935 state->__state = ASCII;
936 *pwc = (wchar_t)*ptr;
937 return (i + 1);
938 case COPY_J1:
939 state->__value.__wchb[0] = t[i];
940 break;
941 case COPY_J2:
942 state->__state = JIS;
943 *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
944 return (i + 1);
945 case MAKE_A:
946 ptr = (unsigned char *)(t + i + 1);
947 break;
948 case ERROR:
949 default:
950 _REENT_ERRNO(r) = EILSEQ;
951 return -1;
952 }
953
954 }
955
956 state->__state = curr_state;
957 return -2; /* n < bytes needed */
958 }
959 #endif /* _MB_CAPABLE */
960