1 /*
2 Copyright (c) 1990 Regents of the University of California.
3 All rights reserved.
4 */
5 #include <newlib.h>
6 #include <stdlib.h>
7 #include <locale.h>
8 #include "mbctype.h"
9 #include <wchar.h>
10 #include <string.h>
11 #include <errno.h>
12 #include "local.h"
13
/* Convert one byte of a plain single-byte ("C"/ASCII) string to a wide
   character.

   PWC   receives the converted character; may be NULL, in which case the
         result is stored in a scratch variable and discarded.
   S     input bytes; NULL yields 0.
   N     number of bytes available at S.
   STATE unused.

   Returns 0 when S is NULL or points at a NUL byte, 1 when one byte was
   converted, and -2 when N is 0.  When built for Cygwin, bytes >= 0x80
   are rejected: errno is set to EILSEQ and -1 is returned.  */
int
__ascii_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t     *state)
{
  wchar_t discard;
  const unsigned char *in = (const unsigned char *) s;
  wchar_t *out = (pwc != NULL) ? pwc : &discard;

  (void) state;

  if (in == NULL)
    return 0;
  if (n == 0)
    return -2;

#ifdef __CYGWIN__
  if ((wchar_t) *in >= 0x80)
    {
      _REENT_ERRNO(r) = EILSEQ;
      return -1;
    }
#endif

  *out = (wchar_t) *in;

  return (*in != '\0') ? 1 : 0;
}
49
50 #ifdef _MB_CAPABLE
51 typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
52 NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
53 typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
54 INV, JIS_S_NUM } JIS_STATE;
55 typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
56
/**************************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the following JIS characters and
 * switches to ASCII are grouped with the preceding JIS characters.  Thus, the maximum
 * returned length is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
 *************************************************************************************/
63
64 #ifndef __CYGWIN__
65 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
66 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
67 /* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
68 /* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
69 /* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
70 /* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
71 /* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
72 /* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
73 /* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
74 };
75
76 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
77 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
78 /* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
79 /* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
80 /* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
81 /* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
82 /* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
83 /* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
84 /* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
85 };
86 #endif /* !__CYGWIN__ */
87
/* For the more complex encodings the __count field of mbstate_t is reused
   to store a converter state value; __state is an alias for that field.  */
#define __state __count
90
91 #ifdef _MB_EXTENDED_CHARSETS_ISO
92 static int
___iso_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int iso_idx,mbstate_t * state)93 ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
94 int iso_idx, mbstate_t *state)
95 {
96 wchar_t dummy;
97 unsigned char *t = (unsigned char *)s;
98
99 if (pwc == NULL)
100 pwc = &dummy;
101
102 if (s == NULL)
103 return 0;
104
105 if (n == 0)
106 return -2;
107
108 if (*t >= 0xa0)
109 {
110 if (iso_idx >= 0)
111 {
112 *pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
113 if (*pwc == 0) /* Invalid character */
114 {
115 _REENT_ERRNO(r) = EILSEQ;
116 return -1;
117 }
118 return 1;
119 }
120 }
121
122 *pwc = (wchar_t) *t;
123
124 if (*t == '\0')
125 return 0;
126
127 return 1;
128 }
129
/* Per-charset entry points __iso_8859_<N>_mbtowc.  Each one forwards to
   ___iso_mbtowc with a fixed conversion-table index.  ISO-8859-1 passes -1,
   which makes the worker skip the table lookup; the remaining charsets
   pass indices 0..13 in ascending charset order (there is no ISO 8859-12).  */
#define ISO_8859_MBTOWC(num, idx)					\
  static int								\
  __iso_8859_ ## num ## _mbtowc (struct _reent *r, wchar_t *pwc,	\
				 const char *s, size_t n,		\
				 mbstate_t *state)			\
  {									\
    return ___iso_mbtowc (r, pwc, s, n, (idx), state);			\
  }

ISO_8859_MBTOWC (1, -1)
ISO_8859_MBTOWC (2, 0)
ISO_8859_MBTOWC (3, 1)
ISO_8859_MBTOWC (4, 2)
ISO_8859_MBTOWC (5, 3)
ISO_8859_MBTOWC (6, 4)
ISO_8859_MBTOWC (7, 5)
ISO_8859_MBTOWC (8, 6)
ISO_8859_MBTOWC (9, 7)
ISO_8859_MBTOWC (10, 8)
ISO_8859_MBTOWC (11, 9)
ISO_8859_MBTOWC (13, 10)
ISO_8859_MBTOWC (14, 11)
ISO_8859_MBTOWC (15, 12)
ISO_8859_MBTOWC (16, 13)

#undef ISO_8859_MBTOWC
234
235 static mbtowc_p __iso_8859_mbtowc[17] = {
236 NULL,
237 __iso_8859_1_mbtowc,
238 __iso_8859_2_mbtowc,
239 __iso_8859_3_mbtowc,
240 __iso_8859_4_mbtowc,
241 __iso_8859_5_mbtowc,
242 __iso_8859_6_mbtowc,
243 __iso_8859_7_mbtowc,
244 __iso_8859_8_mbtowc,
245 __iso_8859_9_mbtowc,
246 __iso_8859_10_mbtowc,
247 __iso_8859_11_mbtowc,
248 NULL, /* No ISO 8859-12 */
249 __iso_8859_13_mbtowc,
250 __iso_8859_14_mbtowc,
251 __iso_8859_15_mbtowc,
252 __iso_8859_16_mbtowc
253 };
254
255 /* val *MUST* be valid! All checks for validity are supposed to be
256 performed before calling this function. */
257 mbtowc_p
__iso_mbtowc(int val)258 __iso_mbtowc (int val)
259 {
260 return __iso_8859_mbtowc[val];
261 }
262 #endif /* _MB_EXTENDED_CHARSETS_ISO */
263
264 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
265 static int
___cp_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int cp_idx,mbstate_t * state)266 ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
267 int cp_idx, mbstate_t *state)
268 {
269 wchar_t dummy;
270 unsigned char *t = (unsigned char *)s;
271
272 if (pwc == NULL)
273 pwc = &dummy;
274
275 if (s == NULL)
276 return 0;
277
278 if (n == 0)
279 return -2;
280
281 if (*t >= 0x80)
282 {
283 if (cp_idx >= 0)
284 {
285 *pwc = __cp_conv[cp_idx][*t - 0x80];
286 if (*pwc == 0) /* Invalid character */
287 {
288 _REENT_ERRNO(r) = EILSEQ;
289 return -1;
290 }
291 return 1;
292 }
293 }
294
295 *pwc = (wchar_t)*t;
296
297 if (*t == '\0')
298 return 0;
299
300 return 1;
301 }
302
/* Per-codepage entry points __cp_<N>_mbtowc.  Each one forwards to
   ___cp_mbtowc with a fixed conversion-table index (0..26), assigned in
   the order listed here.  */
#define CP_MBTOWC(cp, idx)						\
  static int								\
  __cp_ ## cp ## _mbtowc (struct _reent *r, wchar_t *pwc,		\
			  const char *s, size_t n,			\
			  mbstate_t *state)				\
  {									\
    return ___cp_mbtowc (r, pwc, s, n, (idx), state);			\
  }

CP_MBTOWC (437, 0)
CP_MBTOWC (720, 1)
CP_MBTOWC (737, 2)
CP_MBTOWC (775, 3)
CP_MBTOWC (850, 4)
CP_MBTOWC (852, 5)
CP_MBTOWC (855, 6)
CP_MBTOWC (857, 7)
CP_MBTOWC (858, 8)
CP_MBTOWC (862, 9)
CP_MBTOWC (866, 10)
CP_MBTOWC (874, 11)
CP_MBTOWC (1125, 12)
CP_MBTOWC (1250, 13)
CP_MBTOWC (1251, 14)
CP_MBTOWC (1252, 15)
CP_MBTOWC (1253, 16)
CP_MBTOWC (1254, 17)
CP_MBTOWC (1255, 18)
CP_MBTOWC (1256, 19)
CP_MBTOWC (1257, 20)
CP_MBTOWC (1258, 21)
CP_MBTOWC (20866, 22)
CP_MBTOWC (21866, 23)
CP_MBTOWC (101, 24)
CP_MBTOWC (102, 25)
CP_MBTOWC (103, 26)

#undef CP_MBTOWC
491
492 static mbtowc_p __cp_xxx_mbtowc[27] = {
493 __cp_437_mbtowc,
494 __cp_720_mbtowc,
495 __cp_737_mbtowc,
496 __cp_775_mbtowc,
497 __cp_850_mbtowc,
498 __cp_852_mbtowc,
499 __cp_855_mbtowc,
500 __cp_857_mbtowc,
501 __cp_858_mbtowc,
502 __cp_862_mbtowc,
503 __cp_866_mbtowc,
504 __cp_874_mbtowc,
505 __cp_1125_mbtowc,
506 __cp_1250_mbtowc,
507 __cp_1251_mbtowc,
508 __cp_1252_mbtowc,
509 __cp_1253_mbtowc,
510 __cp_1254_mbtowc,
511 __cp_1255_mbtowc,
512 __cp_1256_mbtowc,
513 __cp_1257_mbtowc,
514 __cp_1258_mbtowc,
515 __cp_20866_mbtowc,
516 __cp_21866_mbtowc,
517 __cp_101_mbtowc,
518 __cp_102_mbtowc,
519 __cp_103_mbtowc,
520 };
521
522 /* val *MUST* be valid! All checks for validity are supposed to be
523 performed before calling this function. */
524 mbtowc_p
__cp_mbtowc(int val)525 __cp_mbtowc (int val)
526 {
527 return __cp_xxx_mbtowc[__cp_val_index (val)];
528 }
529 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
530
/* Convert one UTF-8 sequence at S into a wide character, resumably.

   PWC   receives the converted character; may be NULL (result discarded).
   S     input bytes; NULL yields 0.
   N     number of bytes available at S.
   STATE conversion state.  __count is the number of bytes of an unfinished
         sequence already stored in __value.__wchb[]; the special value 4
         means the first half of a UTF-16 surrogate pair has been returned
         and the second half is still owed (16-bit wchar_t only).

   Returns 0 when S is NULL or the character is NUL, -2 when N is 0 or the
   available bytes do not complete a sequence (the bytes seen so far are
   kept in STATE), -1 with errno set to EILSEQ for an invalid sequence,
   and otherwise the number of bytes read from S by this call.  */
int
__utf8_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t     *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;	/* bytes read from s during this call */

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  /* Fetch the lead byte: from the input for a fresh sequence, or from the
     saved state when resuming an unfinished one.  */
  if (state->__count == 0)
    ch = t[i++];
  else
    ch = state->__value.__wchb[0];

  if (ch == '\0')
    {
      *pwc = 0;
      state->__count = 0;
      return 0; /* s points to the null character */
    }

  if (ch <= 0x7f)
    {
      /* single-byte sequence */
      state->__count = 0;
      *pwc = ch;
      return 1;
    }
  if (ch >= 0xc0 && ch <= 0xdf)
    {
      /* two-byte sequence */
      state->__value.__wchb[0] = ch;
      /* When resuming, the lead byte was taken from the state rather than
	 from s, so n is bumped by one to let the "n < 2" test below count
	 bytes of the whole sequence.  The (size_t)-1 guard keeps n from
	 wrapping around.  */
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  /* not a continuation byte */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      if (state->__value.__wchb[0] < 0xc2)
	{
	  /* overlong UTF-8 sequence */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
	|    (wchar_t)(ch & 0x3f);
      return i;
    }
  if (ch >= 0xe0 && ch <= 0xef)
    {
      /* three-byte sequence */
      wchar_t tmp;
      state->__value.__wchb[0] = ch;
      /* Same bookkeeping as in the two-byte case: every byte supplied by
	 the state instead of s is added to n.  */
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      /* Second byte: read it from s unless an earlier call stored it.  */
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
      if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
	{
	  /* overlong UTF-8 sequence */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  /* not a continuation byte */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      state->__value.__wchb[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  /* not a continuation byte */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
	|    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
	|     (wchar_t)(ch & 0x3f);
      *pwc = tmp;
      return i;
    }
  if (ch >= 0xf0 && ch <= 0xf4)
    {
      /* four-byte sequence */
      uint32_t tmp;
      state->__value.__wchb[0] = ch;
      /* Same bookkeeping as above: every byte supplied by the state
	 instead of s is added to n.  */
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      /* Second byte: read it from s unless an earlier call stored it.  */
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
      if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
	  || (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
	{
	  /* overlong UTF-8 sequence or result is > 0x10ffff */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  /* not a continuation byte */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      state->__value.__wchb[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      /* Third byte: read it from s unless an earlier call stored it.  */
      ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
      if (ch < 0x80 || ch > 0xbf)
	{
	  /* not a continuation byte */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      state->__value.__wchb[2] = ch;
      if (state->__count == 2)
	state->__count = 3;
      else if (n < (size_t)-1)
	++n;
      if (state->__count == 3 && sizeof(wchar_t) == 2)
	{
	  /* On systems which have wchar_t being UTF-16 values, the value
	     doesn't fit into a single wchar_t in this case.  So what we
	     do here is to store the state with a special value of __count
	     and return the first half of a surrogate pair.  The first
	     three bytes of a UTF-8 sequence are enough to generate the
	     first half of a UTF-16 surrogate pair.  As return value we
	     choose to return the number of bytes actually read up to
	     here.
	     The second half of the surrogate pair is returned in case we
	     recognize the special __count value of four, and the next
	     byte is actually a valid value.  See below. */
	  tmp = (uint32_t)((state->__value.__wchb[0] & (uint32_t) 0x07) << 18)
	    |   (uint32_t)((state->__value.__wchb[1] & (uint32_t) 0x3f) << 12)
	    |   (uint32_t)((state->__value.__wchb[2] & (uint32_t) 0x3f) << 6);
	  state->__count = 4;
	  *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
	  return i;
	}
      if (n < 4)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  /* not a continuation byte */
	  _REENT_ERRNO(r) = EILSEQ;
	  return -1;
	}
      tmp = (((uint32_t)state->__value.__wchb[0] & 0x07) << 18)
	|   (((uint32_t)state->__value.__wchb[1] & 0x3f) << 12)
	|   (((uint32_t)state->__value.__wchb[2] & 0x3f) << 6)
	|   ((uint32_t)ch & 0x3f);
      if (state->__count == 4 && sizeof(wchar_t) == 2)
	/* Create the second half of the surrogate pair for systems with
	   wchar_t == UTF-16 . */
	*pwc = 0xdc00 | (tmp & 0x3ff);
      else
	*pwc = tmp;
      state->__count = 0;
      return i;
    }

  /* Any other lead byte (0x80..0xbf or 0xf5..0xff) cannot start a
     sequence.  */
  _REENT_ERRNO(r) = EILSEQ;
  return -1;
}
728
729 /* Cygwin defines its own doublebyte charset conversion functions
730 because the underlying OS requires wchar_t == UTF-16. */
731 #ifndef __CYGWIN__
732 int
__sjis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)733 __sjis_mbtowc (
734 wchar_t *pwc,
735 const char *s,
736 size_t n,
737 mbstate_t *state)
738 {
739 wchar_t dummy;
740 unsigned char *t = (unsigned char *)s;
741 int ch;
742 int i = 0;
743
744 if (pwc == NULL)
745 pwc = &dummy;
746
747 if (s == NULL)
748 return 0; /* not state-dependent */
749
750 if (n == 0)
751 return -2;
752
753 ch = t[i++];
754 if (state->__count == 0)
755 {
756 if (_issjis1 (ch))
757 {
758 state->__value.__wchb[0] = ch;
759 state->__count = 1;
760 if (n <= 1)
761 return -2;
762 ch = t[i++];
763 }
764 }
765 if (state->__count == 1)
766 {
767 if (_issjis2 (ch))
768 {
769 *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
770 state->__count = 0;
771 return i;
772 }
773 else
774 {
775 _REENT_ERRNO(r) = EILSEQ;
776 return -1;
777 }
778 }
779
780 *pwc = (wchar_t)*t;
781
782 if (*t == '\0')
783 return 0;
784
785 return 1;
786 }
787
788 int
__eucjp_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)789 __eucjp_mbtowc (
790 wchar_t *pwc,
791 const char *s,
792 size_t n,
793 mbstate_t *state)
794 {
795 wchar_t dummy;
796 unsigned char *t = (unsigned char *)s;
797 int ch;
798 int i = 0;
799
800 if (pwc == NULL)
801 pwc = &dummy;
802
803 if (s == NULL)
804 return 0;
805
806 if (n == 0)
807 return -2;
808
809 ch = t[i++];
810 if (state->__count == 0)
811 {
812 if (_iseucjp1 (ch))
813 {
814 state->__value.__wchb[0] = ch;
815 state->__count = 1;
816 if (n <= 1)
817 return -2;
818 ch = t[i++];
819 }
820 }
821 if (state->__count == 1)
822 {
823 if (_iseucjp2 (ch))
824 {
825 if (state->__value.__wchb[0] == 0x8f)
826 {
827 state->__value.__wchb[1] = ch;
828 state->__count = 2;
829 if (n <= (size_t) i)
830 return -2;
831 ch = t[i++];
832 }
833 else
834 {
835 *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
836 state->__count = 0;
837 return i;
838 }
839 }
840 else
841 {
842 _REENT_ERRNO(r) = EILSEQ;
843 return -1;
844 }
845 }
846 if (state->__count == 2)
847 {
848 if (_iseucjp2 (ch))
849 {
850 *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
851 + (wchar_t)(ch & 0x7f);
852 state->__count = 0;
853 return i;
854 }
855 else
856 {
857 _REENT_ERRNO(r) = EILSEQ;
858 return -1;
859 }
860 }
861
862 *pwc = (wchar_t)*t;
863
864 if (*t == '\0')
865 return 0;
866
867 return 1;
868 }
869
870 int
__jis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)871 __jis_mbtowc (
872 wchar_t *pwc,
873 const char *s,
874 size_t n,
875 mbstate_t *state)
876 {
877 wchar_t dummy;
878 unsigned char *t = (unsigned char *)s;
879 JIS_STATE curr_state;
880 JIS_ACTION action;
881 JIS_CHAR_TYPE ch;
882 unsigned char *ptr;
883 unsigned int i;
884 int curr_ch;
885
886 if (pwc == NULL)
887 pwc = &dummy;
888
889 if (s == NULL)
890 {
891 state->__state = ASCII;
892 return 1; /* state-dependent */
893 }
894
895 if (n == 0)
896 return -2;
897
898 curr_state = state->__state;
899 ptr = t;
900
901 for (i = 0; i < n; ++i)
902 {
903 curr_ch = t[i];
904 switch (curr_ch)
905 {
906 case ESC_CHAR:
907 ch = ESCAPE;
908 break;
909 case '$':
910 ch = DOLLAR;
911 break;
912 case '@':
913 ch = AT;
914 break;
915 case '(':
916 ch = BRACKET;
917 break;
918 case 'B':
919 ch = B;
920 break;
921 case 'J':
922 ch = J;
923 break;
924 case '\0':
925 ch = NUL;
926 break;
927 default:
928 if (_isjis (curr_ch))
929 ch = JIS_CHAR;
930 else
931 ch = OTHER;
932 }
933
934 action = JIS_action_table[curr_state][ch];
935 curr_state = JIS_state_table[curr_state][ch];
936
937 switch (action)
938 {
939 case NOOP:
940 break;
941 case EMPTY:
942 state->__state = ASCII;
943 *pwc = (wchar_t)0;
944 return 0;
945 case COPY_A:
946 state->__state = ASCII;
947 *pwc = (wchar_t)*ptr;
948 return (i + 1);
949 case COPY_J1:
950 state->__value.__wchb[0] = t[i];
951 break;
952 case COPY_J2:
953 state->__state = JIS;
954 *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
955 return (i + 1);
956 case MAKE_A:
957 ptr = (unsigned char *)(t + i + 1);
958 break;
959 case ERROR:
960 default:
961 _REENT_ERRNO(r) = EILSEQ;
962 return -1;
963 }
964
965 }
966
967 state->__state = curr_state;
968 return -2; /* n < bytes needed */
969 }
970 #endif /* !__CYGWIN__*/
971 #endif /* _MB_CAPABLE */
972