1 /*
2 Copyright (c) 1990 Regents of the University of California.
3 All rights reserved.
4 */
5 #include <newlib.h>
6 #include <stdlib.h>
7 #include <locale.h>
8 #include "mbctype.h"
9 #include <wchar.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include "local.h"
14
/* Convert the single byte at s into a wide character by direct
   byte-to-wchar_t widening.

   pwc    receives the converted character; when NULL the result is
          written to a local scratch variable and discarded.
   s      input byte(s); NULL yields 0.
   n      number of bytes available at s; 0 yields -2.
   state  not used by this converter.

   Returns 0 when s is NULL or the byte is NUL, -2 when n is 0,
   and 1 otherwise.  */
int
__ascii_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t discard;
  wchar_t *out = (pwc != NULL) ? pwc : &discard;
  const unsigned char *src = (const unsigned char *) s;

  (void) state;

  if (src == NULL)
    return 0;
  if (n == 0)
    return -2;

  *out = (wchar_t) src[0];

  return (src[0] != '\0') ? 1 : 0;
}
42
43 #ifdef _MB_CAPABLE
/* Input byte classes; used as the column index into the JIS state and
   action tables.  The per-value notes mirror the classification switch
   in __jis_mbtowc.  */
typedef enum __packed
{
  ESCAPE,       /* ESC_CHAR */
  DOLLAR,       /* '$' */
  BRACKET,      /* '(' */
  AT,           /* '@' */
  B,            /* 'B' */
  J,            /* 'J' */
  NUL,          /* '\0' */
  JIS_CHAR,     /* any other byte for which _isjis() is true */
  OTHER,        /* every remaining byte */
  JIS_C_NUM     /* number of classes above (table column count) */
} JIS_CHAR_TYPE;
/* Decoder states; used as the row index into the JIS state and action
   tables.  */
typedef enum __packed
{
  ASCII,        /* plain ASCII mode */
  JIS,          /* JIS mode, waiting for the first byte of a pair */
  A_ESC,        /* ESC seen while in ASCII mode */
  A_ESC_DL,     /* ESC '$' seen while in ASCII mode */
  JIS_1,        /* first byte of a JIS pair has been saved */
  J_ESC,        /* ESC seen while in JIS mode */
  J_ESC_BR,     /* ESC '(' seen while in JIS mode */
  INV,          /* invalid input was encountered */
  JIS_S_NUM     /* number of states above (table row count) */
} JIS_STATE;
/* What __jis_mbtowc does for a given (state, byte class) pair.  */
typedef enum __packed
{
  COPY_A,       /* emit a single-byte character and return */
  COPY_J1,      /* remember the first byte of a two-byte character */
  COPY_J2,      /* combine saved byte with this one, emit and return */
  MAKE_A,       /* switch-to-ASCII sequence finished; move output pointer */
  NOOP,         /* consume the byte, nothing else */
  EMPTY,        /* NUL terminator: emit 0 and return 0 */
  ERROR         /* invalid sequence: fail with EILSEQ */
} JIS_ACTION;
49
/**************************************************************************************
 * State/action tables for processing JIS encoding.
 * Where possible, a switch to JIS is grouped with the JIS characters that follow it,
 * and a switch to ASCII is grouped with the JIS characters that precede it.  Thus, the
 * maximum returned length is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back
 * to ASCII) = 6.
 *************************************************************************************/
56
57 #ifndef __CYGWIN__
58 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
59 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
60 /* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
61 /* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
62 /* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
63 /* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
64 /* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
65 /* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
66 /* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
67 };
68
69 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
70 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
71 /* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
72 /* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
73 /* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
74 /* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
75 /* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
76 /* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
77 /* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
78 };
79 #endif /* !__CYGWIN__ */
80
/* We override the mbstate_t __count field for more complex encodings and use it to store a state value. */
82 #define __state __count
83
84 #ifdef _MB_EXTENDED_CHARSETS_ISO
85 static int
___iso_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int iso_idx,mbstate_t * state)86 ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
87 int iso_idx, mbstate_t *state)
88 {
89 wchar_t dummy;
90 unsigned char *t = (unsigned char *)s;
91
92 if (pwc == NULL)
93 pwc = &dummy;
94
95 if (s == NULL)
96 return 0;
97
98 if (n == 0)
99 return -2;
100
101 if (*t >= 0xa0)
102 {
103 if (iso_idx >= 0)
104 {
105 *pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
106 if (*pwc == 0) /* Invalid character */
107 {
108 _REENT_ERRNO(r) = EILSEQ;
109 return -1;
110 }
111 return 1;
112 }
113 }
114
115 *pwc = (wchar_t) *t;
116
117 if (*t == '\0')
118 return 0;
119
120 return 1;
121 }
122
/* ISO 8859-1 entry point: delegates to ___iso_mbtowc with a negative
   table index, so every byte is widened directly without a lookup.  */
static int
__iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = -1;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
129
/* ISO 8859-2 entry point: delegates to ___iso_mbtowc using
   conversion-table index 0.  */
static int
__iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 0;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
136
/* ISO 8859-3 entry point: delegates to ___iso_mbtowc using
   conversion-table index 1.  */
static int
__iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 1;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
143
/* ISO 8859-4 entry point: delegates to ___iso_mbtowc using
   conversion-table index 2.  */
static int
__iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 2;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
150
/* ISO 8859-5 entry point: delegates to ___iso_mbtowc using
   conversion-table index 3.  */
static int
__iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 3;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
157
/* ISO 8859-6 entry point: delegates to ___iso_mbtowc using
   conversion-table index 4.  */
static int
__iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 4;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
164
/* ISO 8859-7 entry point: delegates to ___iso_mbtowc using
   conversion-table index 5.  */
static int
__iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 5;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
171
/* ISO 8859-8 entry point: delegates to ___iso_mbtowc using
   conversion-table index 6.  */
static int
__iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 6;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
178
/* ISO 8859-9 entry point: delegates to ___iso_mbtowc using
   conversion-table index 7.  */
static int
__iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  const int iso_idx = 7;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
185
/* ISO 8859-10 entry point: delegates to ___iso_mbtowc using
   conversion-table index 8.  */
static int
__iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  const int iso_idx = 8;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
192
/* ISO 8859-11 entry point: delegates to ___iso_mbtowc using
   conversion-table index 9.  */
static int
__iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  const int iso_idx = 9;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
199
/* ISO 8859-13 entry point: delegates to ___iso_mbtowc using
   conversion-table index 10.  */
static int
__iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  const int iso_idx = 10;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
206
/* ISO 8859-14 entry point: delegates to ___iso_mbtowc using
   conversion-table index 11.  */
static int
__iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  const int iso_idx = 11;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
213
/* ISO 8859-15 entry point: delegates to ___iso_mbtowc using
   conversion-table index 12.  */
static int
__iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  const int iso_idx = 12;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
220
/* ISO 8859-16 entry point: delegates to ___iso_mbtowc using
   conversion-table index 13.  */
static int
__iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  const int iso_idx = 13;

  return ___iso_mbtowc (r, pwc, s, n, iso_idx, state);
}
227
228 static mbtowc_p __iso_8859_mbtowc[17] = {
229 NULL,
230 __iso_8859_1_mbtowc,
231 __iso_8859_2_mbtowc,
232 __iso_8859_3_mbtowc,
233 __iso_8859_4_mbtowc,
234 __iso_8859_5_mbtowc,
235 __iso_8859_6_mbtowc,
236 __iso_8859_7_mbtowc,
237 __iso_8859_8_mbtowc,
238 __iso_8859_9_mbtowc,
239 __iso_8859_10_mbtowc,
240 __iso_8859_11_mbtowc,
241 NULL, /* No ISO 8859-12 */
242 __iso_8859_13_mbtowc,
243 __iso_8859_14_mbtowc,
244 __iso_8859_15_mbtowc,
245 __iso_8859_16_mbtowc
246 };
247
248 /* val *MUST* be valid! All checks for validity are supposed to be
249 performed before calling this function. */
250 mbtowc_p
__iso_mbtowc(int val)251 __iso_mbtowc (int val)
252 {
253 return __iso_8859_mbtowc[val];
254 }
255 #endif /* _MB_EXTENDED_CHARSETS_ISO */
256
257 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
258 static int
___cp_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int cp_idx,mbstate_t * state)259 ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
260 int cp_idx, mbstate_t *state)
261 {
262 wchar_t dummy;
263 unsigned char *t = (unsigned char *)s;
264
265 if (pwc == NULL)
266 pwc = &dummy;
267
268 if (s == NULL)
269 return 0;
270
271 if (n == 0)
272 return -2;
273
274 if (*t >= 0x80)
275 {
276 if (cp_idx >= 0)
277 {
278 *pwc = __cp_conv[cp_idx][*t - 0x80];
279 if (*pwc == 0) /* Invalid character */
280 {
281 _REENT_ERRNO(r) = EILSEQ;
282 return -1;
283 }
284 return 1;
285 }
286 }
287
288 *pwc = (wchar_t)*t;
289
290 if (*t == '\0')
291 return 0;
292
293 return 1;
294 }
295
/* CP437 entry point: delegates to ___cp_mbtowc using conversion-table
   index 0.  */
static int
__cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 0;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
302
/* CP720 entry point: delegates to ___cp_mbtowc using conversion-table
   index 1.  */
static int
__cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 1;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
309
/* CP737 entry point: delegates to ___cp_mbtowc using conversion-table
   index 2.  */
static int
__cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 2;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
316
/* CP775 entry point: delegates to ___cp_mbtowc using conversion-table
   index 3.  */
static int
__cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 3;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
323
/* CP850 entry point: delegates to ___cp_mbtowc using conversion-table
   index 4.  */
static int
__cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 4;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
330
/* CP852 entry point: delegates to ___cp_mbtowc using conversion-table
   index 5.  */
static int
__cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 5;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
337
/* CP855 entry point: delegates to ___cp_mbtowc using conversion-table
   index 6.  */
static int
__cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 6;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
344
/* CP857 entry point: delegates to ___cp_mbtowc using conversion-table
   index 7.  */
static int
__cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 7;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
351
/* CP858 entry point: delegates to ___cp_mbtowc using conversion-table
   index 8.  */
static int
__cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 8;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
358
/* CP862 entry point: delegates to ___cp_mbtowc using conversion-table
   index 9.  */
static int
__cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 9;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
365
/* CP866 entry point: delegates to ___cp_mbtowc using conversion-table
   index 10.  */
static int
__cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 10;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
372
/* CP874 entry point: delegates to ___cp_mbtowc using conversion-table
   index 11.  */
static int
__cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 11;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
379
/* CP1125 entry point: delegates to ___cp_mbtowc using conversion-table
   index 12.  */
static int
__cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 12;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
386
/* CP1250 entry point: delegates to ___cp_mbtowc using conversion-table
   index 13.  */
static int
__cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 13;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
393
/* CP1251 entry point: delegates to ___cp_mbtowc using conversion-table
   index 14.  */
static int
__cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 14;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
400
/* CP1252 entry point: delegates to ___cp_mbtowc using conversion-table
   index 15.  */
static int
__cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 15;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
407
/* CP1253 entry point: delegates to ___cp_mbtowc using conversion-table
   index 16.  */
static int
__cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 16;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
414
/* CP1254 entry point: delegates to ___cp_mbtowc using conversion-table
   index 17.  */
static int
__cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 17;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
421
/* CP1255 entry point: delegates to ___cp_mbtowc using conversion-table
   index 18.  */
static int
__cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 18;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
428
/* CP1256 entry point: delegates to ___cp_mbtowc using conversion-table
   index 19.  */
static int
__cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 19;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
435
/* CP1257 entry point: delegates to ___cp_mbtowc using conversion-table
   index 20.  */
static int
__cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 20;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
442
/* CP1258 entry point: delegates to ___cp_mbtowc using conversion-table
   index 21.  */
static int
__cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  const int cp_idx = 21;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
449
/* CP20866 entry point: delegates to ___cp_mbtowc using conversion-table
   index 22.  */
static int
__cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                   mbstate_t *state)
{
  const int cp_idx = 22;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
456
/* CP21866 entry point: delegates to ___cp_mbtowc using conversion-table
   index 23.  */
static int
__cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                   mbstate_t *state)
{
  const int cp_idx = 23;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
463
/* Entry point for the charset numbered 101: delegates to ___cp_mbtowc
   using conversion-table index 24.  */
static int
__cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 24;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
470
/* Entry point for the charset numbered 102: delegates to ___cp_mbtowc
   using conversion-table index 25.  */
static int
__cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 25;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
477
/* Entry point for the charset numbered 103: delegates to ___cp_mbtowc
   using conversion-table index 26.  */
static int
__cp_103_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  const int cp_idx = 26;

  return ___cp_mbtowc (r, pwc, s, n, cp_idx, state);
}
484
485 static mbtowc_p __cp_xxx_mbtowc[27] = {
486 __cp_437_mbtowc,
487 __cp_720_mbtowc,
488 __cp_737_mbtowc,
489 __cp_775_mbtowc,
490 __cp_850_mbtowc,
491 __cp_852_mbtowc,
492 __cp_855_mbtowc,
493 __cp_857_mbtowc,
494 __cp_858_mbtowc,
495 __cp_862_mbtowc,
496 __cp_866_mbtowc,
497 __cp_874_mbtowc,
498 __cp_1125_mbtowc,
499 __cp_1250_mbtowc,
500 __cp_1251_mbtowc,
501 __cp_1252_mbtowc,
502 __cp_1253_mbtowc,
503 __cp_1254_mbtowc,
504 __cp_1255_mbtowc,
505 __cp_1256_mbtowc,
506 __cp_1257_mbtowc,
507 __cp_1258_mbtowc,
508 __cp_20866_mbtowc,
509 __cp_21866_mbtowc,
510 __cp_101_mbtowc,
511 __cp_102_mbtowc,
512 __cp_103_mbtowc,
513 };
514
515 /* val *MUST* be valid! All checks for validity are supposed to be
516 performed before calling this function. */
517 mbtowc_p
__cp_mbtowc(int val)518 __cp_mbtowc (int val)
519 {
520 return __cp_xxx_mbtowc[__cp_val_index (val)];
521 }
522 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
523
/* Decode one UTF-8 encoded character from s, restartably.

   pwc    receives the decoded character; NULL discards it.
   s      input bytes; NULL yields 0.
   n      number of bytes available at s; 0 yields -2.
   state  conversion state.  While a sequence is incomplete, __count
          holds the number of bytes seen so far and __value.__wchb holds
          those bytes, so that a later call can continue with the
          remaining input.

   Returns 0 for the NUL character, otherwise the number of bytes taken
   from s by this call.  Returns -2 when the available bytes do not
   complete the sequence, and -1 with errno set to EILSEQ for an
   invalid, overlong or out-of-range (> 0x10ffff) sequence.

   When wchar_t is two bytes wide, a four-byte sequence is delivered as
   a surrogate pair over two calls (see the comment in the four-byte
   branch).

   errno is set directly: this function has no reentrancy-structure
   parameter.  Saved bytes are read back through unsigned char so the
   range checks do not depend on the signedness of __wchb.  */
int
__utf8_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  /* Either start a new sequence or pick up the saved lead byte.  */
  if (state->__count == 0)
    ch = t[i++];
  else
    ch = (unsigned char) state->__value.__wchb[0];

  if (ch == '\0')
    {
      *pwc = 0;
      state->__count = 0;
      return 0; /* s points to the null character */
    }

  if (ch <= 0x7f)
    {
      /* single-byte sequence */
      state->__count = 0;
      *pwc = ch;
      return 1;
    }
  if (ch >= 0xc0 && ch <= 0xdf)
    {
      /* two-byte sequence */
      const int lead = ch;

      state->__value.__wchb[0] = lead;
      /* When resuming, bytes already held in the state count towards
         the length checks below, hence the n adjustment.  */
      if (state->__count == 0)
        state->__count = 1;
      else if (n < (size_t)-1)
        ++n;
      if (n < 2)
        return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      if (lead < 0xc2)
        {
          /* overlong UTF-8 sequence */
          errno = EILSEQ;
          return -1;
        }
      state->__count = 0;
      *pwc = (wchar_t)((lead & 0x1f) << 6)
        |    (wchar_t)(ch & 0x3f);
      return i;
    }
  if (ch >= 0xe0 && ch <= 0xef)
    {
      /* three-byte sequence */
      wchar_t tmp;
      const int lead = ch;
      int second;

      state->__value.__wchb[0] = lead;
      if (state->__count == 0)
        state->__count = 1;
      else if (n < (size_t)-1)
        ++n;
      if (n < 2)
        return -2;
      second = (state->__count == 1)
        ? t[i++] : (unsigned char) state->__value.__wchb[1];
      if (lead == 0xe0 && second < 0xa0)
        {
          /* overlong UTF-8 sequence */
          errno = EILSEQ;
          return -1;
        }
      if (second < 0x80 || second > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__value.__wchb[1] = second;
      if (state->__count == 1)
        state->__count = 2;
      else if (n < (size_t)-1)
        ++n;
      if (n < 3)
        return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__count = 0;
      tmp = (wchar_t)((lead & 0x0f) << 12)
        |    (wchar_t)((second & 0x3f) << 6)
        |     (wchar_t)(ch & 0x3f);
      *pwc = tmp;
      return i;
    }
  if (ch >= 0xf0 && ch <= 0xf4)
    {
      /* four-byte sequence */
      uint32_t tmp;
      const int lead = ch;
      int second;
      int third;

      state->__value.__wchb[0] = lead;
      if (state->__count == 0)
        state->__count = 1;
      else if (n < (size_t)-1)
        ++n;
      if (n < 2)
        return -2;
      second = (state->__count == 1)
        ? t[i++] : (unsigned char) state->__value.__wchb[1];
      if ((lead == 0xf0 && second < 0x90)
          || (lead == 0xf4 && second >= 0x90))
        {
          /* overlong UTF-8 sequence or result is > 0x10ffff */
          errno = EILSEQ;
          return -1;
        }
      if (second < 0x80 || second > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__value.__wchb[1] = second;
      if (state->__count == 1)
        state->__count = 2;
      else if (n < (size_t)-1)
        ++n;
      if (n < 3)
        return -2;
      third = (state->__count == 2)
        ? t[i++] : (unsigned char) state->__value.__wchb[2];
      if (third < 0x80 || third > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__value.__wchb[2] = third;
      if (state->__count == 2)
        state->__count = 3;
      else if (n < (size_t)-1)
        ++n;
      if (state->__count == 3 && sizeof(wchar_t) == 2)
        {
          /* On systems which have wchar_t being UTF-16 values, the value
             doesn't fit into a single wchar_t in this case.  So what we
             do here is to store the state with a special value of __count
             and return the first half of a surrogate pair.  The first
             three bytes of a UTF-8 sequence are enough to generate the
             first half of a UTF-16 surrogate pair.  As return value we
             choose to return the number of bytes actually read up to
             here.
             The second half of the surrogate pair is returned in case we
             recognize the special __count value of four, and the next
             byte is actually a valid value.  See below. */
          tmp = ((uint32_t)(lead & 0x07) << 18)
            |   ((uint32_t)(second & 0x3f) << 12)
            |   ((uint32_t)(third & 0x3f) << 6);
          state->__count = 4;
          *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
          return i;
        }
      if (n < 4)
        return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      tmp = ((uint32_t)(lead & 0x07) << 18)
        |   ((uint32_t)(second & 0x3f) << 12)
        |   ((uint32_t)(third & 0x3f) << 6)
        |   ((uint32_t)ch & 0x3f);
      if (state->__count == 4 && sizeof(wchar_t) == 2)
        /* Create the second half of the surrogate pair for systems with
           wchar_t == UTF-16 . */
        *pwc = 0xdc00 | (tmp & 0x3ff);
      else
        *pwc = tmp;
      state->__count = 0;
      return i;
    }

  /* 0x80..0xbf and 0xf5..0xff can never start a sequence.  */
  errno = EILSEQ;
  return -1;
}
721
722 /* Cygwin defines its own doublebyte charset conversion functions
723 because the underlying OS requires wchar_t == UTF-16. */
724 #ifndef __CYGWIN__
/* Decode one Shift-JIS character from s.

   pwc    receives the decoded character; NULL discards it.
   s      input bytes; NULL yields 0.
   n      number of bytes available at s; 0 yields -2.
   state  conversion state.  After a lead byte has been seen without
          its trail byte, __count is 1 and __value.__wchb[0] holds the
          lead byte.

   A two-byte character is returned as (lead << 8) + trail.  Returns 0
   for NUL, otherwise the number of bytes taken from s by this call;
   -2 when the trail byte is not yet available; -1 with errno set to
   EILSEQ when the byte after a lead byte is not a valid trail byte.

   errno is set directly: this function has no reentrancy-structure
   parameter.  */
int
__sjis_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_issjis1 (ch))
        {
          /* Lead byte: remember it and try to fetch the trail byte.  */
          state->__value.__wchb[0] = ch;
          state->__count = 1;
          if (n <= 1)
            return -2;
          ch = t[i++];
        }
    }
  if (state->__count == 1)
    {
      if (_issjis2 (ch))
        {
          *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
          state->__count = 0;
          return i;
        }
      else
        {
          errno = EILSEQ;
          return -1;
        }
    }

  /* Single-byte character.  */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
780
/* Decode one EUC-JP character from s.

   pwc    receives the decoded character; NULL discards it.
   s      input bytes; NULL yields 0.
   n      number of bytes available at s; 0 yields -2.
   state  conversion state.  __count is the number of bytes of an
          unfinished character already stored in __value.__wchb.

   A two-byte character is returned as (first << 8) + second.  When the
   first byte is 0x8f a third byte is required and the result is
   (second << 8) + (third & 0x7f).  Returns 0 for NUL, otherwise the
   number of bytes taken from s by this call; -2 when more bytes are
   needed; -1 with errno set to EILSEQ when a following byte is not a
   valid trail byte.

   errno is set directly: this function has no reentrancy-structure
   parameter.  */
int
__eucjp_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_iseucjp1 (ch))
        {
          /* Lead byte: remember it and try to fetch the next byte.  */
          state->__value.__wchb[0] = ch;
          state->__count = 1;
          if (n <= 1)
            return -2;
          ch = t[i++];
        }
    }
  if (state->__count == 1)
    {
      if (_iseucjp2 (ch))
        {
          if (state->__value.__wchb[0] == 0x8f)
            {
              /* Three-byte form: keep the second byte, need a third.  */
              state->__value.__wchb[1] = ch;
              state->__count = 2;
              if (n <= (size_t) i)
                return -2;
              ch = t[i++];
            }
          else
            {
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
              state->__count = 0;
              return i;
            }
        }
      else
        {
          errno = EILSEQ;
          return -1;
        }
    }
  if (state->__count == 2)
    {
      if (_iseucjp2 (ch))
        {
          *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
            + (wchar_t)(ch & 0x7f);
          state->__count = 0;
          return i;
        }
      else
        {
          errno = EILSEQ;
          return -1;
        }
    }

  /* Single-byte character.  */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
862
863 int
__jis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)864 __jis_mbtowc (
865 wchar_t *pwc,
866 const char *s,
867 size_t n,
868 mbstate_t *state)
869 {
870 wchar_t dummy;
871 unsigned char *t = (unsigned char *)s;
872 JIS_STATE curr_state;
873 JIS_ACTION action;
874 JIS_CHAR_TYPE ch;
875 unsigned char *ptr;
876 unsigned int i;
877 int curr_ch;
878
879 if (pwc == NULL)
880 pwc = &dummy;
881
882 if (s == NULL)
883 {
884 state->__state = ASCII;
885 return 1; /* state-dependent */
886 }
887
888 if (n == 0)
889 return -2;
890
891 curr_state = state->__state;
892 ptr = t;
893
894 for (i = 0; i < n; ++i)
895 {
896 curr_ch = t[i];
897 switch (curr_ch)
898 {
899 case ESC_CHAR:
900 ch = ESCAPE;
901 break;
902 case '$':
903 ch = DOLLAR;
904 break;
905 case '@':
906 ch = AT;
907 break;
908 case '(':
909 ch = BRACKET;
910 break;
911 case 'B':
912 ch = B;
913 break;
914 case 'J':
915 ch = J;
916 break;
917 case '\0':
918 ch = NUL;
919 break;
920 default:
921 if (_isjis (curr_ch))
922 ch = JIS_CHAR;
923 else
924 ch = OTHER;
925 }
926
927 action = JIS_action_table[curr_state][ch];
928 curr_state = JIS_state_table[curr_state][ch];
929
930 switch (action)
931 {
932 case NOOP:
933 break;
934 case EMPTY:
935 state->__state = ASCII;
936 *pwc = (wchar_t)0;
937 return 0;
938 case COPY_A:
939 state->__state = ASCII;
940 *pwc = (wchar_t)*ptr;
941 return (i + 1);
942 case COPY_J1:
943 state->__value.__wchb[0] = t[i];
944 break;
945 case COPY_J2:
946 state->__state = JIS;
947 *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
948 return (i + 1);
949 case MAKE_A:
950 ptr = (unsigned char *)(t + i + 1);
951 break;
952 case ERROR:
953 default:
954 _REENT_ERRNO(r) = EILSEQ;
955 return -1;
956 }
957
958 }
959
960 state->__state = curr_state;
961 return -2; /* n < bytes needed */
962 }
963 #endif /* !__CYGWIN__*/
964 #endif /* _MB_CAPABLE */
965