1 /*
2 Copyright (c) 1990 Regents of the University of California.
3 All rights reserved.
4  */
5 #include <newlib.h>
6 #include <stdlib.h>
7 #include <locale.h>
8 #include "mbctype.h"
9 #include <wchar.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include "local.h"
14 
/* Convert the single byte at S into a wide character.

   PWC    receives the converted character; may be NULL, in which case the
          result is discarded.
   S      input byte; NULL asks whether the encoding is state-dependent.
   N      number of bytes available at S.
   STATE  unused by this converter.

   Returns 0 if S is NULL or the byte is the NUL terminator, -2 if N is 0,
   and 1 otherwise.  Every byte value is passed through unchanged.  */
int
__ascii_mbtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *state)
{
  wchar_t scratch;
  wchar_t *out = (pwc != NULL) ? pwc : &scratch;
  unsigned char byte;

  (void) state;

  if (s == NULL)
    return 0;
  if (n == 0)
    return -2;

  byte = (unsigned char) *s;
  *out = (wchar_t) byte;

  return (byte != '\0') ? 1 : 0;
}
42 
43 #ifdef _MB_CAPABLE
44 typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
45                NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
46 typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
47                INV, JIS_S_NUM } JIS_STATE;
48 typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
49 
/**************************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the JIS characters that follow, and
 * switches to ASCII are grouped with preceding JIS characters.  Thus, maximum returned
 * length is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
 *************************************************************************************/
56 
57 #ifndef  __CYGWIN__
58 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
59 /*              ESCAPE   DOLLAR    BRACKET   AT       B       J        NUL      JIS_CHAR  OTHER */
60 /* ASCII */   { A_ESC,   ASCII,    ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
61 /* JIS */     { J_ESC,   JIS_1,    JIS_1,    JIS_1,   JIS_1,  JIS_1,   INV,     JIS_1,    INV },
62 /* A_ESC */   { ASCII,   A_ESC_DL, ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
63 /* A_ESC_DL */{ ASCII,   ASCII,    ASCII,    JIS,     JIS,    ASCII,   ASCII,   ASCII,    ASCII },
64 /* JIS_1 */   { INV,     JIS,      JIS,      JIS,     JIS,    JIS,     INV,     JIS,      INV },
65 /* J_ESC */   { INV,     INV,      J_ESC_BR, INV,     INV,    INV,     INV,     INV,      INV },
66 /* J_ESC_BR */{ INV,     INV,      INV,      INV,     ASCII,  ASCII,   INV,     INV,      INV },
67 };
68 
69 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
70 /*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
71 /* ASCII */   { NOOP,    COPY_A,   COPY_A,   COPY_A,  COPY_A,  COPY_A,  EMPTY,   COPY_A,  COPY_A},
72 /* JIS */     { NOOP,    COPY_J1,  COPY_J1,  COPY_J1, COPY_J1, COPY_J1, ERROR,   COPY_J1, ERROR },
73 /* A_ESC */   { COPY_A,  NOOP,     COPY_A,   COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A},
74 /* A_ESC_DL */{ COPY_A,  COPY_A,   COPY_A,   NOOP,    NOOP,    COPY_A,  COPY_A,  COPY_A,  COPY_A},
75 /* JIS_1 */   { ERROR,   COPY_J2,  COPY_J2,  COPY_J2, COPY_J2, COPY_J2, ERROR,   COPY_J2, ERROR },
76 /* J_ESC */   { ERROR,   ERROR,    NOOP,     ERROR,   ERROR,   ERROR,   ERROR,   ERROR,   ERROR },
77 /* J_ESC_BR */{ ERROR,   ERROR,    ERROR,    ERROR,   MAKE_A,  MAKE_A,  ERROR,   ERROR,   ERROR },
78 };
79 #endif /* !__CYGWIN__ */
80 
/* We override the mbstate_t __count field for more complex encodings and
   use it to store a state value. */
#define __state __count
83 
84 #ifdef _MB_EXTENDED_CHARSETS_ISO
85 static int
___iso_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int iso_idx,mbstate_t * state)86 ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
87 	       int iso_idx, mbstate_t *state)
88 {
89   wchar_t dummy;
90   unsigned char *t = (unsigned char *)s;
91 
92   if (pwc == NULL)
93     pwc = &dummy;
94 
95   if (s == NULL)
96     return 0;
97 
98   if (n == 0)
99     return -2;
100 
101   if (*t >= 0xa0)
102     {
103       if (iso_idx >= 0)
104 	{
105 	  *pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
106 	  if (*pwc == 0) /* Invalid character */
107 	    {
108 	      _REENT_ERRNO(r) = EILSEQ;
109 	      return -1;
110 	    }
111 	  return 1;
112 	}
113     }
114 
115   *pwc = (wchar_t) *t;
116 
117   if (*t == '\0')
118     return 0;
119 
120   return 1;
121 }
122 
/* One entry point per ISO 8859 part.  Each is a thin adapter that forwards
   to ___iso_mbtowc with the conversion-table row for that part.  Part 1
   passes -1, which makes ___iso_mbtowc skip the table and copy every byte
   unchanged.  There is no part 12.  */
#define ISO_8859_MBTOWC(part, idx)                                      \
static int                                                              \
__iso_8859_##part##_mbtowc (struct _reent *r, wchar_t *pwc,             \
                            const char *s, size_t n, mbstate_t *state)  \
{                                                                       \
  return ___iso_mbtowc (r, pwc, s, n, (idx), state);                    \
}

ISO_8859_MBTOWC (1, -1)
ISO_8859_MBTOWC (2, 0)
ISO_8859_MBTOWC (3, 1)
ISO_8859_MBTOWC (4, 2)
ISO_8859_MBTOWC (5, 3)
ISO_8859_MBTOWC (6, 4)
ISO_8859_MBTOWC (7, 5)
ISO_8859_MBTOWC (8, 6)
ISO_8859_MBTOWC (9, 7)
ISO_8859_MBTOWC (10, 8)
ISO_8859_MBTOWC (11, 9)
ISO_8859_MBTOWC (13, 10)
ISO_8859_MBTOWC (14, 11)
ISO_8859_MBTOWC (15, 12)
ISO_8859_MBTOWC (16, 13)

#undef ISO_8859_MBTOWC
227 
228 static mbtowc_p __iso_8859_mbtowc[17] = {
229   NULL,
230   __iso_8859_1_mbtowc,
231   __iso_8859_2_mbtowc,
232   __iso_8859_3_mbtowc,
233   __iso_8859_4_mbtowc,
234   __iso_8859_5_mbtowc,
235   __iso_8859_6_mbtowc,
236   __iso_8859_7_mbtowc,
237   __iso_8859_8_mbtowc,
238   __iso_8859_9_mbtowc,
239   __iso_8859_10_mbtowc,
240   __iso_8859_11_mbtowc,
241   NULL,			/* No ISO 8859-12 */
242   __iso_8859_13_mbtowc,
243   __iso_8859_14_mbtowc,
244   __iso_8859_15_mbtowc,
245   __iso_8859_16_mbtowc
246 };
247 
248 /* val *MUST* be valid!  All checks for validity are supposed to be
249    performed before calling this function. */
250 mbtowc_p
__iso_mbtowc(int val)251 __iso_mbtowc (int val)
252 {
253   return __iso_8859_mbtowc[val];
254 }
255 #endif /* _MB_EXTENDED_CHARSETS_ISO */
256 
257 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
258 static int
___cp_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int cp_idx,mbstate_t * state)259 ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
260 	      int cp_idx, mbstate_t *state)
261 {
262   wchar_t dummy;
263   unsigned char *t = (unsigned char *)s;
264 
265   if (pwc == NULL)
266     pwc = &dummy;
267 
268   if (s == NULL)
269     return 0;
270 
271   if (n == 0)
272     return -2;
273 
274   if (*t >= 0x80)
275     {
276       if (cp_idx >= 0)
277 	{
278 	  *pwc = __cp_conv[cp_idx][*t - 0x80];
279 	  if (*pwc == 0) /* Invalid character */
280 	    {
281 	      _REENT_ERRNO(r) = EILSEQ;
282 	      return -1;
283 	    }
284 	  return 1;
285 	}
286     }
287 
288   *pwc = (wchar_t)*t;
289 
290   if (*t == '\0')
291     return 0;
292 
293   return 1;
294 }
295 
/* One entry point per supported code page.  Each is a thin adapter that
   forwards to ___cp_mbtowc with the conversion-table row for that code
   page.  The second macro argument is that row index.  */
#define CP_MBTOWC(cp, idx)                                              \
static int                                                              \
__cp_##cp##_mbtowc (struct _reent *r, wchar_t *pwc,                     \
                    const char *s, size_t n, mbstate_t *state)          \
{                                                                       \
  return ___cp_mbtowc (r, pwc, s, n, (idx), state);                     \
}

CP_MBTOWC (437, 0)
CP_MBTOWC (720, 1)
CP_MBTOWC (737, 2)
CP_MBTOWC (775, 3)
CP_MBTOWC (850, 4)
CP_MBTOWC (852, 5)
CP_MBTOWC (855, 6)
CP_MBTOWC (857, 7)
CP_MBTOWC (858, 8)
CP_MBTOWC (862, 9)
CP_MBTOWC (866, 10)
CP_MBTOWC (874, 11)
CP_MBTOWC (1125, 12)
CP_MBTOWC (1250, 13)
CP_MBTOWC (1251, 14)
CP_MBTOWC (1252, 15)
CP_MBTOWC (1253, 16)
CP_MBTOWC (1254, 17)
CP_MBTOWC (1255, 18)
CP_MBTOWC (1256, 19)
CP_MBTOWC (1257, 20)
CP_MBTOWC (1258, 21)
CP_MBTOWC (20866, 22)
CP_MBTOWC (21866, 23)
CP_MBTOWC (101, 24)
CP_MBTOWC (102, 25)
CP_MBTOWC (103, 26)

#undef CP_MBTOWC
484 
485 static mbtowc_p __cp_xxx_mbtowc[27] = {
486   __cp_437_mbtowc,
487   __cp_720_mbtowc,
488   __cp_737_mbtowc,
489   __cp_775_mbtowc,
490   __cp_850_mbtowc,
491   __cp_852_mbtowc,
492   __cp_855_mbtowc,
493   __cp_857_mbtowc,
494   __cp_858_mbtowc,
495   __cp_862_mbtowc,
496   __cp_866_mbtowc,
497   __cp_874_mbtowc,
498   __cp_1125_mbtowc,
499   __cp_1250_mbtowc,
500   __cp_1251_mbtowc,
501   __cp_1252_mbtowc,
502   __cp_1253_mbtowc,
503   __cp_1254_mbtowc,
504   __cp_1255_mbtowc,
505   __cp_1256_mbtowc,
506   __cp_1257_mbtowc,
507   __cp_1258_mbtowc,
508   __cp_20866_mbtowc,
509   __cp_21866_mbtowc,
510   __cp_101_mbtowc,
511   __cp_102_mbtowc,
512   __cp_103_mbtowc,
513 };
514 
515 /* val *MUST* be valid!  All checks for validity are supposed to be
516    performed before calling this function. */
517 mbtowc_p
__cp_mbtowc(int val)518 __cp_mbtowc (int val)
519 {
520   return __cp_xxx_mbtowc[__cp_val_index (val)];
521 }
522 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
523 
/* Decode one UTF-8 sequence from S into a wide character, resumably.

   PWC    receives the decoded character; NULL discards the result.
   S      input bytes; NULL yields 0 (encoding is not state-dependent).
   N      number of bytes available at S.
   STATE  conversion state.  __count is the number of bytes of a partial
          sequence already consumed by earlier calls, and __value.__wchb
          holds those bytes.  The special value 4 is used only when
          sizeof (wchar_t) == 2 and means that the high surrogate has been
          returned and the low surrogate is still owed.

   Returns
      0   if S is NULL or the next character is NUL,
     -2   if the N bytes end in the middle of a (so far valid) sequence;
          the bytes seen are saved in STATE,
     -1   with errno set to EILSEQ for an invalid lead or continuation
          byte, an overlong encoding, or a value above 0x10ffff,
     >0   the number of bytes consumed from S by this call.

   errno is assigned directly: this function has no reentrancy pointer
   argument, so there is nothing to hand to a per-reent errno accessor.  */
int
__utf8_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  /* Either start a new sequence or pick up the saved lead byte.  */
  if (state->__count == 0)
    ch = t[i++];
  else
    ch = state->__value.__wchb[0];

  if (ch == '\0')
    {
      *pwc = 0;
      state->__count = 0;
      return 0; /* s points to the null character */
    }

  if (ch <= 0x7f)
    {
      /* single-byte sequence */
      state->__count = 0;
      *pwc = ch;
      return 1;
    }
  if (ch >= 0xc0 && ch <= 0xdf)
    {
      /* two-byte sequence */
      state->__value.__wchb[0] = ch;
      /* When resuming, the saved byte counts towards the bytes available,
         so N is bumped (guarding against wrap-around) instead of the
         length checks being adjusted.  The same idiom is used below.  */
      if (state->__count == 0)
        state->__count = 1;
      else if (n < (size_t)-1)
        ++n;
      if (n < 2)
        return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      if (state->__value.__wchb[0] < 0xc2)
        {
          /* overlong UTF-8 sequence */
          errno = EILSEQ;
          return -1;
        }
      state->__count = 0;
      *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
        |    (wchar_t)(ch & 0x3f);
      return i;
    }
  if (ch >= 0xe0 && ch <= 0xef)
    {
      /* three-byte sequence */
      wchar_t tmp;
      state->__value.__wchb[0] = ch;
      if (state->__count == 0)
        state->__count = 1;
      else if (n < (size_t)-1)
        ++n;
      if (n < 2)
        return -2;
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
      if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
        {
          /* overlong UTF-8 sequence */
          errno = EILSEQ;
          return -1;
        }
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__value.__wchb[1] = ch;
      if (state->__count == 1)
        state->__count = 2;
      else if (n < (size_t)-1)
        ++n;
      if (n < 3)
        return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__count = 0;
      tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
        |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
        |     (wchar_t)(ch & 0x3f);
      *pwc = tmp;
      return i;
    }
  if (ch >= 0xf0 && ch <= 0xf4)
    {
      /* four-byte sequence */
      uint32_t tmp;
      state->__value.__wchb[0] = ch;
      if (state->__count == 0)
        state->__count = 1;
      else if (n < (size_t)-1)
        ++n;
      if (n < 2)
        return -2;
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
      if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
          || (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
        {
          /* overlong UTF-8 sequence or result is > 0x10ffff */
          errno = EILSEQ;
          return -1;
        }
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__value.__wchb[1] = ch;
      if (state->__count == 1)
        state->__count = 2;
      else if (n < (size_t)-1)
        ++n;
      if (n < 3)
        return -2;
      ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      state->__value.__wchb[2] = ch;
      if (state->__count == 2)
        state->__count = 3;
      else if (n < (size_t)-1)
        ++n;
      if (state->__count == 3 && sizeof(wchar_t) == 2)
        {
          /* On systems which have wchar_t being UTF-16 values, the value
             doesn't fit into a single wchar_t in this case.  So what we
             do here is to store the state with a special value of __count
             and return the first half of a surrogate pair.  The first
             three bytes of a UTF-8 sequence are enough to generate the
             first half of a UTF-16 surrogate pair.  As return value we
             choose to return the number of bytes actually read up to
             here.
             The second half of the surrogate pair is returned in case we
             recognize the special __count value of four, and the next
             byte is actually a valid value.  See below. */
          tmp = (uint32_t)((state->__value.__wchb[0] & (uint32_t) 0x07) << 18)
            |   (uint32_t)((state->__value.__wchb[1] & (uint32_t) 0x3f) << 12)
            |   (uint32_t)((state->__value.__wchb[2] & (uint32_t) 0x3f) << 6);
          state->__count = 4;
          *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
          return i;
        }
      if (n < 4)
        return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
        {
          errno = EILSEQ;
          return -1;
        }
      tmp = (((uint32_t)state->__value.__wchb[0] & 0x07) << 18)
        |   (((uint32_t)state->__value.__wchb[1] & 0x3f) << 12)
        |   (((uint32_t)state->__value.__wchb[2] & 0x3f) << 6)
        |   ((uint32_t)ch & 0x3f);
      if (state->__count == 4 && sizeof(wchar_t) == 2)
        /* Create the second half of the surrogate pair for systems with
           wchar_t == UTF-16 . */
        *pwc = 0xdc00 | (tmp & 0x3ff);
      else
        *pwc = tmp;
      state->__count = 0;
      return i;
    }

  /* 0x80..0xbf (stray continuation byte) or 0xf5..0xff as lead byte.  */
  errno = EILSEQ;
  return -1;
}
721 
722 /* Cygwin defines its own doublebyte charset conversion functions
723    because the underlying OS requires wchar_t == UTF-16. */
724 #ifndef  __CYGWIN__
/* Decode one Shift-JIS character from S, resumably.

   PWC    receives the result; NULL discards it.  A double-byte character
          is returned as (first byte << 8) + second byte.
   S      input bytes; NULL yields 0.
   N      number of bytes available at S.
   STATE  __count is 1 while a lead byte (saved in __value.__wchb[0]) is
          waiting for its trail byte, 0 otherwise.

   Returns 0 for NULL S or the NUL terminator, -2 if N is 0 or the input
   ends after a lead byte, -1 with errno set to EILSEQ if the byte after a
   lead byte is not a valid trail byte, otherwise the number of bytes
   consumed from S by this call.

   errno is assigned directly: this function has no reentrancy pointer
   argument, so there is nothing to hand to a per-reent errno accessor.  */
int
__sjis_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_issjis1 (ch))
        {
          /* Lead byte: save it before checking whether more input exists,
             so a later call can resume with the trail byte.  */
          state->__value.__wchb[0] = ch;
          state->__count = 1;
          if (n <= 1)
            return -2;
          ch = t[i++];
        }
    }
  if (state->__count == 1)
    {
      if (_issjis2 (ch))
        {
          *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
          state->__count = 0;
          return i;
        }
      else
        {
          errno = EILSEQ;
          return -1;
        }
    }

  /* Not a lead byte: single-byte character, copied unchanged.  */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
780 
/* Decode one EUC-JP character from S, resumably.

   PWC    receives the result; NULL discards it.  A two-byte character is
          returned as (first byte << 8) + second byte.  A three-byte
          character (lead byte 0x8f) is returned as
          (second byte << 8) + (third byte & 0x7f).
   S      input bytes; NULL yields 0.
   N      number of bytes available at S.
   STATE  __count is the number of bytes of a partial character already
          consumed (0, 1 or 2); they are kept in __value.__wchb[0..1].

   Returns 0 for NULL S or the NUL terminator, -2 if N is 0 or the input
   ends inside a character, -1 with errno set to EILSEQ if a trail byte is
   invalid, otherwise the number of bytes consumed from S by this call.

   errno is assigned directly: this function has no reentrancy pointer
   argument, so there is nothing to hand to a per-reent errno accessor.  */
int
__eucjp_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_iseucjp1 (ch))
        {
          /* Lead byte: save it before checking whether more input exists,
             so a later call can resume from here.  */
          state->__value.__wchb[0] = ch;
          state->__count = 1;
          if (n <= 1)
            return -2;
          ch = t[i++];
        }
    }
  if (state->__count == 1)
    {
      if (_iseucjp2 (ch))
        {
          if (state->__value.__wchb[0] == 0x8f)
            {
              /* Lead byte 0x8f introduces a three-byte character: keep
                 the second byte and go on to the third.  */
              state->__value.__wchb[1] = ch;
              state->__count = 2;
              if (n <= (size_t) i)
                return -2;
              ch = t[i++];
            }
          else
            {
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
              state->__count = 0;
              return i;
            }
        }
      else
        {
          errno = EILSEQ;
          return -1;
        }
    }
  if (state->__count == 2)
    {
      if (_iseucjp2 (ch))
        {
          *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
                 + (wchar_t)(ch & 0x7f);
          state->__count = 0;
          return i;
        }
      else
        {
          errno = EILSEQ;
          return -1;
        }
    }

  /* Not a lead byte: single-byte character, copied unchanged.  */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
862 
863 int
__jis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)864 __jis_mbtowc (
865         wchar_t       *pwc,
866         const char    *s,
867         size_t         n,
868         mbstate_t      *state)
869 {
870   wchar_t dummy;
871   unsigned char *t = (unsigned char *)s;
872   JIS_STATE curr_state;
873   JIS_ACTION action;
874   JIS_CHAR_TYPE ch;
875   unsigned char *ptr;
876   unsigned int i;
877   int curr_ch;
878 
879   if (pwc == NULL)
880     pwc = &dummy;
881 
882   if (s == NULL)
883     {
884       state->__state = ASCII;
885       return 1;  /* state-dependent */
886     }
887 
888   if (n == 0)
889     return -2;
890 
891   curr_state = state->__state;
892   ptr = t;
893 
894   for (i = 0; i < n; ++i)
895     {
896       curr_ch = t[i];
897       switch (curr_ch)
898 	{
899 	case ESC_CHAR:
900 	  ch = ESCAPE;
901 	  break;
902 	case '$':
903 	  ch = DOLLAR;
904 	  break;
905 	case '@':
906 	  ch = AT;
907 	  break;
908 	case '(':
909 	  ch = BRACKET;
910 	  break;
911 	case 'B':
912 	  ch = B;
913 	  break;
914 	case 'J':
915 	  ch = J;
916 	  break;
917 	case '\0':
918 	  ch = NUL;
919 	  break;
920 	default:
921 	  if (_isjis (curr_ch))
922 	    ch = JIS_CHAR;
923 	  else
924 	    ch = OTHER;
925 	}
926 
927       action = JIS_action_table[curr_state][ch];
928       curr_state = JIS_state_table[curr_state][ch];
929 
930       switch (action)
931 	{
932 	case NOOP:
933 	  break;
934 	case EMPTY:
935 	  state->__state = ASCII;
936 	  *pwc = (wchar_t)0;
937 	  return 0;
938 	case COPY_A:
939 	  state->__state = ASCII;
940 	  *pwc = (wchar_t)*ptr;
941 	  return (i + 1);
942 	case COPY_J1:
943 	  state->__value.__wchb[0] = t[i];
944 	  break;
945 	case COPY_J2:
946 	  state->__state = JIS;
947 	  *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
948 	  return (i + 1);
949 	case MAKE_A:
950 	  ptr = (unsigned char *)(t + i + 1);
951 	  break;
952 	case ERROR:
953 	default:
954 	  _REENT_ERRNO(r) = EILSEQ;
955 	  return -1;
956 	}
957 
958     }
959 
960   state->__state = curr_state;
961   return -2;  /* n < bytes needed */
962 }
963 #endif /* !__CYGWIN__*/
964 #endif /* _MB_CAPABLE */
965