1 /*
2 Copyright (c) 1990 Regents of the University of California.
3 All rights reserved.
4  */
5 #include <newlib.h>
6 #include <stdlib.h>
7 #include <locale.h>
8 #include "mbctype.h"
9 #include <wchar.h>
10 #include <string.h>
11 #include <errno.h>
12 #include "local.h"
13 
/*
 * Convert one byte of a single-byte encoded string to a wide character.
 *
 * pwc    where to store the result; may be NULL, in which case the
 *        converted value is discarded.
 * s      input bytes; NULL makes the function return 0.
 * n      number of bytes available at s.
 * state  conversion state; not used by this converter.
 *
 * Returns 0 if s is NULL or points at a NUL byte, 1 if one byte was
 * converted, and -2 if n is 0.  When built with __CYGWIN__ defined, bytes
 * >= 0x80 are rejected: the function returns -1 and sets errno to EILSEQ.
 */
int
__ascii_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t discard;
  const unsigned char *byte = (const unsigned char *) s;

  (void) state;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

#ifdef __CYGWIN__
  if (*byte >= 0x80)
    {
      /* This function has no reentrancy pointer, so report through
	 errno directly. */
      errno = EILSEQ;
      return -1;
    }
#endif

  if (pwc == NULL)
    pwc = &discard;

  *pwc = (wchar_t) *byte;

  return (*byte != '\0') ? 1 : 0;
}
49 
50 #ifdef _MB_CAPABLE
51 typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
52                NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
53 typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
54                INV, JIS_S_NUM } JIS_STATE;
55 typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
56 
57 /**************************************************************************************
58  * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the following JIS characters and switches
60  * to ASCII are grouped with preceding JIS characters.  Thus, maximum returned length
61  * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
62  *************************************************************************************/
63 
64 #ifndef  __CYGWIN__
65 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
66 /*              ESCAPE   DOLLAR    BRACKET   AT       B       J        NUL      JIS_CHAR  OTHER */
67 /* ASCII */   { A_ESC,   ASCII,    ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
68 /* JIS */     { J_ESC,   JIS_1,    JIS_1,    JIS_1,   JIS_1,  JIS_1,   INV,     JIS_1,    INV },
69 /* A_ESC */   { ASCII,   A_ESC_DL, ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
70 /* A_ESC_DL */{ ASCII,   ASCII,    ASCII,    JIS,     JIS,    ASCII,   ASCII,   ASCII,    ASCII },
71 /* JIS_1 */   { INV,     JIS,      JIS,      JIS,     JIS,    JIS,     INV,     JIS,      INV },
72 /* J_ESC */   { INV,     INV,      J_ESC_BR, INV,     INV,    INV,     INV,     INV,      INV },
73 /* J_ESC_BR */{ INV,     INV,      INV,      INV,     ASCII,  ASCII,   INV,     INV,      INV },
74 };
75 
76 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
77 /*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
78 /* ASCII */   { NOOP,    COPY_A,   COPY_A,   COPY_A,  COPY_A,  COPY_A,  EMPTY,   COPY_A,  COPY_A},
79 /* JIS */     { NOOP,    COPY_J1,  COPY_J1,  COPY_J1, COPY_J1, COPY_J1, ERROR,   COPY_J1, ERROR },
80 /* A_ESC */   { COPY_A,  NOOP,     COPY_A,   COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A},
81 /* A_ESC_DL */{ COPY_A,  COPY_A,   COPY_A,   NOOP,    NOOP,    COPY_A,  COPY_A,  COPY_A,  COPY_A},
82 /* JIS_1 */   { ERROR,   COPY_J2,  COPY_J2,  COPY_J2, COPY_J2, COPY_J2, ERROR,   COPY_J2, ERROR },
83 /* J_ESC */   { ERROR,   ERROR,    NOOP,     ERROR,   ERROR,   ERROR,   ERROR,   ERROR,   ERROR },
84 /* J_ESC_BR */{ ERROR,   ERROR,    ERROR,    ERROR,   MAKE_A,  MAKE_A,  ERROR,   ERROR,   ERROR },
85 };
86 #endif /* !__CYGWIN__ */
87 
/* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
89 #define __state __count
90 
91 #ifdef _MB_EXTENDED_CHARSETS_ISO
92 static int
___iso_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int iso_idx,mbstate_t * state)93 ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
94 	       int iso_idx, mbstate_t *state)
95 {
96   wchar_t dummy;
97   unsigned char *t = (unsigned char *)s;
98 
99   if (pwc == NULL)
100     pwc = &dummy;
101 
102   if (s == NULL)
103     return 0;
104 
105   if (n == 0)
106     return -2;
107 
108   if (*t >= 0xa0)
109     {
110       if (iso_idx >= 0)
111 	{
112 	  *pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
113 	  if (*pwc == 0) /* Invalid character */
114 	    {
115 	      _REENT_ERRNO(r) = EILSEQ;
116 	      return -1;
117 	    }
118 	  return 1;
119 	}
120     }
121 
122   *pwc = (wchar_t) *t;
123 
124   if (*t == '\0')
125     return 0;
126 
127   return 1;
128 }
129 
/* ISO 8859-1 converter: calls ___iso_mbtowc with a negative index, so no
   conversion table is consulted and every byte maps to itself. */
static int
__iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = -1;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
136 
/* ISO 8859-2 converter: ___iso_mbtowc with conversion table row 0. */
static int
__iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 0;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
143 
/* ISO 8859-3 converter: ___iso_mbtowc with conversion table row 1. */
static int
__iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 1;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
150 
/* ISO 8859-4 converter: ___iso_mbtowc with conversion table row 2. */
static int
__iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 2;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
157 
/* ISO 8859-5 converter: ___iso_mbtowc with conversion table row 3. */
static int
__iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 3;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
164 
/* ISO 8859-6 converter: ___iso_mbtowc with conversion table row 4. */
static int
__iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 4;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
171 
/* ISO 8859-7 converter: ___iso_mbtowc with conversion table row 5. */
static int
__iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 5;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
178 
/* ISO 8859-8 converter: ___iso_mbtowc with conversion table row 6. */
static int
__iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 6;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
185 
/* ISO 8859-9 converter: ___iso_mbtowc with conversion table row 7. */
static int
__iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		     mbstate_t *state)
{
  const int conv_row = 7;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
192 
/* ISO 8859-10 converter: ___iso_mbtowc with conversion table row 8. */
static int
__iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		      mbstate_t *state)
{
  const int conv_row = 8;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
199 
/* ISO 8859-11 converter: ___iso_mbtowc with conversion table row 9. */
static int
__iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		      mbstate_t *state)
{
  const int conv_row = 9;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
206 
/* ISO 8859-13 converter: ___iso_mbtowc with conversion table row 10. */
static int
__iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		      mbstate_t *state)
{
  const int conv_row = 10;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
213 
/* ISO 8859-14 converter: ___iso_mbtowc with conversion table row 11. */
static int
__iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		      mbstate_t *state)
{
  const int conv_row = 11;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
220 
/* ISO 8859-15 converter: ___iso_mbtowc with conversion table row 12. */
static int
__iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		      mbstate_t *state)
{
  const int conv_row = 12;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
227 
/* ISO 8859-16 converter: ___iso_mbtowc with conversion table row 13. */
static int
__iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		      mbstate_t *state)
{
  const int conv_row = 13;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
234 
235 static mbtowc_p __iso_8859_mbtowc[17] = {
236   NULL,
237   __iso_8859_1_mbtowc,
238   __iso_8859_2_mbtowc,
239   __iso_8859_3_mbtowc,
240   __iso_8859_4_mbtowc,
241   __iso_8859_5_mbtowc,
242   __iso_8859_6_mbtowc,
243   __iso_8859_7_mbtowc,
244   __iso_8859_8_mbtowc,
245   __iso_8859_9_mbtowc,
246   __iso_8859_10_mbtowc,
247   __iso_8859_11_mbtowc,
248   NULL,			/* No ISO 8859-12 */
249   __iso_8859_13_mbtowc,
250   __iso_8859_14_mbtowc,
251   __iso_8859_15_mbtowc,
252   __iso_8859_16_mbtowc
253 };
254 
255 /* val *MUST* be valid!  All checks for validity are supposed to be
256    performed before calling this function. */
257 mbtowc_p
__iso_mbtowc(int val)258 __iso_mbtowc (int val)
259 {
260   return __iso_8859_mbtowc[val];
261 }
262 #endif /* _MB_EXTENDED_CHARSETS_ISO */
263 
264 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
265 static int
___cp_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int cp_idx,mbstate_t * state)266 ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
267 	      int cp_idx, mbstate_t *state)
268 {
269   wchar_t dummy;
270   unsigned char *t = (unsigned char *)s;
271 
272   if (pwc == NULL)
273     pwc = &dummy;
274 
275   if (s == NULL)
276     return 0;
277 
278   if (n == 0)
279     return -2;
280 
281   if (*t >= 0x80)
282     {
283       if (cp_idx >= 0)
284 	{
285 	  *pwc = __cp_conv[cp_idx][*t - 0x80];
286 	  if (*pwc == 0) /* Invalid character */
287 	    {
288 	      _REENT_ERRNO(r) = EILSEQ;
289 	      return -1;
290 	    }
291 	  return 1;
292 	}
293     }
294 
295   *pwc = (wchar_t)*t;
296 
297   if (*t == '\0')
298     return 0;
299 
300   return 1;
301 }
302 
/* CP437 converter: ___cp_mbtowc with conversion table row 0. */
static int
__cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 0;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
309 
/* CP720 converter: ___cp_mbtowc with conversion table row 1. */
static int
__cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 1;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
316 
/* CP737 converter: ___cp_mbtowc with conversion table row 2. */
static int
__cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 2;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
323 
/* CP775 converter: ___cp_mbtowc with conversion table row 3. */
static int
__cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 3;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
330 
/* CP850 converter: ___cp_mbtowc with conversion table row 4. */
static int
__cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 4;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
337 
/* CP852 converter: ___cp_mbtowc with conversion table row 5. */
static int
__cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 5;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
344 
/* CP855 converter: ___cp_mbtowc with conversion table row 6. */
static int
__cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 6;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
351 
/* CP857 converter: ___cp_mbtowc with conversion table row 7. */
static int
__cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 7;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
358 
/* CP858 converter: ___cp_mbtowc with conversion table row 8. */
static int
__cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 8;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
365 
/* CP862 converter: ___cp_mbtowc with conversion table row 9. */
static int
__cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 9;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
372 
/* CP866 converter: ___cp_mbtowc with conversion table row 10. */
static int
__cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 10;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
379 
/* CP874 converter: ___cp_mbtowc with conversion table row 11. */
static int
__cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 11;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
386 
/* CP1125 converter: ___cp_mbtowc with conversion table row 12. */
static int
__cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 12;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
393 
/* CP1250 converter: ___cp_mbtowc with conversion table row 13. */
static int
__cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 13;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
400 
/* CP1251 converter: ___cp_mbtowc with conversion table row 14. */
static int
__cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 14;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
407 
/* CP1252 converter: ___cp_mbtowc with conversion table row 15. */
static int
__cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 15;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
414 
/* CP1253 converter: ___cp_mbtowc with conversion table row 16. */
static int
__cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 16;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
421 
/* CP1254 converter: ___cp_mbtowc with conversion table row 17. */
static int
__cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 17;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
428 
/* CP1255 converter: ___cp_mbtowc with conversion table row 18. */
static int
__cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 18;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
435 
/* CP1256 converter: ___cp_mbtowc with conversion table row 19. */
static int
__cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 19;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
442 
/* CP1257 converter: ___cp_mbtowc with conversion table row 20. */
static int
__cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 20;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
449 
/* CP1258 converter: ___cp_mbtowc with conversion table row 21. */
static int
__cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		  mbstate_t *state)
{
  const int conv_row = 21;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
456 
/* CP20866 converter: ___cp_mbtowc with conversion table row 22. */
static int
__cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		   mbstate_t *state)
{
  const int conv_row = 22;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
463 
/* CP21866 converter: ___cp_mbtowc with conversion table row 23. */
static int
__cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		   mbstate_t *state)
{
  const int conv_row = 23;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
470 
/* CP101 converter: ___cp_mbtowc with conversion table row 24. */
static int
__cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 24;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
477 
/* CP102 converter: ___cp_mbtowc with conversion table row 25. */
static int
__cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 25;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
484 
/* CP103 converter: ___cp_mbtowc with conversion table row 26. */
static int
__cp_103_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
		 mbstate_t *state)
{
  const int conv_row = 26;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
491 
492 static mbtowc_p __cp_xxx_mbtowc[27] = {
493   __cp_437_mbtowc,
494   __cp_720_mbtowc,
495   __cp_737_mbtowc,
496   __cp_775_mbtowc,
497   __cp_850_mbtowc,
498   __cp_852_mbtowc,
499   __cp_855_mbtowc,
500   __cp_857_mbtowc,
501   __cp_858_mbtowc,
502   __cp_862_mbtowc,
503   __cp_866_mbtowc,
504   __cp_874_mbtowc,
505   __cp_1125_mbtowc,
506   __cp_1250_mbtowc,
507   __cp_1251_mbtowc,
508   __cp_1252_mbtowc,
509   __cp_1253_mbtowc,
510   __cp_1254_mbtowc,
511   __cp_1255_mbtowc,
512   __cp_1256_mbtowc,
513   __cp_1257_mbtowc,
514   __cp_1258_mbtowc,
515   __cp_20866_mbtowc,
516   __cp_21866_mbtowc,
517   __cp_101_mbtowc,
518   __cp_102_mbtowc,
519   __cp_103_mbtowc,
520 };
521 
522 /* val *MUST* be valid!  All checks for validity are supposed to be
523    performed before calling this function. */
524 mbtowc_p
__cp_mbtowc(int val)525 __cp_mbtowc (int val)
526 {
527   return __cp_xxx_mbtowc[__cp_val_index (val)];
528 }
529 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
530 
/*
 * Convert the next UTF-8 sequence at s to a wide character.
 *
 * The decoder is restartable.  When the input ends in the middle of a
 * sequence, the bytes consumed so far are kept in state->__value.__wchb,
 * their count is kept in state->__count, and -2 is returned; the next
 * call must be given the bytes that follow.
 *
 * pwc    where to store the result; may be NULL, in which case the
 *        converted value is discarded.
 * s      input bytes; NULL makes the function return 0.
 * n      number of bytes available at s.
 * state  conversion state; must not be NULL when s is non-NULL and n > 0.
 *
 * Returns the number of bytes read from s by this call on success, 0 if
 * the decoded byte is NUL, -2 if n is 0 or the sequence is incomplete,
 * and -1 with errno set to EILSEQ for an invalid sequence (bad lead or
 * continuation byte, overlong encoding, or value above 0x10ffff).
 */
int
__utf8_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *) s;
  unsigned char *saved;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  /* Always view the saved bytes as unsigned, so that comparisons against
     values >= 0x80 do not depend on how __wchb is declared. */
  saved = (unsigned char *) state->__value.__wchb;

  /* A non-zero count means we are resuming: the lead byte is the one
     saved by the previous call, not the first byte of s. */
  if (state->__count == 0)
    ch = t[i++];
  else
    ch = saved[0];

  if (ch == '\0')
    {
      *pwc = 0;
      state->__count = 0;
      return 0; /* s points to the null character */
    }

  if (ch <= 0x7f)
    {
      /* single-byte sequence */
      state->__count = 0;
      *pwc = ch;
      return 1;
    }
  if (ch >= 0xc0 && ch <= 0xdf)
    {
      /* two-byte sequence */
      saved[0] = ch;
      /* When the byte came from the saved state rather than from s,
	 count it as available by bumping n (without overflowing). */
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      if (saved[0] < 0xc2)
	{
	  /* overlong UTF-8 sequence */
	  errno = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      *pwc = (wchar_t)((saved[0] & 0x1f) << 6)
	|    (wchar_t)(ch & 0x3f);
      return i;
    }
  if (ch >= 0xe0 && ch <= 0xef)
    {
      /* three-byte sequence */
      wchar_t tmp;
      saved[0] = ch;
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = (state->__count == 1) ? t[i++] : saved[1];
      if (saved[0] == 0xe0 && ch < 0xa0)
	{
	  /* overlong UTF-8 sequence */
	  errno = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      saved[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      tmp = (wchar_t)((saved[0] & 0x0f) << 12)
	|    (wchar_t)((saved[1] & 0x3f) << 6)
	|     (wchar_t)(ch & 0x3f);
      *pwc = tmp;
      return i;
    }
  if (ch >= 0xf0 && ch <= 0xf4)
    {
      /* four-byte sequence */
      uint32_t tmp;
      saved[0] = ch;
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = (state->__count == 1) ? t[i++] : saved[1];
      if ((saved[0] == 0xf0 && ch < 0x90)
	  || (saved[0] == 0xf4 && ch >= 0x90))
	{
	  /* overlong UTF-8 sequence or result is > 0x10ffff */
	  errno = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      saved[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = (state->__count == 2) ? t[i++] : saved[2];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      saved[2] = ch;
      if (state->__count == 2)
	state->__count = 3;
      else if (n < (size_t)-1)
	++n;
      if (state->__count == 3 && sizeof(wchar_t) == 2)
	{
	  /* On systems which have wchar_t being UTF-16 values, the value
	     doesn't fit into a single wchar_t in this case.  So what we
	     do here is to store the state with a special value of __count
	     and return the first half of a surrogate pair.  The first
	     three bytes of a UTF-8 sequence are enough to generate the
	     first half of a UTF-16 surrogate pair.  As return value we
	     choose to return the number of bytes actually read up to
	     here.
	     The second half of the surrogate pair is returned in case we
	     recognize the special __count value of four, and the next
	     byte is actually a valid value.  See below. */
	  tmp = (uint32_t)((saved[0] & (uint32_t) 0x07) << 18)
	    |   (uint32_t)((saved[1] & (uint32_t) 0x3f) << 12)
	    |   (uint32_t)((saved[2] & (uint32_t) 0x3f) << 6);
	  state->__count = 4;
	  *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
	  return i;
	}
      if (n < 4)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      tmp = (((uint32_t)saved[0] & 0x07) << 18)
        |   (((uint32_t)saved[1] & 0x3f) << 12)
        |   (((uint32_t)saved[2] & 0x3f) << 6)
        |   ((uint32_t)ch & 0x3f);
      if (state->__count == 4 && sizeof(wchar_t) == 2)
	/* Create the second half of the surrogate pair for systems with
	   wchar_t == UTF-16 . */
	*pwc = 0xdc00 | (tmp & 0x3ff);
      else
	*pwc = tmp;
      state->__count = 0;
      return i;
    }

  /* 0x80..0xbf and 0xf5..0xff can never start a sequence. */
  errno = EILSEQ;
  return -1;
}
728 
729 /* Cygwin defines its own doublebyte charset conversion functions
730    because the underlying OS requires wchar_t == UTF-16. */
731 #ifndef  __CYGWIN__
/*
 * Convert the next Shift-JIS character at s to a wide character.
 *
 * A byte accepted by _issjis1() starts a two-byte character whose second
 * byte must be accepted by _issjis2(); the result is (first << 8) +
 * second.  Any other byte is converted on its own.  If the input ends
 * after a first byte, that byte is kept in state and -2 is returned.
 *
 * pwc    where to store the result; may be NULL, in which case the
 *        converted value is discarded.
 * s      input bytes; NULL makes the function return 0.
 * n      number of bytes available at s.
 * state  conversion state (__count and __value.__wchb[0] are used).
 *
 * Returns the number of bytes read from s by this call, 0 for a NUL byte,
 * -2 if n is 0 or the character is incomplete, and -1 with errno set to
 * EILSEQ if the second byte is invalid.
 */
int
__sjis_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *) s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_issjis1 (ch))
	{
	  /* Remember the first byte so the character can be completed
	     by a later call if the input stops here. */
	  state->__value.__wchb[0] = ch;
	  state->__count = 1;
	  if (n <= 1)
	    return -2;
	  ch = t[i++];
	}
    }
  if (state->__count == 1)
    {
      if (_issjis2 (ch))
	{
	  *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
	  state->__count = 0;
	  return i;
	}
      else
	{
	  /* This function has no reentrancy pointer, so report through
	     errno directly. */
	  errno = EILSEQ;
	  return -1;
	}
    }

  /* Single-byte character. */
  *pwc = (wchar_t)*t;

  return (*t != '\0') ? 1 : 0;
}
787 
/*
 * Convert the next EUC-JP character at s to a wide character.
 *
 * A byte accepted by _iseucjp1() starts a multibyte character; each
 * following byte must be accepted by _iseucjp2().  A first byte of 0x8f
 * starts a three-byte character whose result is
 * (second << 8) + (third & 0x7f); otherwise the character is two bytes
 * long and the result is (first << 8) + second.  Any other byte is
 * converted on its own.  If the input ends in mid-character, the bytes
 * read so far are kept in state and -2 is returned.
 *
 * pwc    where to store the result; may be NULL, in which case the
 *        converted value is discarded.
 * s      input bytes; NULL makes the function return 0.
 * n      number of bytes available at s.
 * state  conversion state (__count and __value.__wchb[0..1] are used).
 *
 * Returns the number of bytes read from s by this call, 0 for a NUL byte,
 * -2 if n is 0 or the character is incomplete, and -1 with errno set to
 * EILSEQ if a trailing byte is invalid.
 */
int
__eucjp_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *) s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_iseucjp1 (ch))
	{
	  state->__value.__wchb[0] = ch;
	  state->__count = 1;
	  if (n <= 1)
	    return -2;
	  ch = t[i++];
	}
    }
  if (state->__count == 1)
    {
      if (_iseucjp2 (ch))
	{
	  if (state->__value.__wchb[0] == 0x8f)
	    {
	      /* Three-byte character: save the second byte and go on to
		 the third. */
	      state->__value.__wchb[1] = ch;
	      state->__count = 2;
	      if (n <= (size_t) i)
		return -2;
	      ch = t[i++];
	    }
	  else
	    {
	      *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
	      state->__count = 0;
	      return i;
	    }
	}
      else
	{
	  /* This function has no reentrancy pointer, so report through
	     errno directly. */
	  errno = EILSEQ;
	  return -1;
	}
    }
  if (state->__count == 2)
    {
      if (_iseucjp2 (ch))
	{
	  *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
		 + (wchar_t)(ch & 0x7f);
	  state->__count = 0;
	  return i;
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }

  /* Single-byte character. */
  *pwc = (wchar_t)*t;

  return (*t != '\0') ? 1 : 0;
}
869 
870 int
__jis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)871 __jis_mbtowc (
872         wchar_t       *pwc,
873         const char    *s,
874         size_t         n,
875         mbstate_t      *state)
876 {
877   wchar_t dummy;
878   unsigned char *t = (unsigned char *)s;
879   JIS_STATE curr_state;
880   JIS_ACTION action;
881   JIS_CHAR_TYPE ch;
882   unsigned char *ptr;
883   unsigned int i;
884   int curr_ch;
885 
886   if (pwc == NULL)
887     pwc = &dummy;
888 
889   if (s == NULL)
890     {
891       state->__state = ASCII;
892       return 1;  /* state-dependent */
893     }
894 
895   if (n == 0)
896     return -2;
897 
898   curr_state = state->__state;
899   ptr = t;
900 
901   for (i = 0; i < n; ++i)
902     {
903       curr_ch = t[i];
904       switch (curr_ch)
905 	{
906 	case ESC_CHAR:
907 	  ch = ESCAPE;
908 	  break;
909 	case '$':
910 	  ch = DOLLAR;
911 	  break;
912 	case '@':
913 	  ch = AT;
914 	  break;
915 	case '(':
916 	  ch = BRACKET;
917 	  break;
918 	case 'B':
919 	  ch = B;
920 	  break;
921 	case 'J':
922 	  ch = J;
923 	  break;
924 	case '\0':
925 	  ch = NUL;
926 	  break;
927 	default:
928 	  if (_isjis (curr_ch))
929 	    ch = JIS_CHAR;
930 	  else
931 	    ch = OTHER;
932 	}
933 
934       action = JIS_action_table[curr_state][ch];
935       curr_state = JIS_state_table[curr_state][ch];
936 
937       switch (action)
938 	{
939 	case NOOP:
940 	  break;
941 	case EMPTY:
942 	  state->__state = ASCII;
943 	  *pwc = (wchar_t)0;
944 	  return 0;
945 	case COPY_A:
946 	  state->__state = ASCII;
947 	  *pwc = (wchar_t)*ptr;
948 	  return (i + 1);
949 	case COPY_J1:
950 	  state->__value.__wchb[0] = t[i];
951 	  break;
952 	case COPY_J2:
953 	  state->__state = JIS;
954 	  *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
955 	  return (i + 1);
956 	case MAKE_A:
957 	  ptr = (unsigned char *)(t + i + 1);
958 	  break;
959 	case ERROR:
960 	default:
961 	  _REENT_ERRNO(r) = EILSEQ;
962 	  return -1;
963 	}
964 
965     }
966 
967   state->__state = curr_state;
968   return -2;  /* n < bytes needed */
969 }
970 #endif /* !__CYGWIN__*/
971 #endif /* _MB_CAPABLE */
972