1 /*
2 Copyright (c) 1990 Regents of the University of California.
3 All rights reserved.
4  */
5 #include <stdlib.h>
6 #include <locale.h>
7 #include "mbctype.h"
8 #include <wchar.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include "local.h"
13 
/* Convert one byte of S to a wide character for the plain single-byte
   ("C"/ASCII) case: the byte value is stored unchanged through PWC.

   Returns 0 when S is NULL or the byte is the terminating NUL, -2 when
   N is zero, and 1 otherwise.  PWC may be NULL, in which case the
   result is discarded.  STATE is not used.  */
int
__ascii_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  const unsigned char *byte = (const unsigned char *) s;
  wchar_t scratch;
  wchar_t *dest = (pwc != NULL) ? pwc : &scratch;

  (void) state;

  if (s == NULL)
    return 0;
  if (n == 0)
    return -2;

  *dest = (wchar_t) *byte;
  return (*byte != '\0') ? 1 : 0;
}
41 
42 #ifdef _MB_CAPABLE
/* Class assigned to each input byte by __jis_mbtowc; it is the column
   index into the JIS state/action tables.  JIS_C_NUM is not a class but
   the number of classes (the column dimension of the tables). */
typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
               NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
/* Decoder state; it is the row index into the JIS state/action tables.
   JIS_S_NUM is the number of states including INV (the row dimension of
   the tables). */
typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
               INV, JIS_S_NUM } JIS_STATE;
/* Operation __jis_mbtowc performs for a (state, class) pair:
   COPY_A   store the byte at the current start pointer and return,
   COPY_J1  remember the first byte of a two-byte character,
   COPY_J2  combine the remembered byte with this one and return,
   MAKE_A   move the start pointer past the bytes consumed so far,
   NOOP     consume the byte without any other effect,
   EMPTY    store a null wide character and return 0,
   ERROR    report an invalid sequence. */
typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
48 
49 /**************************************************************************************
50  * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with following JIS characters and switches
52  * to ASCII are grouped with preceding JIS characters.  Thus, maximum returned length
53  * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
54  *************************************************************************************/
55 
56 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
57 /*              ESCAPE   DOLLAR    BRACKET   AT       B       J        NUL      JIS_CHAR  OTHER */
58 /* ASCII */   { A_ESC,   ASCII,    ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
59 /* JIS */     { J_ESC,   JIS_1,    JIS_1,    JIS_1,   JIS_1,  JIS_1,   INV,     JIS_1,    INV },
60 /* A_ESC */   { ASCII,   A_ESC_DL, ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
61 /* A_ESC_DL */{ ASCII,   ASCII,    ASCII,    JIS,     JIS,    ASCII,   ASCII,   ASCII,    ASCII },
62 /* JIS_1 */   { INV,     JIS,      JIS,      JIS,     JIS,    JIS,     INV,     JIS,      INV },
63 /* J_ESC */   { INV,     INV,      J_ESC_BR, INV,     INV,    INV,     INV,     INV,      INV },
64 /* J_ESC_BR */{ INV,     INV,      INV,      INV,     ASCII,  ASCII,   INV,     INV,      INV },
65 };
66 
67 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
68 /*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
69 /* ASCII */   { NOOP,    COPY_A,   COPY_A,   COPY_A,  COPY_A,  COPY_A,  EMPTY,   COPY_A,  COPY_A},
70 /* JIS */     { NOOP,    COPY_J1,  COPY_J1,  COPY_J1, COPY_J1, COPY_J1, ERROR,   COPY_J1, ERROR },
71 /* A_ESC */   { COPY_A,  NOOP,     COPY_A,   COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A},
72 /* A_ESC_DL */{ COPY_A,  COPY_A,   COPY_A,   NOOP,    NOOP,    COPY_A,  COPY_A,  COPY_A,  COPY_A},
73 /* JIS_1 */   { ERROR,   COPY_J2,  COPY_J2,  COPY_J2, COPY_J2, COPY_J2, ERROR,   COPY_J2, ERROR },
74 /* J_ESC */   { ERROR,   ERROR,    NOOP,     ERROR,   ERROR,   ERROR,   ERROR,   ERROR,   ERROR },
75 /* J_ESC_BR */{ ERROR,   ERROR,    ERROR,    ERROR,   MAKE_A,  MAKE_A,  ERROR,   ERROR,   ERROR },
76 };
77 
/* For the more complex encodings we reuse the mbstate_t __count field
   and use it to store a decoder state value instead of a byte count. */
#define __state __count
80 
81 #ifdef _MB_EXTENDED_CHARSETS_ISO
82 static int
___iso_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int iso_idx,mbstate_t * state)83 ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
84 	       int iso_idx, mbstate_t *state)
85 {
86   wchar_t dummy;
87   unsigned char *t = (unsigned char *)s;
88 
89   if (pwc == NULL)
90     pwc = &dummy;
91 
92   if (s == NULL)
93     return 0;
94 
95   if (n == 0)
96     return -2;
97 
98   if (*t >= 0xa0)
99     {
100       if (iso_idx >= 0)
101 	{
102 	  *pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
103 	  if (*pwc == 0) /* Invalid character */
104 	    {
105 	      _REENT_ERRNO(r) = EILSEQ;
106 	      return -1;
107 	    }
108 	  return 1;
109 	}
110     }
111 
112   *pwc = (wchar_t) *t;
113 
114   if (*t == '\0')
115     return 0;
116 
117   return 1;
118 }
119 
/* ISO-8859-1 entry point: forwards to ___iso_mbtowc with a negative
   table index, so no conversion table is consulted and every byte maps
   to its own value. */
static int
__iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = -1;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
126 
/* ISO-8859-2 entry point: forwards to ___iso_mbtowc with conversion-table
   row 0. */
static int
__iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 0;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
133 
/* ISO-8859-3 entry point: forwards to ___iso_mbtowc with conversion-table
   row 1. */
static int
__iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 1;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
140 
/* ISO-8859-4 entry point: forwards to ___iso_mbtowc with conversion-table
   row 2. */
static int
__iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 2;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
147 
/* ISO-8859-5 entry point: forwards to ___iso_mbtowc with conversion-table
   row 3. */
static int
__iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 3;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
154 
/* ISO-8859-6 entry point: forwards to ___iso_mbtowc with conversion-table
   row 4. */
static int
__iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 4;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
161 
/* ISO-8859-7 entry point: forwards to ___iso_mbtowc with conversion-table
   row 5. */
static int
__iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 5;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
168 
/* ISO-8859-8 entry point: forwards to ___iso_mbtowc with conversion-table
   row 6. */
static int
__iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 6;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
175 
/* ISO-8859-9 entry point: forwards to ___iso_mbtowc with conversion-table
   row 7. */
static int
__iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                     size_t n, mbstate_t *state)
{
  const int conv_row = 7;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
182 
/* ISO-8859-10 entry point: forwards to ___iso_mbtowc with conversion-table
   row 8. */
static int
__iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                      size_t n, mbstate_t *state)
{
  const int conv_row = 8;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
189 
/* ISO-8859-11 entry point: forwards to ___iso_mbtowc with conversion-table
   row 9. */
static int
__iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                      size_t n, mbstate_t *state)
{
  const int conv_row = 9;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
196 
/* ISO-8859-13 entry point: forwards to ___iso_mbtowc with conversion-table
   row 10. */
static int
__iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                      size_t n, mbstate_t *state)
{
  const int conv_row = 10;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
203 
/* ISO-8859-14 entry point: forwards to ___iso_mbtowc with conversion-table
   row 11. */
static int
__iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                      size_t n, mbstate_t *state)
{
  const int conv_row = 11;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
210 
/* ISO-8859-15 entry point: forwards to ___iso_mbtowc with conversion-table
   row 12. */
static int
__iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                      size_t n, mbstate_t *state)
{
  const int conv_row = 12;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
217 
/* ISO-8859-16 entry point: forwards to ___iso_mbtowc with conversion-table
   row 13. */
static int
__iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                      size_t n, mbstate_t *state)
{
  const int conv_row = 13;

  return ___iso_mbtowc (r, pwc, s, n, conv_row, state);
}
224 
225 static mbtowc_p __iso_8859_mbtowc[17] = {
226   NULL,
227   __iso_8859_1_mbtowc,
228   __iso_8859_2_mbtowc,
229   __iso_8859_3_mbtowc,
230   __iso_8859_4_mbtowc,
231   __iso_8859_5_mbtowc,
232   __iso_8859_6_mbtowc,
233   __iso_8859_7_mbtowc,
234   __iso_8859_8_mbtowc,
235   __iso_8859_9_mbtowc,
236   __iso_8859_10_mbtowc,
237   __iso_8859_11_mbtowc,
238   NULL,			/* No ISO 8859-12 */
239   __iso_8859_13_mbtowc,
240   __iso_8859_14_mbtowc,
241   __iso_8859_15_mbtowc,
242   __iso_8859_16_mbtowc
243 };
244 
245 /* val *MUST* be valid!  All checks for validity are supposed to be
246    performed before calling this function. */
247 mbtowc_p
__iso_mbtowc(int val)248 __iso_mbtowc (int val)
249 {
250   return __iso_8859_mbtowc[val];
251 }
252 #endif /* _MB_EXTENDED_CHARSETS_ISO */
253 
254 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
255 static int
___cp_mbtowc(struct _reent * r,wchar_t * pwc,const char * s,size_t n,int cp_idx,mbstate_t * state)256 ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
257 	      int cp_idx, mbstate_t *state)
258 {
259   wchar_t dummy;
260   unsigned char *t = (unsigned char *)s;
261 
262   if (pwc == NULL)
263     pwc = &dummy;
264 
265   if (s == NULL)
266     return 0;
267 
268   if (n == 0)
269     return -2;
270 
271   if (*t >= 0x80)
272     {
273       if (cp_idx >= 0)
274 	{
275 	  *pwc = __cp_conv[cp_idx][*t - 0x80];
276 	  if (*pwc == 0) /* Invalid character */
277 	    {
278 	      _REENT_ERRNO(r) = EILSEQ;
279 	      return -1;
280 	    }
281 	  return 1;
282 	}
283     }
284 
285   *pwc = (wchar_t)*t;
286 
287   if (*t == '\0')
288     return 0;
289 
290   return 1;
291 }
292 
/* Code page 437 entry point: forwards to ___cp_mbtowc with
   conversion-table row 0. */
static int
__cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 0;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
299 
/* Code page 720 entry point: forwards to ___cp_mbtowc with
   conversion-table row 1. */
static int
__cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 1;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
306 
/* Code page 737 entry point: forwards to ___cp_mbtowc with
   conversion-table row 2. */
static int
__cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 2;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
313 
/* Code page 775 entry point: forwards to ___cp_mbtowc with
   conversion-table row 3. */
static int
__cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 3;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
320 
/* Code page 850 entry point: forwards to ___cp_mbtowc with
   conversion-table row 4. */
static int
__cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 4;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
327 
/* Code page 852 entry point: forwards to ___cp_mbtowc with
   conversion-table row 5. */
static int
__cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 5;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
334 
/* Code page 855 entry point: forwards to ___cp_mbtowc with
   conversion-table row 6. */
static int
__cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 6;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
341 
/* Code page 857 entry point: forwards to ___cp_mbtowc with
   conversion-table row 7. */
static int
__cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 7;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
348 
/* Code page 858 entry point: forwards to ___cp_mbtowc with
   conversion-table row 8. */
static int
__cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 8;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
355 
/* Code page 862 entry point: forwards to ___cp_mbtowc with
   conversion-table row 9. */
static int
__cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 9;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
362 
/* Code page 866 entry point: forwards to ___cp_mbtowc with
   conversion-table row 10. */
static int
__cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 10;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
369 
/* Code page 874 entry point: forwards to ___cp_mbtowc with
   conversion-table row 11. */
static int
__cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 11;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
376 
/* Code page 1125 entry point: forwards to ___cp_mbtowc with
   conversion-table row 12. */
static int
__cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 12;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
383 
/* Code page 1250 entry point: forwards to ___cp_mbtowc with
   conversion-table row 13. */
static int
__cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 13;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
390 
/* Code page 1251 entry point: forwards to ___cp_mbtowc with
   conversion-table row 14. */
static int
__cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 14;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
397 
/* Code page 1252 entry point: forwards to ___cp_mbtowc with
   conversion-table row 15. */
static int
__cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 15;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
404 
/* Code page 1253 entry point: forwards to ___cp_mbtowc with
   conversion-table row 16. */
static int
__cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 16;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
411 
/* Code page 1254 entry point: forwards to ___cp_mbtowc with
   conversion-table row 17. */
static int
__cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 17;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
418 
/* Code page 1255 entry point: forwards to ___cp_mbtowc with
   conversion-table row 18. */
static int
__cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 18;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
425 
/* Code page 1256 entry point: forwards to ___cp_mbtowc with
   conversion-table row 19. */
static int
__cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 19;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
432 
/* Code page 1257 entry point: forwards to ___cp_mbtowc with
   conversion-table row 20. */
static int
__cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 20;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
439 
/* Code page 1258 entry point: forwards to ___cp_mbtowc with
   conversion-table row 21. */
static int
__cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                  size_t n, mbstate_t *state)
{
  const int conv_row = 21;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
446 
/* Code page 20866 entry point: forwards to ___cp_mbtowc with
   conversion-table row 22. */
static int
__cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                   size_t n, mbstate_t *state)
{
  const int conv_row = 22;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
453 
/* Code page 21866 entry point: forwards to ___cp_mbtowc with
   conversion-table row 23. */
static int
__cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                   size_t n, mbstate_t *state)
{
  const int conv_row = 23;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
460 
/* Code page 101 entry point: forwards to ___cp_mbtowc with
   conversion-table row 24. */
static int
__cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 24;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
467 
/* Code page 102 entry point: forwards to ___cp_mbtowc with
   conversion-table row 25. */
static int
__cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 25;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
474 
/* Code page 103 entry point: forwards to ___cp_mbtowc with
   conversion-table row 26. */
static int
__cp_103_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,
                 size_t n, mbstate_t *state)
{
  const int conv_row = 26;

  return ___cp_mbtowc (r, pwc, s, n, conv_row, state);
}
481 
482 static mbtowc_p __cp_xxx_mbtowc[27] = {
483   __cp_437_mbtowc,
484   __cp_720_mbtowc,
485   __cp_737_mbtowc,
486   __cp_775_mbtowc,
487   __cp_850_mbtowc,
488   __cp_852_mbtowc,
489   __cp_855_mbtowc,
490   __cp_857_mbtowc,
491   __cp_858_mbtowc,
492   __cp_862_mbtowc,
493   __cp_866_mbtowc,
494   __cp_874_mbtowc,
495   __cp_1125_mbtowc,
496   __cp_1250_mbtowc,
497   __cp_1251_mbtowc,
498   __cp_1252_mbtowc,
499   __cp_1253_mbtowc,
500   __cp_1254_mbtowc,
501   __cp_1255_mbtowc,
502   __cp_1256_mbtowc,
503   __cp_1257_mbtowc,
504   __cp_1258_mbtowc,
505   __cp_20866_mbtowc,
506   __cp_21866_mbtowc,
507   __cp_101_mbtowc,
508   __cp_102_mbtowc,
509   __cp_103_mbtowc,
510 };
511 
512 /* val *MUST* be valid!  All checks for validity are supposed to be
513    performed before calling this function. */
514 mbtowc_p
__cp_mbtowc(int val)515 __cp_mbtowc (int val)
516 {
517   return __cp_xxx_mbtowc[__cp_val_index (val)];
518 }
519 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
520 
/* Decode the next UTF-8 sequence from S, examining at most N bytes, and
   store the resulting wide character through PWC (PWC may be NULL).

   The decoder is restartable: bytes of an incomplete sequence are kept
   in STATE->__value.__wchb and STATE->__count records how many have
   been saved, so a later call can continue with the following bytes.

   Return value:
     0   S is NULL, or the decoded character is the terminating NUL;
    -2   N is zero, or the sequence is still incomplete after N bytes;
    -1   the bytes do not form a valid sequence (errno is set to EILSEQ);
    >0   the number of bytes taken from S by this call.

   When wchar_t is two bytes wide, a four-byte sequence is delivered as
   a surrogate pair over two calls (see the comment in the four-byte
   branch).

   errno is assigned directly: this function has no reentrancy pointer
   in scope, so there is no `r' that an _REENT_ERRNO(r) form could
   refer to.  */
int
__utf8_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  /* Start from the input, or from the saved lead byte when resuming an
     interrupted sequence.  */
  if (state->__count == 0)
    ch = t[i++];
  else
    ch = state->__value.__wchb[0];

  if (ch == '\0')
    {
      *pwc = 0;
      state->__count = 0;
      return 0; /* s points to the null character */
    }

  if (ch <= 0x7f)
    {
      /* single-byte sequence */
      state->__count = 0;
      *pwc = ch;
      return 1;
    }
  if (ch >= 0xc0 && ch <= 0xdf)
    {
      /* two-byte sequence */
      state->__value.__wchb[0] = ch;
      /* A byte that came from STATE rather than from S counts towards
	 the bytes available, unless that would wrap N around.  */
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      if (state->__value.__wchb[0] < 0xc2)
	{
	  /* overlong UTF-8 sequence */
	  errno = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
	|    (wchar_t)(ch & 0x3f);
      return i;
    }
  if (ch >= 0xe0 && ch <= 0xef)
    {
      /* three-byte sequence */
      wchar_t tmp;
      state->__value.__wchb[0] = ch;
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      /* Second byte: from the input unless it was already saved.  */
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
      if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
	{
	  /* overlong UTF-8 sequence */
	  errno = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      state->__value.__wchb[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      state->__count = 0;
      tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
	|    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
	|     (wchar_t)(ch & 0x3f);
      *pwc = tmp;
      return i;
    }
  if (ch >= 0xf0 && ch <= 0xf4)
    {
      /* four-byte sequence */
      uint32_t tmp;
      state->__value.__wchb[0] = ch;
      if (state->__count == 0)
	state->__count = 1;
      else if (n < (size_t)-1)
	++n;
      if (n < 2)
	return -2;
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
      if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
	  || (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
	{
	  /* overlong UTF-8 sequence or result is > 0x10ffff */
	  errno = EILSEQ;
	  return -1;
	}
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      state->__value.__wchb[1] = ch;
      if (state->__count == 1)
	state->__count = 2;
      else if (n < (size_t)-1)
	++n;
      if (n < 3)
	return -2;
      ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      state->__value.__wchb[2] = ch;
      if (state->__count == 2)
	state->__count = 3;
      else if (n < (size_t)-1)
	++n;
      if (state->__count == 3 && sizeof(wchar_t) == 2)
	{
	  /* On systems which have wchar_t being UTF-16 values, the value
	     doesn't fit into a single wchar_t in this case.  So what we
	     do here is to store the state with a special value of __count
	     and return the first half of a surrogate pair.  The first
	     three bytes of a UTF-8 sequence are enough to generate the
	     first half of a UTF-16 surrogate pair.  As return value we
	     choose to return the number of bytes actually read up to
	     here.
	     The second half of the surrogate pair is returned in case we
	     recognize the special __count value of four, and the next
	     byte is actually a valid value.  See below. */
            tmp = (uint32_t)((state->__value.__wchb[0] & (uint32_t) 0x07) << 18)
                |   (uint32_t)((state->__value.__wchb[1] & (uint32_t) 0x3f) << 12)
                |   (uint32_t)((state->__value.__wchb[2] & (uint32_t) 0x3f) << 6);
	  state->__count = 4;
	  *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
	  return i;
	}
      if (n < 4)
	return -2;
      ch = t[i++];
      if (ch < 0x80 || ch > 0xbf)
	{
	  errno = EILSEQ;
	  return -1;
	}
      tmp = (((uint32_t)state->__value.__wchb[0] & 0x07) << 18)
        |   (((uint32_t)state->__value.__wchb[1] & 0x3f) << 12)
        |   (((uint32_t)state->__value.__wchb[2] & 0x3f) << 6)
        |   ((uint32_t)ch & 0x3f);
      if (state->__count == 4 && sizeof(wchar_t) == 2)
	/* Create the second half of the surrogate pair for systems with
	   wchar_t == UTF-16 . */
	*pwc = 0xdc00 | (tmp & 0x3ff);
      else
	*pwc = tmp;
      state->__count = 0;
      return i;
    }

  /* Not a valid lead byte (0x80..0xbf or 0xf5..0xff).  */
  errno = EILSEQ;
  return -1;
}
718 
719 /* Cygwin defines its own doublebyte charset conversion functions
720    because the underlying OS requires wchar_t == UTF-16. */
/* Decode the next Shift-JIS character from S, examining at most N
   bytes, and store it through PWC (PWC may be NULL).

   A byte accepted by _issjis1 starts a two-byte character; the second
   byte must satisfy _issjis2 and the result is (first << 8) + second.
   Any other byte is returned as a single-byte character.  If only the
   first byte of a pair is available it is saved in STATE (with
   STATE->__count set to 1) so a later call can supply the second byte.

   Return value:
     0   S is NULL, or the character is the terminating NUL;
    -2   N is zero, or only the first byte of a pair was available;
    -1   invalid second byte (errno is set to EILSEQ);
    >0   the number of bytes taken from S by this call.

   errno is assigned directly: this function has no reentrancy pointer
   in scope, so there is no `r' that an _REENT_ERRNO(r) form could
   refer to.  */
int
__sjis_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_issjis1 (ch))
	{
	  /* Remember the lead byte before looking for the trail byte so
	     that the sequence can be resumed if the input ends here.  */
	  state->__value.__wchb[0] = ch;
	  state->__count = 1;
	  if (n <= 1)
	    return -2;
	  ch = t[i++];
	}
    }
  if (state->__count == 1)
    {
      if (_issjis2 (ch))
	{
	  *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
	  state->__count = 0;
	  return i;
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }

  /* Single-byte character.  */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
776 
/* Decode the next EUC-JP character from S, examining at most N bytes,
   and store it through PWC (PWC may be NULL).

   A byte accepted by _iseucjp1 starts a multibyte character whose
   following bytes must satisfy _iseucjp2.  A lead byte of 0x8f
   introduces a three-byte character, whose value is
   (second << 8) + (third & 0x7f); any other lead byte gives a two-byte
   character with value (first << 8) + second.  Bytes not accepted by
   _iseucjp1 are returned as single-byte characters.  Bytes of an
   incomplete character are saved in STATE (STATE->__count holds how
   many) so a later call can continue.

   Return value:
     0   S is NULL, or the character is the terminating NUL;
    -2   N is zero, or the character is still incomplete;
    -1   invalid trailing byte (errno is set to EILSEQ);
    >0   the number of bytes taken from S by this call.

   errno is assigned directly: this function has no reentrancy pointer
   in scope, so there is no `r' that an _REENT_ERRNO(r) form could
   refer to.  */
int
__eucjp_mbtowc (
        wchar_t       *pwc,
        const char    *s,
        size_t         n,
        mbstate_t      *state)
{
  wchar_t dummy;
  unsigned char *t = (unsigned char *)s;
  int ch;
  int i = 0;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  ch = t[i++];
  if (state->__count == 0)
    {
      if (_iseucjp1 (ch))
	{
	  /* Save the lead byte first so the sequence can be resumed if
	     the input ends here.  */
	  state->__value.__wchb[0] = ch;
	  state->__count = 1;
	  if (n <= 1)
	    return -2;
	  ch = t[i++];
	}
    }
  if (state->__count == 1)
    {
      if (_iseucjp2 (ch))
	{
	  if (state->__value.__wchb[0] == 0x8f)
	    {
	      /* Three-byte character: keep the second byte and go on to
		 the third.  */
	      state->__value.__wchb[1] = ch;
	      state->__count = 2;
	      if (n <= (size_t) i)
		return -2;
	      ch = t[i++];
	    }
	  else
	    {
	      *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
	      state->__count = 0;
	      return i;
	    }
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }
  if (state->__count == 2)
    {
      if (_iseucjp2 (ch))
	{
	  *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
		 + (wchar_t)(ch & 0x7f);
	  state->__count = 0;
	  return i;
	}
      else
	{
	  errno = EILSEQ;
	  return -1;
	}
    }

  /* Single-byte character.  */
  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}
858 
859 int
__jis_mbtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * state)860 __jis_mbtowc (
861         wchar_t       *pwc,
862         const char    *s,
863         size_t         n,
864         mbstate_t      *state)
865 {
866   wchar_t dummy;
867   unsigned char *t = (unsigned char *)s;
868   JIS_STATE curr_state;
869   JIS_ACTION action;
870   JIS_CHAR_TYPE ch;
871   unsigned char *ptr;
872   unsigned int i;
873   int curr_ch;
874 
875   if (pwc == NULL)
876     pwc = &dummy;
877 
878   if (s == NULL)
879     {
880       state->__state = ASCII;
881       return 1;  /* state-dependent */
882     }
883 
884   if (n == 0)
885     return -2;
886 
887   curr_state = state->__state;
888   ptr = t;
889 
890   for (i = 0; i < n; ++i)
891     {
892       curr_ch = t[i];
893       switch (curr_ch)
894 	{
895 	case ESC_CHAR:
896 	  ch = ESCAPE;
897 	  break;
898 	case '$':
899 	  ch = DOLLAR;
900 	  break;
901 	case '@':
902 	  ch = AT;
903 	  break;
904 	case '(':
905 	  ch = BRACKET;
906 	  break;
907 	case 'B':
908 	  ch = B;
909 	  break;
910 	case 'J':
911 	  ch = J;
912 	  break;
913 	case '\0':
914 	  ch = NUL;
915 	  break;
916 	default:
917 	  if (_isjis (curr_ch))
918 	    ch = JIS_CHAR;
919 	  else
920 	    ch = OTHER;
921 	}
922 
923       action = JIS_action_table[curr_state][ch];
924       curr_state = JIS_state_table[curr_state][ch];
925 
926       switch (action)
927 	{
928 	case NOOP:
929 	  break;
930 	case EMPTY:
931 	  state->__state = ASCII;
932 	  *pwc = (wchar_t)0;
933 	  return 0;
934 	case COPY_A:
935 	  state->__state = ASCII;
936 	  *pwc = (wchar_t)*ptr;
937 	  return (i + 1);
938 	case COPY_J1:
939 	  state->__value.__wchb[0] = t[i];
940 	  break;
941 	case COPY_J2:
942 	  state->__state = JIS;
943 	  *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
944 	  return (i + 1);
945 	case MAKE_A:
946 	  ptr = (unsigned char *)(t + i + 1);
947 	  break;
948 	case ERROR:
949 	default:
950 	  _REENT_ERRNO(r) = EILSEQ;
951 	  return -1;
952 	}
953 
954     }
955 
956   state->__state = curr_state;
957   return -2;  /* n < bytes needed */
958 }
959 #endif /* _MB_CAPABLE */
960