1 /*
2  * Copyright (c) 2003-2004, Artem B. Bityuckiy
3  * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #include "cesbi.h"
27 
28 #if defined (ICONV_TO_UCS_CES_EUC) \
29  || defined (ICONV_FROM_UCS_CES_EUC)
30 
31 #include <_ansi.h>
32 #include <newlib.h>
33 #include <string.h>
34 #include <stdlib.h>
35 #include <limits.h>
36 #include <sys/types.h>
37 #include "../lib/local.h"
38 #include "../lib/ucsconv.h"
39 #include "../lib/encnames.h"
40 #include "../ccs/ccsnames.h"
41 
/* Identifiers of the supported EUC variants; stored in euc_data_t.type. */
#define TYPE_EUC_JP 0
#define TYPE_EUC_KR 1
#define TYPE_EUC_TW 2

/* Capacity of euc_data_t.data[]: no descriptor table in this file lists
   more than this many code sets (the ASCII set is handled separately). */
#define MAX_CS_NUM 3
47 
/*
 * CS description structure: one coded character set (CS) carried inside
 * an EUC encoding.  Descriptor arrays are terminated by an entry whose
 * csname is NULL.
 */
typedef struct
{
  char *csname;    /* CCS name passed to the table converter's init() */
  char *prefix;    /* Bytes that introduce this CS in the EUC stream ("" if none) */
  int bytes;       /* Bytes per character, not counting the prefix */
  int prefixbytes; /* Number of bytes in prefix */
  int touchmsb; /* If 1, msb will be set by euc converter */
} euc_cs_desc_t;
57 
/*
 * Per-conversion state allocated by euc_from_ucs_init()/euc_to_ucs_init()
 * and released by the matching close function.
 */
typedef struct
{
  int type;            /* One of the TYPE_EUC_* constants */
  int mb_cur_max;      /* Value reported by euc_get_mb_cur_max() */
  euc_cs_desc_t *desc; /* Descriptor array of the selected EUC variant */

  void *data[MAX_CS_NUM]; /* data[i]: table converter handle for desc[i], or NULL */
} euc_data_t;
66 
67 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
68  || defined (_ICONV_FROM_ENCODING_EUC_JP) \
69  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
/* Code sets of EUC-JP.  Columns: csname, prefix, bytes, prefixbytes,
   touchmsb.  Entry 0 has no prefix; the others are selected by their
   one-byte prefix.  The {0} entry terminates the list. */
static euc_cs_desc_t euc_jp_cs_desc[] =
{
  {ICONV_CCS_JIS_X0208_1990, "",     2, 0, 1},
  {ICONV_CCS_JIS_X0201_1976, "\x8e", 1, 1, 0},
  {ICONV_CCS_JIS_X0212_1990, "\x8f", 2, 1, 1},
  {0}
};
77 #endif
78 
79 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
80  || defined (_ICONV_FROM_ENCODING_EUC_TW) \
81  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
/* Code sets of EUC-TW.  Columns: csname, prefix, bytes, prefixbytes,
   touchmsb.  Entry 0 has no prefix; the other planes are selected by a
   two-byte prefix.  The {0} entry terminates the list. */
static euc_cs_desc_t euc_tw_cs_desc [] =
{
  {ICONV_CCS_CNS11643_PLANE1,  "",         2, 0, 1},
  {ICONV_CCS_CNS11643_PLANE2,  "\x8e\xa2", 2, 2, 1},
  {ICONV_CCS_CNS11643_PLANE14, "\x8e\xae", 2, 2, 1},
  {0}
};
89 #endif
90 
91 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
92  || defined (_ICONV_FROM_ENCODING_EUC_KR) \
93  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
/* Code sets of EUC-KR: a single two-byte, prefix-less set.  Columns:
   csname, prefix, bytes, prefixbytes, touchmsb.  The {0} entry
   terminates the list. */
static euc_cs_desc_t euc_kr_cs_desc [] =
{
  {ICONV_CCS_KSX1001,  "", 2, 0, 1},
  {0}
};
99 #endif
100 
101 #if defined (ICONV_FROM_UCS_CES_EUC)
102 static void *
euc_from_ucs_init(const char * encoding)103 euc_from_ucs_init (
104                           const char *encoding)
105 {
106   int i;
107   euc_data_t *data;
108 
109   if ((data = (euc_data_t *)calloc (1, sizeof (euc_data_t))) == NULL)
110     return 0;
111 
112 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
113  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
114   if (strcmp (encoding, ICONV_ENCODING_EUC_JP) == 0)
115     {
116       data->type = TYPE_EUC_JP;
117       data->mb_cur_max = 3;
118       data->desc = &euc_jp_cs_desc[0];
119       goto ok;
120     }
121 #endif
122 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
123  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
124   if (strcmp (encoding, ICONV_ENCODING_EUC_KR) == 0)
125     {
126       data->type = TYPE_EUC_KR;
127       data->mb_cur_max = 2;
128       data->desc = &euc_kr_cs_desc[0];
129       goto ok;
130     }
131 #endif
132 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
133  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
134   if (strcmp (encoding, ICONV_ENCODING_EUC_TW) == 0)
135     {
136       data->type = TYPE_EUC_TW;
137       data->mb_cur_max = 4;
138       data->desc = &euc_tw_cs_desc[0];
139       goto ok;
140     }
141 #endif
142 
143   goto error1;
144 
145 ok:
146   for (i = 0; data->desc[i].csname != NULL; i++)
147     {
148       data->data[i] = _iconv_from_ucs_ces_handlers_table.init (
149                                                         data->desc[i].csname);
150       if (data->data[i] == NULL)
151         goto error;
152     }
153 
154   return data;
155 
156 error:
157   _iconv_from_ucs_ces_handlers_table.close (data);
158   return NULL;
159 error1:
160   free ((void *)data);
161   return NULL;
162 }
163 
164 static size_t
euc_from_ucs_close(void * data)165 euc_from_ucs_close (
166                            void *data)
167 {
168   int i;
169   size_t res = 0;
170 
171   for (i = 0; i < MAX_CS_NUM; i++)
172     {
173       if (((euc_data_t *)data)->data[i] != NULL)
174         res |= _iconv_from_ucs_ces_handlers_table.close (
175                                                 ((euc_data_t *)data)->data[i]);
176     }
177   free(data);
178 
179   return res;
180 }
181 
182 static size_t
euc_convert_from_ucs(void * data,register ucs4_t in,unsigned char ** outbuf,size_t * outbytesleft)183 euc_convert_from_ucs (void *data,
184                              register ucs4_t in,
185                              unsigned char **outbuf,
186                              size_t *outbytesleft)
187 {
188   int i;
189   int j;
190   int res;
191   unsigned char *outbuf1;
192   size_t outbytesleft1;
193   euc_data_t *d = (euc_data_t *)data;
194 
195   if (in < 0x80) /* CS0 ASCII */
196     return _iconv_from_ucs_ces_handlers_us_ascii.convert_from_ucs (
197                                                  NULL,
198                                                  in,
199                                                  outbuf,
200                                                  outbytesleft);
201 
202   /* Try other CS */
203   for (i = 0; d->desc[i].csname != NULL; i++)
204     {
205 
206       if (((int)*outbytesleft - d->desc[i].prefixbytes - d->desc[i].bytes) < 0)
207         {
208           unsigned char buf[ICONV_MB_LEN_MAX];
209           outbytesleft1 = ICONV_MB_LEN_MAX;
210           outbuf1 = &buf[0];
211           /* See wether this is right sequence */
212           res =
213             (int)_iconv_from_ucs_ces_handlers_table.convert_from_ucs (
214                                                          d->data[i],
215                                                          in,
216                                                          &outbuf1,
217                                                          &outbytesleft1);
218           if (res > 0)
219             return (size_t)ICONV_CES_NOSPACE;
220 
221           continue;
222         }
223 
224       outbuf1 = *outbuf + d->desc[i].prefixbytes;
225       outbytesleft1 = *outbytesleft - d->desc[i].prefixbytes;
226 
227       res = (int)_iconv_from_ucs_ces_handlers_table.convert_from_ucs (
228                                                      d->data[i],
229                                                      in,
230                                                      &outbuf1,
231                                                      &outbytesleft1);
232       if (res == d->desc[i].bytes)
233         {
234           for (j = 0; j < d->desc[i].prefixbytes; j++)
235             (*outbuf)[j] = d->desc[i].prefix[j];
236 
237           if (d->desc[i].touchmsb)
238             for (j = 0; j < d->desc[i].bytes; j++)
239               {
240                 if ((*outbuf)[j + d->desc[i].prefixbytes] & 0x80)
241                   return (size_t)ICONV_CES_INVALID_CHARACTER;
242                 (*outbuf)[j + d->desc[i].prefixbytes] |= 0x80;
243               }
244 
245           *outbuf = outbuf1;
246           *outbytesleft = outbytesleft1;
247 
248           return (size_t)(res + d->desc[i].bytes);
249         }
250     }
251 
252   return (size_t)ICONV_CES_INVALID_CHARACTER;
253 }
254 #endif /* ICONV_FROM_UCS_CES_EUC */
255 
256 #if defined (ICONV_TO_UCS_CES_EUC)
257 static void *
euc_to_ucs_init(const char * encoding)258 euc_to_ucs_init (
259                         const char *encoding)
260 {
261   int i;
262   euc_data_t *data;
263 
264   if ((data = (euc_data_t *)calloc (1, sizeof (euc_data_t))) == NULL)
265     return 0;
266 
267 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
268  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
269   if (strcmp (encoding, ICONV_ENCODING_EUC_JP) == 0)
270     {
271       data->type = TYPE_EUC_JP;
272       data->mb_cur_max = 3;
273       data->desc = &euc_jp_cs_desc[0];
274       goto ok;
275     }
276 #endif
277 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
278  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
279   if (strcmp (encoding, ICONV_ENCODING_EUC_KR) == 0)
280     {
281       data->type = TYPE_EUC_KR;
282       data->mb_cur_max = 2;
283       data->desc = &euc_kr_cs_desc[0];
284       goto ok;
285     }
286 #endif
287 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
288  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
289   if (strcmp (encoding, ICONV_ENCODING_EUC_TW) == 0)
290     {
291       data->type = TYPE_EUC_TW;
292       data->mb_cur_max = 4;
293       data->desc = &euc_tw_cs_desc[0];
294       goto ok;
295     }
296 #endif
297 
298   goto error1;
299 
300 ok:
301   for (i = 0; data->desc[i].csname != NULL; i++)
302     {
303       data->data[i] = _iconv_to_ucs_ces_handlers_table.init (
304                                                         data->desc[i].csname);
305       if (data->data[i] == NULL)
306         goto error;
307     }
308 
309   return data;
310 
311 error:
312   _iconv_to_ucs_ces_handlers_table.close (data);
313   return NULL;
314 error1:
315   free ((void *)data);
316   return NULL;
317 }
318 
319 static size_t
euc_to_ucs_close(void * data)320 euc_to_ucs_close (
321                          void *data)
322 {
323   int i;
324   size_t res = 0;
325 
326   for (i = 0; i < MAX_CS_NUM; i++)
327     {
328       if (((euc_data_t *)data)->data[i] != NULL)
329         res |= _iconv_to_ucs_ces_handlers_table.close (
330                                                 ((euc_data_t *)data)->data[i]);
331     }
332   free(data);
333 
334   return res;
335 }
336 
337 static ucs4_t
euc_convert_to_ucs(void * data,const unsigned char ** inbuf,size_t * inbytesleft)338 euc_convert_to_ucs (void *data,
339                            const unsigned char **inbuf,
340                            size_t *inbytesleft)
341 {
342   int i;
343   int j;
344   ucs4_t res;
345   unsigned char buf[ICONV_MB_LEN_MAX];
346   size_t inbytesleft1;
347   euc_data_t *d = (euc_data_t *)data;
348   unsigned char *inbuf1 = &buf[0];
349 
350   if (**inbuf < 0x80) /* CS0 is always ASCII */
351     return _iconv_to_ucs_ces_handlers_us_ascii.convert_to_ucs (
352                                                          NULL,
353                                                          inbuf,
354                                                          inbytesleft);
355 
356   for (i = 1; d->desc[i].csname != NULL; i++)
357     {
358       if (memcmp((const void *)(*inbuf),
359                  (const void *)d->desc[i].prefix,
360                  d->desc[i].prefixbytes) == 0)
361         {
362           if (((int)*inbytesleft - d->desc[i].prefixbytes - d->desc[i].bytes) < 0)
363             return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
364 
365           if (d->desc[i].touchmsb)
366             for (j = 0; j < d->desc[i].bytes; j++)
367               {
368                 if (!((*inbuf)[j + d->desc[i].prefixbytes] & 0x80))
369                   return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
370                 inbuf1[j] = (*inbuf)[j + d->desc[i].prefixbytes] & 0x7F;
371               }
372           else
373             for (j = 0; j < d->desc[i].bytes; j++)
374               inbuf1[j] = (*inbuf)[j + d->desc[i].prefixbytes];
375 
376           inbytesleft1 = d->desc[i].bytes;
377 
378           res = _iconv_to_ucs_ces_handlers_table.convert_to_ucs (
379                                              d->data[i],
380                                              (const unsigned char **)&inbuf1,
381                                              &inbytesleft1);
382           if (((__int32_t)res) > 0)
383             {
384               *inbuf += d->desc[i].bytes +  d->desc[i].prefixbytes;
385               *inbytesleft -= d->desc[i].bytes + d->desc[i].prefixbytes;
386             }
387 
388           return res;
389         }
390     }
391 
392   /* Process CS1 */
393   if (((int)(*inbytesleft - d->desc[0].prefixbytes - d->desc[0].bytes)) < 0)
394     return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
395 
396   if (d->desc[0].touchmsb)
397     for (j = 0; j < d->desc[0].bytes; j++)
398       {
399         if (!((*inbuf)[j + d->desc[0].prefixbytes] & 0x80))
400           return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
401         inbuf1[j] = (*inbuf)[j] & 0x7F;
402       }
403   else
404     for (j = 0; j < d->desc[0].bytes; j++)
405       inbuf1[j] = (*inbuf)[j];
406 
407   inbytesleft1 = d->desc[0].bytes;
408 
409   res = _iconv_to_ucs_ces_handlers_table.convert_to_ucs (
410                                         d->data[0],
411                                         (const unsigned char **)&inbuf1,
412                                         &inbytesleft1);
413   if (((__int32_t)res) > 0)
414     {
415       *inbuf += d->desc[0].bytes;
416       *inbytesleft -= d->desc[0].bytes;
417     }
418 
419   return res;
420 }
421 #endif /* ICONV_TO_UCS_CES_EUC */
422 
423 static int
euc_get_mb_cur_max(void * data)424 euc_get_mb_cur_max (void *data)
425 {
426   return ((euc_data_t *)data)->mb_cur_max;
427 }
428 
429 #if defined (ICONV_FROM_UCS_CES_EUC)
/* UCS -> EUC handler table exported by this file.  The initializer is
   positional, so the entries must follow the member order of
   iconv_from_ucs_ces_handlers_t (declared outside this file).  The three
   NULL entries are slots this converter does not provide.
   NOTE(review): presumably the state-handling hooks - confirm against the
   type declaration.  */
const iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_euc =
{
  euc_from_ucs_init,
  euc_from_ucs_close,
  euc_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  euc_convert_from_ucs
};
441 #endif
442 
443 #if defined (ICONV_TO_UCS_CES_EUC)
/* EUC -> UCS handler table exported by this file.  The initializer is
   positional, so the entries must follow the member order of
   iconv_to_ucs_ces_handlers_t (declared outside this file).  The three
   NULL entries are slots this converter does not provide.
   NOTE(review): presumably the state-handling hooks - confirm against the
   type declaration.  */
const iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_euc =
{
  euc_to_ucs_init,
  euc_to_ucs_close,
  euc_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  euc_convert_to_ucs
};
455 #endif
456 
457 #endif /* ICONV_TO_UCS_CES_EUC || ICONV_FROM_UCS_CES_EUC */
458 
459 
460