1 /*
2  * Copyright (c) 2003-2004, Artem B. Bityuckiy
3  * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #include "cesbi.h"
27 
28 #if defined (ICONV_TO_UCS_CES_EUC) \
29  || defined (ICONV_FROM_UCS_CES_EUC)
30 
31 #include <string.h>
32 #include <stdlib.h>
33 #include <limits.h>
34 #include <sys/types.h>
35 #include "../lib/local.h"
36 #include "../ccs/ccsnames.h"
37 
/* Identifiers of the supported EUC variants; stored in euc_data_t.type. */
#define TYPE_EUC_JP 0
#define TYPE_EUC_KR 1
#define TYPE_EUC_TW 2

/* Number of per-code-set handler slots kept in euc_data_t.data[]. */
#define MAX_CS_NUM 3
43 
/*
 * Description of one code set (CS) of an EUC encoding.
 *
 * Tables of these descriptors are terminated by an entry whose csname is
 * NULL.  The string members only ever point at string literals and are
 * never written through, hence the const qualification.
 */
typedef struct
{
  const char *csname; /* CCS name handed to the table handler's init */
  const char *prefix; /* Bytes that introduce this code set in the stream */
  int bytes;          /* Number of character bytes following the prefix */
  int prefixbytes;    /* Length of prefix, in bytes */
  int touchmsb;       /* If 1, msb will be set by euc converter */
} euc_cs_desc_t;
53 
54 typedef struct
55 {
56   int type;
57   int mb_cur_max;
58   const euc_cs_desc_t *desc;
59 
60   void *data[MAX_CS_NUM];
61 } euc_data_t;
62 
#if defined (_ICONV_TO_ENCODING_EUC_JP) \
 || defined (_ICONV_FROM_ENCODING_EUC_JP) \
 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
/*
 * Code sets of EUC-JP.
 * Columns: csname, prefix, bytes, prefixbytes, touchmsb.
 */
static const euc_cs_desc_t euc_jp_cs_desc[] =
{
  { ICONV_CCS_JIS_X0208_1990, "",     2, 0, 1 },
  { ICONV_CCS_JIS_X0201_1976, "\x8e", 1, 1, 0 },
  { ICONV_CCS_JIS_X0212_1990, "\x8f", 2, 1, 1 },
  { NULL, NULL, 0, 0, 0 } /* End of table */
};
#endif
74 
#if defined (_ICONV_TO_ENCODING_EUC_TW) \
 || defined (_ICONV_FROM_ENCODING_EUC_TW) \
 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
/*
 * Code sets of EUC-TW.
 * Columns: csname, prefix, bytes, prefixbytes, touchmsb.
 */
static const euc_cs_desc_t euc_tw_cs_desc[] =
{
  { ICONV_CCS_CNS11643_PLANE1,  "",         2, 0, 1 },
  { ICONV_CCS_CNS11643_PLANE2,  "\x8e\xa2", 2, 2, 1 },
  { ICONV_CCS_CNS11643_PLANE14, "\x8e\xae", 2, 2, 1 },
  { NULL, NULL, 0, 0, 0 } /* End of table */
};
#endif
86 
#if defined (_ICONV_TO_ENCODING_EUC_KR) \
 || defined (_ICONV_FROM_ENCODING_EUC_KR) \
 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
/*
 * Code sets of EUC-KR.
 * Columns: csname, prefix, bytes, prefixbytes, touchmsb.
 */
static const euc_cs_desc_t euc_kr_cs_desc[] =
{
  { ICONV_CCS_KSX1001, "", 2, 0, 1 },
  { NULL, NULL, 0, 0, 0 } /* End of table */
};
#endif
96 
97 #if defined (ICONV_FROM_UCS_CES_EUC)
98 static void *
euc_from_ucs_init(const char * encoding)99 euc_from_ucs_init (
100                           const char *encoding)
101 {
102   int i;
103   euc_data_t *data;
104 
105   if ((data = (euc_data_t *)calloc (1, sizeof (euc_data_t))) == NULL)
106     return 0;
107 
108 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
109  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
110   if (strcmp (encoding, ICONV_ENCODING_EUC_JP) == 0)
111     {
112       data->type = TYPE_EUC_JP;
113       data->mb_cur_max = 3;
114       data->desc = &euc_jp_cs_desc[0];
115       goto ok;
116     }
117 #endif
118 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
119  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
120   if (strcmp (encoding, ICONV_ENCODING_EUC_KR) == 0)
121     {
122       data->type = TYPE_EUC_KR;
123       data->mb_cur_max = 2;
124       data->desc = &euc_kr_cs_desc[0];
125       goto ok;
126     }
127 #endif
128 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
129  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
130   if (strcmp (encoding, ICONV_ENCODING_EUC_TW) == 0)
131     {
132       data->type = TYPE_EUC_TW;
133       data->mb_cur_max = 4;
134       data->desc = &euc_tw_cs_desc[0];
135       goto ok;
136     }
137 #endif
138 
139   goto error1;
140 
141 ok:
142   for (i = 0; i < MAX_CS_NUM && data->desc[i].csname != NULL; i++)
143     {
144       data->data[i] = _iconv_from_ucs_ces_handlers_table.init (
145                                                         data->desc[i].csname);
146       if (data->data[i] == NULL)
147         goto error;
148     }
149 
150   return data;
151 
152 error:
153   _iconv_from_ucs_ces_handlers_table.close (data);
154   return NULL;
155 error1:
156   free ((void *)data);
157   return NULL;
158 }
159 
160 static size_t
euc_from_ucs_close(void * data)161 euc_from_ucs_close (
162                            void *data)
163 {
164   int i;
165   size_t res = 0;
166 
167   for (i = 0; i < MAX_CS_NUM; i++)
168     {
169       if (((euc_data_t *)data)->data[i] != NULL)
170         res |= _iconv_from_ucs_ces_handlers_table.close (
171                                                 ((euc_data_t *)data)->data[i]);
172     }
173   free(data);
174 
175   return res;
176 }
177 
178 static size_t
euc_convert_from_ucs(void * data,register ucs4_t in,unsigned char ** outbuf,size_t * outbytesleft)179 euc_convert_from_ucs (void *data,
180                              register ucs4_t in,
181                              unsigned char **outbuf,
182                              size_t *outbytesleft)
183 {
184   int i;
185   int j;
186   int res;
187   unsigned char *outbuf1;
188   size_t outbytesleft1;
189   euc_data_t *d = (euc_data_t *)data;
190 
191   if (in < 0x80) /* CS0 ASCII */
192     return _iconv_from_ucs_ces_handlers_us_ascii.convert_from_ucs (
193                                                  NULL,
194                                                  in,
195                                                  outbuf,
196                                                  outbytesleft);
197 
198   /* Try other CS */
199   for (i = 0; d->desc[i].csname != NULL; i++)
200     {
201 
202       if (((int)*outbytesleft - d->desc[i].prefixbytes - d->desc[i].bytes) < 0)
203         {
204           unsigned char buf[ICONV_MB_LEN_MAX];
205           outbytesleft1 = ICONV_MB_LEN_MAX;
206           outbuf1 = &buf[0];
207           /* See wether this is right sequence */
208           res =
209             (int)_iconv_from_ucs_ces_handlers_table.convert_from_ucs (
210                                                          d->data[i],
211                                                          in,
212                                                          &outbuf1,
213                                                          &outbytesleft1);
214           if (res > 0)
215             return (size_t)ICONV_CES_NOSPACE;
216 
217           continue;
218         }
219 
220       outbuf1 = *outbuf + d->desc[i].prefixbytes;
221       outbytesleft1 = *outbytesleft - d->desc[i].prefixbytes;
222 
223       res = (int)_iconv_from_ucs_ces_handlers_table.convert_from_ucs (
224                                                      d->data[i],
225                                                      in,
226                                                      &outbuf1,
227                                                      &outbytesleft1);
228       if (res == d->desc[i].bytes)
229         {
230           for (j = 0; j < d->desc[i].prefixbytes; j++)
231             (*outbuf)[j] = d->desc[i].prefix[j];
232 
233           if (d->desc[i].touchmsb)
234             for (j = 0; j < d->desc[i].bytes; j++)
235               {
236                 if ((*outbuf)[j + d->desc[i].prefixbytes] & 0x80)
237                   return (size_t)ICONV_CES_INVALID_CHARACTER;
238                 (*outbuf)[j + d->desc[i].prefixbytes] |= 0x80;
239               }
240 
241           *outbuf = outbuf1;
242           *outbytesleft = outbytesleft1;
243 
244           return (size_t)(res + d->desc[i].bytes);
245         }
246     }
247 
248   return (size_t)ICONV_CES_INVALID_CHARACTER;
249 }
250 #endif /* ICONV_FROM_UCS_CES_EUC */
251 
252 #if defined (ICONV_TO_UCS_CES_EUC)
253 static void *
euc_to_ucs_init(const char * encoding)254 euc_to_ucs_init (
255                         const char *encoding)
256 {
257   int i;
258   euc_data_t *data;
259 
260   if ((data = (euc_data_t *)calloc (1, sizeof (euc_data_t))) == NULL)
261     return 0;
262 
263 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
264  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
265   if (strcmp (encoding, ICONV_ENCODING_EUC_JP) == 0)
266     {
267       data->type = TYPE_EUC_JP;
268       data->mb_cur_max = 3;
269       data->desc = &euc_jp_cs_desc[0];
270       goto ok;
271     }
272 #endif
273 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
274  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
275   if (strcmp (encoding, ICONV_ENCODING_EUC_KR) == 0)
276     {
277       data->type = TYPE_EUC_KR;
278       data->mb_cur_max = 2;
279       data->desc = &euc_kr_cs_desc[0];
280       goto ok;
281     }
282 #endif
283 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
284  || defined (_ICONV_ENABLE_EXTERNAL_CCS)
285   if (strcmp (encoding, ICONV_ENCODING_EUC_TW) == 0)
286     {
287       data->type = TYPE_EUC_TW;
288       data->mb_cur_max = 4;
289       data->desc = &euc_tw_cs_desc[0];
290       goto ok;
291     }
292 #endif
293 
294   goto error1;
295 
296 ok:
297   for (i = 0; i < MAX_CS_NUM && data->desc[i].csname != NULL; i++)
298     {
299       data->data[i] = _iconv_to_ucs_ces_handlers_table.init (
300                                                         data->desc[i].csname);
301       if (data->data[i] == NULL)
302         goto error;
303     }
304 
305   return data;
306 
307 error:
308   _iconv_to_ucs_ces_handlers_table.close (data);
309   return NULL;
310 error1:
311   free ((void *)data);
312   return NULL;
313 }
314 
315 static size_t
euc_to_ucs_close(void * data)316 euc_to_ucs_close (
317                          void *data)
318 {
319   int i;
320   size_t res = 0;
321 
322   for (i = 0; i < MAX_CS_NUM; i++)
323     {
324       if (((euc_data_t *)data)->data[i] != NULL)
325         res |= _iconv_to_ucs_ces_handlers_table.close (
326                                                 ((euc_data_t *)data)->data[i]);
327     }
328   free(data);
329 
330   return res;
331 }
332 
333 static ucs4_t
euc_convert_to_ucs(void * data,const unsigned char ** inbuf,size_t * inbytesleft)334 euc_convert_to_ucs (void *data,
335                            const unsigned char **inbuf,
336                            size_t *inbytesleft)
337 {
338   int i;
339   int j;
340   ucs4_t res;
341   unsigned char buf[ICONV_MB_LEN_MAX];
342   size_t inbytesleft1;
343   euc_data_t *d = (euc_data_t *)data;
344   unsigned char *inbuf1 = &buf[0];
345 
346   if (**inbuf < 0x80) /* CS0 is always ASCII */
347     return _iconv_to_ucs_ces_handlers_us_ascii.convert_to_ucs (
348                                                          NULL,
349                                                          inbuf,
350                                                          inbytesleft);
351 
352   for (i = 1; d->desc[i].csname != NULL; i++)
353     {
354       if (memcmp((const void *)(*inbuf),
355                  (const void *)d->desc[i].prefix,
356                  d->desc[i].prefixbytes) == 0)
357         {
358           if (((int)*inbytesleft - d->desc[i].prefixbytes - d->desc[i].bytes) < 0)
359             return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
360 
361           if (d->desc[i].touchmsb)
362             for (j = 0; j < d->desc[i].bytes; j++)
363               {
364                 if (!((*inbuf)[j + d->desc[i].prefixbytes] & 0x80))
365                   return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
366                 inbuf1[j] = (*inbuf)[j + d->desc[i].prefixbytes] & 0x7F;
367               }
368           else
369             for (j = 0; j < d->desc[i].bytes; j++)
370               inbuf1[j] = (*inbuf)[j + d->desc[i].prefixbytes];
371 
372           inbytesleft1 = d->desc[i].bytes;
373 
374           res = _iconv_to_ucs_ces_handlers_table.convert_to_ucs (
375                                              d->data[i],
376                                              (const unsigned char **)&inbuf1,
377                                              &inbytesleft1);
378           if (((__int32_t)res) > 0)
379             {
380               *inbuf += d->desc[i].bytes +  d->desc[i].prefixbytes;
381               *inbytesleft -= d->desc[i].bytes + d->desc[i].prefixbytes;
382             }
383 
384           return res;
385         }
386     }
387 
388   /* Process CS1 */
389   if (((int)(*inbytesleft - d->desc[0].prefixbytes - d->desc[0].bytes)) < 0)
390     return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
391 
392   if (d->desc[0].touchmsb)
393     for (j = 0; j < d->desc[0].bytes; j++)
394       {
395         if (!((*inbuf)[j + d->desc[0].prefixbytes] & 0x80))
396           return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
397         inbuf1[j] = (*inbuf)[j] & 0x7F;
398       }
399   else
400     for (j = 0; j < d->desc[0].bytes; j++)
401       inbuf1[j] = (*inbuf)[j];
402 
403   inbytesleft1 = d->desc[0].bytes;
404 
405   res = _iconv_to_ucs_ces_handlers_table.convert_to_ucs (
406                                         d->data[0],
407                                         (const unsigned char **)&inbuf1,
408                                         &inbytesleft1);
409   if (((__int32_t)res) > 0)
410     {
411       *inbuf += d->desc[0].bytes;
412       *inbytesleft -= d->desc[0].bytes;
413     }
414 
415   return res;
416 }
417 #endif /* ICONV_TO_UCS_CES_EUC */
418 
419 static int
euc_get_mb_cur_max(void * data)420 euc_get_mb_cur_max (void *data)
421 {
422   return ((euc_data_t *)data)->mb_cur_max;
423 }
424 
#if defined (ICONV_FROM_UCS_CES_EUC)
/* From-UCS handler table of the EUC converter. */
const iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_euc =
{
  euc_from_ucs_init,    /* Create the converter state */
  euc_from_ucs_close,   /* Destroy the converter state */
  euc_get_mb_cur_max,   /* Report mb_cur_max */
  NULL,                 /* The next three slots are left empty */
  NULL,
  NULL,
  euc_convert_from_ucs  /* Encode one UCS character */
};
#endif
438 
#if defined (ICONV_TO_UCS_CES_EUC)
/* To-UCS handler table of the EUC converter. */
const iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_euc =
{
  euc_to_ucs_init,      /* Create the converter state */
  euc_to_ucs_close,     /* Destroy the converter state */
  euc_get_mb_cur_max,   /* Report mb_cur_max */
  NULL,                 /* The next three slots are left empty */
  NULL,
  NULL,
  euc_convert_to_ucs    /* Decode one character into UCS */
};
#endif
452 
453 #endif /* ICONV_TO_UCS_CES_EUC || ICONV_FROM_UCS_CES_EUC */
454 
455 
456