1 /*
2 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 #include "cesbi.h"
27
28 #if defined (ICONV_TO_UCS_CES_EUC) \
29 || defined (ICONV_FROM_UCS_CES_EUC)
30
31 #include <string.h>
32 #include <stdlib.h>
33 #include <limits.h>
34 #include <sys/types.h>
35 #include "../lib/local.h"
36 #include "../ccs/ccsnames.h"
37
/* Encoding variant identifiers stored in euc_data_t.type. */
#define TYPE_EUC_JP   0
#define TYPE_EUC_KR   1
#define TYPE_EUC_TW   2

/* Capacity of euc_data_t.data[]: the largest number of non-ASCII
   character sets any single EUC encoding table in this file describes. */
#define MAX_CS_NUM    3
43
/*
 * Description of one character set (CS) carried inside an EUC encoding.
 *
 * Arrays of these descriptors are terminated by an entry whose csname
 * is NULL.  Both string members only ever point at string literals and
 * are never written through, hence the const qualification.
 */
typedef struct
{
  const char *csname;  /* Name handed to the table converter's init */
  const char *prefix;  /* Bytes that precede the character's own bytes */
  int bytes;           /* Number of bytes per character, prefix excluded */
  int prefixbytes;     /* Number of bytes in prefix */
  int touchmsb;        /* If 1, msb will be set by euc converter */
} euc_cs_desc_t;
53
54 typedef struct
55 {
56 int type;
57 int mb_cur_max;
58 const euc_cs_desc_t *desc;
59
60 void *data[MAX_CS_NUM];
61 } euc_data_t;
62
63 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
64 || defined (_ICONV_FROM_ENCODING_EUC_JP) \
65 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
66 static const euc_cs_desc_t euc_jp_cs_desc[] =
67 {
68 {ICONV_CCS_JIS_X0208_1990, "", 2, 0, 1},
69 {ICONV_CCS_JIS_X0201_1976, "\x8e", 1, 1, 0},
70 {ICONV_CCS_JIS_X0212_1990, "\x8f", 2, 1, 1},
71 {0}
72 };
73 #endif
74
75 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
76 || defined (_ICONV_FROM_ENCODING_EUC_TW) \
77 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
78 static const euc_cs_desc_t euc_tw_cs_desc [] =
79 {
80 {ICONV_CCS_CNS11643_PLANE1, "", 2, 0, 1},
81 {ICONV_CCS_CNS11643_PLANE2, "\x8e\xa2", 2, 2, 1},
82 {ICONV_CCS_CNS11643_PLANE14, "\x8e\xae", 2, 2, 1},
83 {0}
84 };
85 #endif
86
87 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
88 || defined (_ICONV_FROM_ENCODING_EUC_KR) \
89 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
90 static const euc_cs_desc_t euc_kr_cs_desc [] =
91 {
92 {ICONV_CCS_KSX1001, "", 2, 0, 1},
93 {0}
94 };
95 #endif
96
97 #if defined (ICONV_FROM_UCS_CES_EUC)
98 static void *
euc_from_ucs_init(const char * encoding)99 euc_from_ucs_init (
100 const char *encoding)
101 {
102 int i;
103 euc_data_t *data;
104
105 if ((data = (euc_data_t *)calloc (1, sizeof (euc_data_t))) == NULL)
106 return 0;
107
108 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
109 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
110 if (strcmp (encoding, ICONV_ENCODING_EUC_JP) == 0)
111 {
112 data->type = TYPE_EUC_JP;
113 data->mb_cur_max = 3;
114 data->desc = &euc_jp_cs_desc[0];
115 goto ok;
116 }
117 #endif
118 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
119 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
120 if (strcmp (encoding, ICONV_ENCODING_EUC_KR) == 0)
121 {
122 data->type = TYPE_EUC_KR;
123 data->mb_cur_max = 2;
124 data->desc = &euc_kr_cs_desc[0];
125 goto ok;
126 }
127 #endif
128 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
129 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
130 if (strcmp (encoding, ICONV_ENCODING_EUC_TW) == 0)
131 {
132 data->type = TYPE_EUC_TW;
133 data->mb_cur_max = 4;
134 data->desc = &euc_tw_cs_desc[0];
135 goto ok;
136 }
137 #endif
138
139 goto error1;
140
141 ok:
142 for (i = 0; i < MAX_CS_NUM && data->desc[i].csname != NULL; i++)
143 {
144 data->data[i] = _iconv_from_ucs_ces_handlers_table.init (
145 data->desc[i].csname);
146 if (data->data[i] == NULL)
147 goto error;
148 }
149
150 return data;
151
152 error:
153 _iconv_from_ucs_ces_handlers_table.close (data);
154 return NULL;
155 error1:
156 free ((void *)data);
157 return NULL;
158 }
159
160 static size_t
euc_from_ucs_close(void * data)161 euc_from_ucs_close (
162 void *data)
163 {
164 int i;
165 size_t res = 0;
166
167 for (i = 0; i < MAX_CS_NUM; i++)
168 {
169 if (((euc_data_t *)data)->data[i] != NULL)
170 res |= _iconv_from_ucs_ces_handlers_table.close (
171 ((euc_data_t *)data)->data[i]);
172 }
173 free(data);
174
175 return res;
176 }
177
178 static size_t
euc_convert_from_ucs(void * data,register ucs4_t in,unsigned char ** outbuf,size_t * outbytesleft)179 euc_convert_from_ucs (void *data,
180 register ucs4_t in,
181 unsigned char **outbuf,
182 size_t *outbytesleft)
183 {
184 int i;
185 int j;
186 int res;
187 unsigned char *outbuf1;
188 size_t outbytesleft1;
189 euc_data_t *d = (euc_data_t *)data;
190
191 if (in < 0x80) /* CS0 ASCII */
192 return _iconv_from_ucs_ces_handlers_us_ascii.convert_from_ucs (
193 NULL,
194 in,
195 outbuf,
196 outbytesleft);
197
198 /* Try other CS */
199 for (i = 0; d->desc[i].csname != NULL; i++)
200 {
201
202 if (((int)*outbytesleft - d->desc[i].prefixbytes - d->desc[i].bytes) < 0)
203 {
204 unsigned char buf[ICONV_MB_LEN_MAX];
205 outbytesleft1 = ICONV_MB_LEN_MAX;
206 outbuf1 = &buf[0];
207 /* See wether this is right sequence */
208 res =
209 (int)_iconv_from_ucs_ces_handlers_table.convert_from_ucs (
210 d->data[i],
211 in,
212 &outbuf1,
213 &outbytesleft1);
214 if (res > 0)
215 return (size_t)ICONV_CES_NOSPACE;
216
217 continue;
218 }
219
220 outbuf1 = *outbuf + d->desc[i].prefixbytes;
221 outbytesleft1 = *outbytesleft - d->desc[i].prefixbytes;
222
223 res = (int)_iconv_from_ucs_ces_handlers_table.convert_from_ucs (
224 d->data[i],
225 in,
226 &outbuf1,
227 &outbytesleft1);
228 if (res == d->desc[i].bytes)
229 {
230 for (j = 0; j < d->desc[i].prefixbytes; j++)
231 (*outbuf)[j] = d->desc[i].prefix[j];
232
233 if (d->desc[i].touchmsb)
234 for (j = 0; j < d->desc[i].bytes; j++)
235 {
236 if ((*outbuf)[j + d->desc[i].prefixbytes] & 0x80)
237 return (size_t)ICONV_CES_INVALID_CHARACTER;
238 (*outbuf)[j + d->desc[i].prefixbytes] |= 0x80;
239 }
240
241 *outbuf = outbuf1;
242 *outbytesleft = outbytesleft1;
243
244 return (size_t)(res + d->desc[i].bytes);
245 }
246 }
247
248 return (size_t)ICONV_CES_INVALID_CHARACTER;
249 }
250 #endif /* ICONV_FROM_UCS_CES_EUC */
251
252 #if defined (ICONV_TO_UCS_CES_EUC)
253 static void *
euc_to_ucs_init(const char * encoding)254 euc_to_ucs_init (
255 const char *encoding)
256 {
257 int i;
258 euc_data_t *data;
259
260 if ((data = (euc_data_t *)calloc (1, sizeof (euc_data_t))) == NULL)
261 return 0;
262
263 #if defined (_ICONV_TO_ENCODING_EUC_JP) \
264 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
265 if (strcmp (encoding, ICONV_ENCODING_EUC_JP) == 0)
266 {
267 data->type = TYPE_EUC_JP;
268 data->mb_cur_max = 3;
269 data->desc = &euc_jp_cs_desc[0];
270 goto ok;
271 }
272 #endif
273 #if defined (_ICONV_TO_ENCODING_EUC_KR) \
274 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
275 if (strcmp (encoding, ICONV_ENCODING_EUC_KR) == 0)
276 {
277 data->type = TYPE_EUC_KR;
278 data->mb_cur_max = 2;
279 data->desc = &euc_kr_cs_desc[0];
280 goto ok;
281 }
282 #endif
283 #if defined (_ICONV_TO_ENCODING_EUC_TW) \
284 || defined (_ICONV_ENABLE_EXTERNAL_CCS)
285 if (strcmp (encoding, ICONV_ENCODING_EUC_TW) == 0)
286 {
287 data->type = TYPE_EUC_TW;
288 data->mb_cur_max = 4;
289 data->desc = &euc_tw_cs_desc[0];
290 goto ok;
291 }
292 #endif
293
294 goto error1;
295
296 ok:
297 for (i = 0; i < MAX_CS_NUM && data->desc[i].csname != NULL; i++)
298 {
299 data->data[i] = _iconv_to_ucs_ces_handlers_table.init (
300 data->desc[i].csname);
301 if (data->data[i] == NULL)
302 goto error;
303 }
304
305 return data;
306
307 error:
308 _iconv_to_ucs_ces_handlers_table.close (data);
309 return NULL;
310 error1:
311 free ((void *)data);
312 return NULL;
313 }
314
315 static size_t
euc_to_ucs_close(void * data)316 euc_to_ucs_close (
317 void *data)
318 {
319 int i;
320 size_t res = 0;
321
322 for (i = 0; i < MAX_CS_NUM; i++)
323 {
324 if (((euc_data_t *)data)->data[i] != NULL)
325 res |= _iconv_to_ucs_ces_handlers_table.close (
326 ((euc_data_t *)data)->data[i]);
327 }
328 free(data);
329
330 return res;
331 }
332
333 static ucs4_t
euc_convert_to_ucs(void * data,const unsigned char ** inbuf,size_t * inbytesleft)334 euc_convert_to_ucs (void *data,
335 const unsigned char **inbuf,
336 size_t *inbytesleft)
337 {
338 int i;
339 int j;
340 ucs4_t res;
341 unsigned char buf[ICONV_MB_LEN_MAX];
342 size_t inbytesleft1;
343 euc_data_t *d = (euc_data_t *)data;
344 unsigned char *inbuf1 = &buf[0];
345
346 if (**inbuf < 0x80) /* CS0 is always ASCII */
347 return _iconv_to_ucs_ces_handlers_us_ascii.convert_to_ucs (
348 NULL,
349 inbuf,
350 inbytesleft);
351
352 for (i = 1; d->desc[i].csname != NULL; i++)
353 {
354 if (memcmp((const void *)(*inbuf),
355 (const void *)d->desc[i].prefix,
356 d->desc[i].prefixbytes) == 0)
357 {
358 if (((int)*inbytesleft - d->desc[i].prefixbytes - d->desc[i].bytes) < 0)
359 return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
360
361 if (d->desc[i].touchmsb)
362 for (j = 0; j < d->desc[i].bytes; j++)
363 {
364 if (!((*inbuf)[j + d->desc[i].prefixbytes] & 0x80))
365 return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
366 inbuf1[j] = (*inbuf)[j + d->desc[i].prefixbytes] & 0x7F;
367 }
368 else
369 for (j = 0; j < d->desc[i].bytes; j++)
370 inbuf1[j] = (*inbuf)[j + d->desc[i].prefixbytes];
371
372 inbytesleft1 = d->desc[i].bytes;
373
374 res = _iconv_to_ucs_ces_handlers_table.convert_to_ucs (
375 d->data[i],
376 (const unsigned char **)&inbuf1,
377 &inbytesleft1);
378 if (((__int32_t)res) > 0)
379 {
380 *inbuf += d->desc[i].bytes + d->desc[i].prefixbytes;
381 *inbytesleft -= d->desc[i].bytes + d->desc[i].prefixbytes;
382 }
383
384 return res;
385 }
386 }
387
388 /* Process CS1 */
389 if (((int)(*inbytesleft - d->desc[0].prefixbytes - d->desc[0].bytes)) < 0)
390 return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
391
392 if (d->desc[0].touchmsb)
393 for (j = 0; j < d->desc[0].bytes; j++)
394 {
395 if (!((*inbuf)[j + d->desc[0].prefixbytes] & 0x80))
396 return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
397 inbuf1[j] = (*inbuf)[j] & 0x7F;
398 }
399 else
400 for (j = 0; j < d->desc[0].bytes; j++)
401 inbuf1[j] = (*inbuf)[j];
402
403 inbytesleft1 = d->desc[0].bytes;
404
405 res = _iconv_to_ucs_ces_handlers_table.convert_to_ucs (
406 d->data[0],
407 (const unsigned char **)&inbuf1,
408 &inbytesleft1);
409 if (((__int32_t)res) > 0)
410 {
411 *inbuf += d->desc[0].bytes;
412 *inbytesleft -= d->desc[0].bytes;
413 }
414
415 return res;
416 }
417 #endif /* ICONV_TO_UCS_CES_EUC */
418
419 static int
euc_get_mb_cur_max(void * data)420 euc_get_mb_cur_max (void *data)
421 {
422 return ((euc_data_t *)data)->mb_cur_max;
423 }
424
425 #if defined (ICONV_FROM_UCS_CES_EUC)
426 const iconv_from_ucs_ces_handlers_t
427 _iconv_from_ucs_ces_handlers_euc =
428 {
429 euc_from_ucs_init,
430 euc_from_ucs_close,
431 euc_get_mb_cur_max,
432 NULL,
433 NULL,
434 NULL,
435 euc_convert_from_ucs
436 };
437 #endif
438
439 #if defined (ICONV_TO_UCS_CES_EUC)
440 const iconv_to_ucs_ces_handlers_t
441 _iconv_to_ucs_ces_handlers_euc =
442 {
443 euc_to_ucs_init,
444 euc_to_ucs_close,
445 euc_get_mb_cur_max,
446 NULL,
447 NULL,
448 NULL,
449 euc_convert_to_ucs
450 };
451 #endif
452
453 #endif /* ICONV_TO_UCS_CES_EUC || ICONV_FROM_UCS_CES_EUC */
454
455
456