1 /*
2  * Copyright (c) 2003-2004, Artem B. Bityuckiy
3  * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #include "cesbi.h"
27 
28 #if defined (ICONV_TO_UCS_CES_TABLE) \
29  || defined (ICONV_FROM_UCS_CES_TABLE)
30 
31 #include <sys/types.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <sys/iconvnls.h>
38 #include "../lib/endian.h"
39 #include "../lib/local.h"
40 #include "../ccs/ccs.h"
41 
/*
 * Table-based CES converter is implemented here.  Table-based CES converter
 * deals with encodings with "null" CES, like KOI8-R. In this case it is
 * possible to implement one generic algorithm which works with different
 * CCS tables.
 *
 * Table-based CES converter deals with CCS tables placed into iconv/ccs
 * subdirectory. First, converter tries to find needed CCS table among
 * linked-in tables. If not found, it tries to load it from external file
 * (only if the corresponding capability was enabled in Newlib configuration).
 *
 * 16 bit encodings are assumed to be Big Endian.
 */
55 
56 static ucs2_t
57 find_code_size (ucs2_t code, const __uint16_t *tblp);
58 
59 static __inline ucs2_t
60 find_code_speed (ucs2_t code, const __uint16_t *tblp);
61 
62 static __inline ucs2_t
63 find_code_speed_8bit (ucs2_t code, const unsigned char *tblp);
64 
65 #ifdef _ICONV_ENABLE_EXTERNAL_CCS
66 static const iconv_ccs_desc_t *
67 load_file (const char *name, int direction);
68 #endif
69 
70 /*
71  * Interface data and functions implementation.
72  */
73 static size_t
table_close(void * data)74 table_close (
75                     void *data)
76 {
77   const iconv_ccs_desc_t *ccsp = (iconv_ccs_desc_t *)data;
78 
79   if (ccsp->type == TABLE_EXTERNAL)
80     free ((void *)ccsp->tbl);
81 
82   free((void *)ccsp);
83   return 0;
84 }
85 
86 #if defined (ICONV_FROM_UCS_CES_TABLE)
87 static void *
table_init_from_ucs(const char * encoding)88 table_init_from_ucs (
89                             const char *encoding)
90 {
91   int i;
92   const iconv_ccs_t *biccsp = NULL;
93   iconv_ccs_desc_t *ccsp;
94 
95   for (i = 0; _iconv_ccs[i] != NULL; i++)
96     if (strcmp (_iconv_ccs[i]->name, encoding) == 0)
97       {
98         biccsp = _iconv_ccs[i];
99         break;
100       }
101 
102   if (biccsp != NULL)
103     {
104       if (biccsp->from_ucs == NULL
105           || (ccsp = (iconv_ccs_desc_t *)
106                      malloc (sizeof (iconv_ccs_desc_t))) == NULL)
107         return NULL;
108 
109       ccsp->type = TABLE_BUILTIN;
110       ccsp->bits = biccsp->bits;
111       ccsp->optimization = biccsp->from_ucs_type;
112       ccsp->tbl = biccsp->from_ucs;
113 
114       return (void *)ccsp;
115     }
116 
117 #ifdef _ICONV_ENABLE_EXTERNAL_CCS
118   return (void *)load_file (encoding, 1);
119 #else
120   return NULL;
121 #endif
122 }
123 
124 static size_t
table_convert_from_ucs(void * data,ucs4_t in,unsigned char ** outbuf,size_t * outbytesleft)125 table_convert_from_ucs (void *data,
126                                ucs4_t in,
127                                unsigned char **outbuf,
128                                size_t *outbytesleft)
129 {
130   const iconv_ccs_desc_t *ccsp = (iconv_ccs_desc_t *)data;
131   ucs2_t code;
132 
133   if (in > 0xFFFF || in == INVALC)
134     return (size_t)ICONV_CES_INVALID_CHARACTER;
135 
136   if (ccsp->bits == TABLE_8BIT)
137     {
138       code = find_code_speed_8bit ((ucs2_t)in,
139                                   (const unsigned char *)ccsp->tbl);
140       if (code == INVALC)
141         return (size_t)ICONV_CES_INVALID_CHARACTER;
142       **outbuf = (unsigned char)code;
143       *outbuf += 1;
144       *outbytesleft -= 1;
145       return 1;
146     }
147   else if (ccsp->optimization == TABLE_SPEED_OPTIMIZED)
148     code = find_code_speed ((ucs2_t)in, ccsp->tbl);
149   else
150     code = find_code_size ((ucs2_t)in, ccsp->tbl);
151 
152   if (code == INVALC)
153     return (size_t)ICONV_CES_INVALID_CHARACTER;
154 
155   if (*outbytesleft < 2)
156     return (size_t)ICONV_CES_NOSPACE;
157 
158   /* We can't store whole word since **outbuf may be not 2-byte aligned */
159   **outbuf = (unsigned char)((ucs2_t)code >> 8);
160   *(*outbuf + 1) = (unsigned char)code;
161   *outbuf += 2;
162   *outbytesleft -= 2;
163   return 2;
164 }
165 #endif /* ICONV_FROM_UCS_CES_TABLE */
166 
167 #if defined (ICONV_TO_UCS_CES_TABLE)
168 static void *
table_init_to_ucs(const char * encoding)169 table_init_to_ucs (
170                           const char *encoding)
171 {
172   int i;
173   const iconv_ccs_t *biccsp = NULL;
174   iconv_ccs_desc_t *ccsp;
175 
176   for (i = 0; _iconv_ccs[i] != NULL; i++)
177     if (strcmp (_iconv_ccs[i]->name, encoding) == 0)
178       {
179         biccsp = _iconv_ccs[i];
180         break;
181       }
182 
183   if (biccsp != NULL)
184     {
185       if (biccsp->to_ucs == NULL
186           || (ccsp = (iconv_ccs_desc_t *)
187                      malloc (sizeof (iconv_ccs_desc_t))) == NULL)
188         return NULL;
189 
190       ccsp->type = TABLE_BUILTIN;
191       ccsp->bits = biccsp->bits;
192       ccsp->optimization = biccsp->to_ucs_type;
193       ccsp->tbl = biccsp->to_ucs;
194 
195       return (void *)ccsp;
196     }
197 
198 #ifdef _ICONV_ENABLE_EXTERNAL_CCS
199   return (void *)load_file (encoding, 0);
200 #else
201   return NULL;
202 #endif
203 }
204 
205 static ucs4_t
table_convert_to_ucs(void * data,const unsigned char ** inbuf,size_t * inbytesleft)206 table_convert_to_ucs (void *data,
207                              const unsigned char **inbuf,
208                              size_t *inbytesleft)
209 {
210   const iconv_ccs_desc_t *ccsp = (iconv_ccs_desc_t *)data;
211   ucs2_t ucs;
212 
213   if (ccsp->bits == TABLE_8BIT)
214     {
215       if (*inbytesleft < 1)
216         return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
217 
218       ucs = (ucs2_t)ccsp->tbl[**inbuf];
219 
220       if (ucs == INVALC)
221         return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
222 
223       *inbytesleft -= 1;
224       *inbuf += 1;
225       return (ucs4_t)ucs;
226     }
227 
228   if (*inbytesleft < 2)
229     return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
230 
231   if (ccsp->optimization == TABLE_SIZE_OPTIMIZED)
232     ucs = find_code_size((ucs2_t)**inbuf << 8 | (ucs2_t)*(*inbuf + 1),
233                          ccsp->tbl);
234   else
235     ucs = find_code_speed((ucs2_t)**inbuf << 8 | (ucs2_t)*(*inbuf + 1),
236                           ccsp->tbl);
237 
238   if (ucs == INVALC)
239     return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
240 
241   *inbuf += 2;
242   *inbytesleft -= 2;
243   return (ucs4_t)ucs;
244 }
245 #endif /* ICONV_TO_UCS_CES_TABLE */
246 
247 static int
table_get_mb_cur_max(void * data)248 table_get_mb_cur_max (void *data)
249 {
250   return ((iconv_ccs_desc_t *)data)->bits/8;
251 }
252 
253 
#if defined (ICONV_TO_UCS_CES_TABLE)
/*
 * "To UCS" handlers of the table-based CES converter.  The three NULL
 * slots are handlers this converter does not provide.
 * NOTE(review): the meaning of each slot is fixed by the declaration of
 * iconv_to_ucs_ces_handlers_t, which is not visible here - confirm there.
 */
const iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_table =
{
  table_init_to_ucs,
  table_close,
  table_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  table_convert_to_ucs
};
#endif /* ICONV_TO_UCS_CES_TABLE */
267 
#if defined (ICONV_FROM_UCS_CES_TABLE)
/*
 * "From UCS" handlers of the table-based CES converter.  The three NULL
 * slots are handlers this converter does not provide.
 * NOTE(review): the meaning of each slot is fixed by the declaration of
 * iconv_from_ucs_ces_handlers_t, which is not visible here - confirm there.
 */
const iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_table =
{
  table_init_from_ucs,
  table_close,
  table_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  table_convert_from_ucs
};
#endif /* ICONV_FROM_UCS_CES_TABLE */
281 
282 /*
283  * Supplementary functions.
284  */
285 
286 /*
287  * find_code_speed - find code in 16 bit speed-optimized table.
288  *
289  * PARAMETERS:
290  *     ucs2_t code - code whose mapping to find.
291  *     const __uint16_t *tblp - table pointer.
292  *
293  * RETURN:
294  *     Code that corresponds to 'code'.
295  */
296 static __inline ucs2_t
find_code_speed(ucs2_t code,const __uint16_t * tblp)297 find_code_speed (ucs2_t code,
298                         const __uint16_t *tblp)
299 {
300   __uint16_t idx = tblp[code >> 8];
301 
302   if (idx == INVBLK)
303     return (ucs2_t)INVALC;
304 
305   return (ucs2_t)tblp[(code & 0x00FF) + idx];
306 }
307 
308 /*
309  * find_code_speed_8bit - find code in 8 bit speed-optimized table.
310  *
311  * PARAMETERS:
312  *     ucs2_t code - code whose mapping to find.
313  *     const __uint16_t *tblp - table pointer.
314  *
315  * RETURN:
316  *     Code that corresponds to 'code'.
317  */
318 static __inline ucs2_t
find_code_speed_8bit(ucs2_t code,const unsigned char * tblp)319 find_code_speed_8bit (ucs2_t code,
320                              const unsigned char *tblp)
321 {
322   __uint16_t idx;
323   unsigned char ccs;
324 
325   if (code == ((ucs2_t *)tblp)[0])
326     return (ucs2_t)0xFF;
327 
328   idx = ((ucs2_t *)tblp)[1 + (code >> 8)];
329 
330   if (idx == INVBLK)
331     return (ucs2_t)INVALC;
332 
333   ccs = tblp[(code & 0x00FF) + idx];
334 
335   return ccs == 0xFF ? (ucs2_t)INVALC : (ucs2_t)ccs;
336 }
337 
338 /* Left range boundary */
339 #define RANGE_LEFT(n)     (tblp[FIRST_RANGE_INDEX + (n)*3 + 0])
340 /* Right range boundary */
341 #define RANGE_RIGHT(n)    (tblp[FIRST_RANGE_INDEX + (n)*3 + 1])
342 /* Range offset */
343 #define RANGE_INDEX(n)    (tblp[FIRST_RANGE_INDEX + (n)*3 + 2])
344 /* Un-ranged offset */
345 #define UNRANGED_INDEX(n) (tblp[FIRST_UNRANGED_INDEX_INDEX] + (n)*2)
346 
347 /*
348  * find_code_size - find code in 16 bit size-optimized table.
349  *
350  * PARAMETERS:
351  *     ucs2_t code - code whose mapping to find.
352  *     const __uint16_t *tblp - table pointer.
353  *
354  * RETURN:
355  *     Code that corresponds to 'code'.
356  */
357 static ucs2_t
find_code_size(ucs2_t code,const __uint16_t * tblp)358 find_code_size (ucs2_t code,
359                        const __uint16_t *tblp)
360 {
361   int first, last, cur, center;
362 
363   if (tblp[RANGES_NUM_INDEX] > 0)
364     {
365       first = 0;
366       last = tblp[RANGES_NUM_INDEX] - 1;
367 
368       do
369         {
370           center = (last - first)/2;
371           cur = center + first;
372 
373           if (code > RANGE_RIGHT (cur))
374             first = cur;
375           else if (code < RANGE_LEFT (cur))
376             last = cur;
377           else
378             return (ucs2_t)tblp[RANGE_INDEX (cur) + code - RANGE_LEFT (cur)];
379         } while (center > 0);
380 
381         if (last - first == 1)
382           {
383             if (code >= RANGE_LEFT (first) && code <= RANGE_RIGHT (first))
384               return (ucs2_t)tblp[RANGE_INDEX (first)
385                                   + code - RANGE_LEFT (first)];
386             if (code >= RANGE_LEFT (last) && code <= RANGE_RIGHT (last))
387               return (ucs2_t)tblp[RANGE_INDEX (last)
388                                   + code - RANGE_LEFT (last)];
389           }
390     }
391 
392   if (tblp[UNRANGED_NUM_INDEX] > 0)
393     {
394       first = 0;
395       last = tblp[UNRANGED_NUM_INDEX] - 1;
396 
397       do
398         {
399           __uint16_t c;
400 
401           center = (last - first)/2;
402           cur = center + first;
403           c = tblp[UNRANGED_INDEX (cur)];
404 
405           if (code > c)
406             first = cur;
407           else if (code < c)
408             last = cur;
409           else
410             return (ucs2_t)tblp[UNRANGED_INDEX (cur) + 1];
411         } while (center > 0);
412 
413         if (last - first == 1)
414           {
415             if (code == tblp[UNRANGED_INDEX (first)])
416               return (ucs2_t)tblp[UNRANGED_INDEX (first) + 1];
417             if (code == tblp[UNRANGED_INDEX (last)])
418               return (ucs2_t)tblp[UNRANGED_INDEX (last) + 1];
419           }
420     }
421 
422   return (ucs2_t)INVALC;
423 }
424 
#ifdef _ICONV_ENABLE_EXTERNAL_CCS

/*
 * Header field readers.  Both expect a variable named 'buf' holding the
 * file header to be in scope and pass the raw value found at byte 'offset'
 * through ICONV_BETOHS / ICONV_BETOHL.
 */
#define _16BIT_ELT(offset) \
    ICONV_BETOHS(*((__uint16_t *)(buf + (offset))))
#define _32BIT_ELT(offset) \
    ICONV_BETOHL(*((__uint32_t *)(buf + (offset))))

/*
 * load_file - load conversion table from external file and initialize
 *             iconv_ccs_desc_t object.
 *
 * PARAMETERS:
 *    const char *name - encoding name.
 *    int direction - conversion direction.
 *
 * DESCRIPTION:
 *    Reads the file header, checks the table version and the CCS name
 *    stored in it, then reads the requested conversion table and
 *    initializes an 'iconv_ccs_desc_t' table description structure.
 *    If 'direction' is 0 - load "To UCS" table, else load "From UCS"
 *    table.  For 16 bit tables the size- or speed-optimized variant is
 *    chosen at compile time by TABLE_USE_SIZE_OPTIMIZATION.
 *
 *    The file is only ever read forward, so a table offset that lies
 *    before the end of the header cannot be reached and is rejected, as
 *    is a table of zero length.
 *
 * RETURN:
 *    iconv_ccs_desc_t * pointer on success, NULL on failure.  Both the
 *    descriptor and its table are heap-allocated; table_close() releases
 *    them.
 */
static const iconv_ccs_desc_t *
load_file (const char *name,
           int direction)
{
  FILE *file;
  char *buf;
  size_t tbllen;
  size_t hdrlen;
  off_t off;
  off_t cur = 0;   /* Number of bytes consumed from the file so far */
  const char *fname;
  iconv_ccs_desc_t *ccsp = NULL;
  size_t nmlen = strlen(name);
  /* Since CCS table name length can vary - it is aligned (by adding extra
   * bytes to its end) to 4-byte boundary. */
  int alignment = nmlen & 3 ? 4 - (nmlen & 3) : 0;

  hdrlen = nmlen + EXTTABLE_HEADER_LEN + alignment;

  if ((fname = _iconv_nls_construct_filename (name, ICONV_SUBDIR,
                                              ICONV_DATA_EXT)) == NULL)
    return NULL;

  if ((file = fopen (fname, "rb")) == NULL)
    goto error1;

  if ((buf = malloc (hdrlen)) == NULL)
    goto error2;

  if (fread ((void *) buf, 1, hdrlen, file) != hdrlen)
    goto error3;

  cur += hdrlen;

  if (_16BIT_ELT (EXTTABLE_VERSION_OFF) != TABLE_VERSION_1
      || _32BIT_ELT (EXTTABLE_CCSNAME_LEN_OFF) != nmlen
      || strncmp (buf + EXTTABLE_CCSNAME_OFF, name, nmlen) != 0)
    goto error3; /* Bad file */

  /* calloc leaves 'tbl' NULL and 'optimization' zero until they are set. */
  if ((ccsp = (iconv_ccs_desc_t *)
           calloc (1, sizeof (iconv_ccs_desc_t))) == NULL)
    goto error3;

  ccsp->bits = _16BIT_ELT (EXTTABLE_BITS_OFF);
  ccsp->type = TABLE_EXTERNAL;

  /* Add 4-byte alignment to name length */
  nmlen += alignment;

  if (ccsp->bits == TABLE_8BIT)
    {
      /* 8 bit tables are always loaded in their speed-optimized form. */
      if (direction == 0) /* Load "To UCS" table */
        {
          off = (off_t)_32BIT_ELT (nmlen + EXTTABLE_TO_SPEED_OFF);
          tbllen = _32BIT_ELT (nmlen + EXTTABLE_TO_SPEED_LEN_OFF);
        }
      else /* Load "From UCS" table */
        {
          off = (off_t)_32BIT_ELT (nmlen + EXTTABLE_FROM_SPEED_OFF);
          tbllen = _32BIT_ELT (nmlen + EXTTABLE_FROM_SPEED_LEN_OFF);
        }
    }
  else if (ccsp->bits == TABLE_16BIT)
    {
      if (direction == 0) /* Load "To UCS" table */
        {
#ifdef TABLE_USE_SIZE_OPTIMIZATION
          off = (off_t)_32BIT_ELT (nmlen + EXTTABLE_TO_SIZE_OFF);
          tbllen = _32BIT_ELT (nmlen + EXTTABLE_TO_SIZE_LEN_OFF);
#else
          off = (off_t)_32BIT_ELT (nmlen + EXTTABLE_TO_SPEED_OFF);
          tbllen = _32BIT_ELT (nmlen + EXTTABLE_TO_SPEED_LEN_OFF);
#endif
        }
      else /* Load "From UCS" table */
        {
#ifdef TABLE_USE_SIZE_OPTIMIZATION
          off = (off_t)_32BIT_ELT (nmlen + EXTTABLE_FROM_SIZE_OFF);
          tbllen = _32BIT_ELT (nmlen + EXTTABLE_FROM_SIZE_LEN_OFF);
#else
          off = (off_t)_32BIT_ELT (nmlen + EXTTABLE_FROM_SPEED_OFF);
          tbllen = _32BIT_ELT (nmlen + EXTTABLE_FROM_SPEED_LEN_OFF);
#endif
        }
#ifdef TABLE_USE_SIZE_OPTIMIZATION
      ccsp->optimization = TABLE_SIZE_OPTIMIZED;
#else
      ccsp->optimization = TABLE_SPEED_OPTIMIZED;
#endif
    }
  else
    goto error4; /* Bad file */

  if (off == EXTTABLE_NO_TABLE)
    goto error4; /* No corresponding table in file */

  /* The table is reached by reading forward from the end of the header.
     An offset before that point would make the read below start at the
     wrong place, and an empty table cannot serve any lookup. */
  if (off < cur || tbllen == 0)
    goto error4; /* Bad file */

  if ((ccsp->tbl = (ucs2_t *)malloc (tbllen)) == NULL)
    goto error4;

  /* Skip forward to the table, one byte at a time. */
  while (cur < off) {
    if (getc(file) == EOF)
      goto error5;
    cur++;
  }
  if (fread ((void *) ccsp->tbl, 1, tbllen, file) != tbllen)
    goto error5;

  goto normal_exit;

  /* Cleanup: each label releases one resource and falls through to the
     next one; the success path joins at 'normal_exit'. */
error5:
  free ((void *)ccsp->tbl);
  ccsp->tbl = NULL;
error4:
  free ((void *)ccsp);
  ccsp = NULL;
error3:
normal_exit:
  free ((void *)buf);
error2:
  if (fclose (file) == EOF)
    {
      /* A failed close turns an otherwise successful load into a failure. */
      if (ccsp != NULL)
        {
          if (ccsp->tbl != NULL)
            free ((void *)ccsp->tbl);
          free ((void *)ccsp);
        }
      ccsp = NULL;
    }
error1:
  free ((void *)fname);
  return ccsp;
}
#endif
584 
585 #endif /* ICONV_TO_UCS_CES_TABLE || ICONV_FROM_UCS_CES_TABLE */
586 
587