1 /*
2 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 #include <sys/types.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include "local.h"
31 #include "conv.h"
32 #include "ucsconv.h"
33
/*
 * Placeholder object: its address is stored as the CES converter's "data"
 * pointer when that converter provides no init handler, so that the data
 * pointer of an opened converter is never NULL.
 */
static int fake_data;
35
/*
 * Forward declaration: looks "searchee" up in the NULL-terminated "names"
 * array; returns 0 when found and -1 otherwise.
 */
static int find_encoding_name (const char *searchee, const char **names);
39
40
41 /*
42 * UCS-based conversion interface functions implementation.
43 */
44
45 static void *
ucs_based_conversion_open(const char * to,const char * from)46 ucs_based_conversion_open (
47 const char *to,
48 const char *from)
49 {
50 iconv_ucs_conversion_t *uc;
51 const iconv_to_ucs_ces_t *to_ucs_bices;
52 const iconv_from_ucs_ces_t *from_ucs_bices;
53
54 uc = (iconv_ucs_conversion_t *)
55 calloc (1, sizeof (iconv_ucs_conversion_t));
56 if (uc == NULL)
57 return NULL;
58
59 /*
60 * Find CES converter for "from" encoding ("from" source encoding corresponds
61 * to "to_ucs" CES converter).
62 */
63 for (to_ucs_bices = &_iconv_to_ucs_ces[0];
64 to_ucs_bices->names != NULL;
65 to_ucs_bices++)
66 {
67 if (find_encoding_name (from, to_ucs_bices->names) == 0)
68 break;
69 }
70
71 /*
72 * Find CES converter for "to" encoding ("to" source encoding corresponds
73 * to "from_ucs" CES converter).
74 */
75 for (from_ucs_bices = &_iconv_from_ucs_ces[0];
76 from_ucs_bices->names != NULL;
77 from_ucs_bices++)
78 {
79 if (find_encoding_name (to, from_ucs_bices->names) == 0)
80 break;
81 }
82
83 if (to_ucs_bices->names == NULL || from_ucs_bices->names == NULL)
84 goto error;
85
86 uc->to_ucs.handlers = to_ucs_bices->handlers;
87 uc->from_ucs.handlers = from_ucs_bices->handlers;
88
89 /* Initialize "to UCS" CES converter */
90 if (to_ucs_bices->handlers->init != NULL)
91 {
92 uc->to_ucs.data = to_ucs_bices->handlers->init (from);
93 if (uc->to_ucs.data == NULL)
94 goto error;
95 }
96 else
97 uc->to_ucs.data = (void *)&fake_data;
98
99
100 /* Initialize "from UCS" CES converter */
101 if (from_ucs_bices->handlers->init != NULL)
102 {
103 uc->from_ucs.data = from_ucs_bices->handlers->init (to);
104 if (uc->from_ucs.data == NULL)
105 goto error;
106 }
107 else
108 uc->from_ucs.data = (void *)&fake_data;
109
110 return uc;
111
112 error:
113 if (uc->to_ucs.data != NULL && uc->to_ucs.handlers->close != NULL)
114 uc->to_ucs.handlers->close (uc->to_ucs.data);
115
116 free ((void *)uc);
117
118 return NULL;
119 }
120
121
122 static size_t
ucs_based_conversion_close(void * data)123 ucs_based_conversion_close (
124 void *data)
125 {
126 iconv_ucs_conversion_t *uc;
127 size_t res = 0;
128
129 uc = (iconv_ucs_conversion_t *)data;
130
131 if (uc->from_ucs.handlers->close != NULL)
132 res = uc->from_ucs.handlers->close (uc->from_ucs.data);
133 if (uc->to_ucs.handlers->close != NULL)
134 res |= uc->to_ucs.handlers->close (uc->to_ucs.data);
135
136 free ((void *)data);
137
138 return res;
139 }
140
141
142 static size_t
ucs_based_conversion_convert(void * data,const unsigned char ** inbuf,size_t * inbytesleft,unsigned char ** outbuf,size_t * outbytesleft,int flags)143 ucs_based_conversion_convert (
144 void *data,
145 const unsigned char **inbuf,
146 size_t *inbytesleft,
147 unsigned char **outbuf,
148 size_t *outbytesleft,
149 int flags)
150 {
151 unsigned char outbuf1[ICONV_MB_LEN_MAX];
152 unsigned char *poutbuf1;
153 size_t res = 0;
154 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
155
156 while (*inbytesleft > 0)
157 {
158 register size_t bytes;
159 register ucs4_t ch;
160 const unsigned char *inbuf_save = *inbuf;
161 size_t inbyteslef_save = *inbytesleft;
162
163 if (*outbytesleft == 0)
164 {
165 _REENT_ERRNO (rptr) = E2BIG;
166 return (size_t)-1;
167 }
168
169 ch = uc->to_ucs.handlers->convert_to_ucs (uc->to_ucs.data,
170 inbuf, inbytesleft);
171
172 if (ch == (ucs4_t)ICONV_CES_BAD_SEQUENCE)
173 {
174 _REENT_ERRNO (rptr) = EINVAL;
175 return (size_t)-1;
176 }
177
178 if (ch == (ucs4_t)ICONV_CES_INVALID_CHARACTER)
179 {
180 _REENT_ERRNO (rptr) = EILSEQ;
181 return (size_t)-1;
182 }
183
184 if (flags & ICONV_DONT_SAVE_BIT)
185 {
186 poutbuf1 = &outbuf1[0];
187 outbuf = &poutbuf1;
188 }
189
190 bytes = uc->from_ucs.handlers->convert_from_ucs (uc->from_ucs.data, ch,
191 outbuf, outbytesleft);
192
193 if (bytes == (size_t)ICONV_CES_NOSPACE)
194 {
195 *inbuf = inbuf_save;
196 *inbytesleft = inbyteslef_save;
197 _REENT_ERRNO (rptr) = E2BIG;
198 return (size_t)-1;
199 }
200 else if (bytes == (size_t)ICONV_CES_INVALID_CHARACTER)
201 {
202 if (flags & ICONV_FAIL_BIT)
203 {
204 /* Generate error */
205 _REENT_ERRNO (rptr) = EILSEQ;
206 return (size_t)-1;
207 }
208 /*
209 * For this case SUSv3 stands: "if iconv() encounters a character in the
210 * input buffer that is valid, but for which an identical character does
211 * not exist in the target encoding, iconv() shall perform an
212 * implementation-defined conversion on this character".
213 * Don't generate error, just write default character.
214 */
215 bytes = uc->from_ucs.handlers->convert_from_ucs (
216 uc->from_ucs.data,
217 (ucs4_t)DEFAULT_CHARACTER,
218 outbuf,
219 outbytesleft);
220 if ((ssize_t)bytes < 0)
221 {
222 _REENT_ERRNO (rptr) = E2BIG;
223 return (size_t)-1;
224 }
225
226 res += 1;
227 }
228 }
229
230 return res;
231 }
232
233
234 static int
ucs_based_conversion_get_mb_cur_max(void * data,int direction)235 ucs_based_conversion_get_mb_cur_max (void *data,
236 int direction)
237 {
238 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
239
240 if (direction == 0)
241 return uc->to_ucs.handlers->get_mb_cur_max (uc->to_ucs.data);
242 else
243 return uc->from_ucs.handlers->get_mb_cur_max (uc->from_ucs.data);
244 }
245
246
247 static void
ucs_based_conversion_get_state(void * data,mbstate_t * state,int direction)248 ucs_based_conversion_get_state (void *data,
249 mbstate_t *state,
250 int direction)
251 {
252 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
253 mbstate_t nullstate = ICONV_ZERO_MB_STATE_T;
254
255 if (direction == 0)
256 {
257 if (uc->to_ucs.handlers->get_state != NULL)
258 uc->to_ucs.handlers->get_state (uc->to_ucs.data, state);
259 else
260 *state = nullstate; /* internal copy */
261 }
262 else
263 {
264 if (uc->from_ucs.handlers->get_state != NULL)
265 uc->from_ucs.handlers->get_state (uc->from_ucs.data, state);
266 else
267 *state = nullstate; /* internal copy */
268 }
269
270 return;
271 }
272
273
274 static int
ucs_based_conversion_set_state(void * data,mbstate_t * state,int direction)275 ucs_based_conversion_set_state (void *data,
276 mbstate_t *state,
277 int direction)
278 {
279 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
280
281 if (direction == 0)
282 {
283 if (uc->to_ucs.handlers->set_state != NULL)
284 return uc->to_ucs.handlers->set_state (uc->to_ucs.data, state);
285 }
286 else
287 {
288 if (uc->from_ucs.handlers->set_state != NULL)
289 return uc->from_ucs.handlers->set_state (uc->from_ucs.data, state);
290 }
291
292 return 0;
293 }
294
295 static int
ucs_based_conversion_is_stateful(void * data,int direction)296 ucs_based_conversion_is_stateful (void *data,
297 int direction)
298 {
299 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
300
301 if (direction == 0)
302 {
303 if (uc->to_ucs.handlers->is_stateful != NULL)
304 return uc->to_ucs.handlers->is_stateful (uc->to_ucs.data);
305 }
306 else
307 {
308 if (uc->from_ucs.handlers->is_stateful != NULL)
309 return uc->from_ucs.handlers->is_stateful (uc->from_ucs.data);
310 }
311
312 return 0;
313 }
314
315
316 /* UCS-based conversion definition object */
317 const iconv_conversion_handlers_t
318 _iconv_ucs_conversion_handlers =
319 {
320 ucs_based_conversion_open,
321 ucs_based_conversion_close,
322 ucs_based_conversion_convert,
323 ucs_based_conversion_get_state,
324 ucs_based_conversion_set_state,
325 ucs_based_conversion_get_mb_cur_max,
326 ucs_based_conversion_is_stateful
327 };
328
329
330 /*
331 * Supplementary functions.
332 */
333
/*
 * Look an encoding name up in a NULL-terminated array of names.
 *
 * searchee - name to look for (compared with strcmp, so the match is
 *            exact and case-sensitive);
 * names    - array of C strings terminated by a NULL pointer.
 *
 * Returns 0 if "searchee" equals one of the entries, -1 otherwise
 * (including when the array is empty).
 */
static int
find_encoding_name (const char *searchee,
                    const char **names)
{
  /* Visit each entry exactly once, stopping at the NULL terminator. */
  for (; *names != NULL; names++)
    if (strcmp (*names, searchee) == 0)
      return 0;

  return -1;
}
346
347