1 /*
2 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 #include <_ansi.h>
27 #include <sys/types.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include "local.h"
32 #include "conv.h"
33 #include "ucsconv.h"
34
/*
 * Placeholder object.  Its address is stored as the "data" pointer of a CES
 * converter that has no init handler, so that the pointer is non-NULL.
 */
static int fake_data;

/* Defined at the end of this file; searches a NULL-terminated name list. */
static int find_encoding_name (const char *searchee, const char **names);
40
41
42 /*
43 * UCS-based conversion interface functions implementation.
44 */
45
46 static void *
ucs_based_conversion_open(const char * to,const char * from)47 ucs_based_conversion_open (
48 const char *to,
49 const char *from)
50 {
51 iconv_ucs_conversion_t *uc;
52 const iconv_to_ucs_ces_t *to_ucs_bices;
53 const iconv_from_ucs_ces_t *from_ucs_bices;
54
55 uc = (iconv_ucs_conversion_t *)
56 calloc (1, sizeof (iconv_ucs_conversion_t));
57 if (uc == NULL)
58 return NULL;
59
60 /*
61 * Find CES converter for "from" encoding ("from" source encoding corresponds
62 * to "to_ucs" CES converter).
63 */
64 for (to_ucs_bices = &_iconv_to_ucs_ces[0];
65 to_ucs_bices->names != NULL;
66 to_ucs_bices++)
67 {
68 if (find_encoding_name (from, to_ucs_bices->names) == 0)
69 break;
70 }
71
72 /*
73 * Find CES converter for "to" encoding ("to" source encoding corresponds
74 * to "from_ucs" CES converter).
75 */
76 for (from_ucs_bices = &_iconv_from_ucs_ces[0];
77 from_ucs_bices->names != NULL;
78 from_ucs_bices++)
79 {
80 if (find_encoding_name (to, from_ucs_bices->names) == 0)
81 break;
82 }
83
84 if (to_ucs_bices->names == NULL || from_ucs_bices->names == NULL)
85 goto error;
86
87 uc->to_ucs.handlers = to_ucs_bices->handlers;
88 uc->from_ucs.handlers = from_ucs_bices->handlers;
89
90 /* Initialize "to UCS" CES converter */
91 if (to_ucs_bices->handlers->init != NULL)
92 {
93 uc->to_ucs.data = to_ucs_bices->handlers->init (from);
94 if (uc->to_ucs.data == NULL)
95 goto error;
96 }
97 else
98 uc->to_ucs.data = (void *)&fake_data;
99
100
101 /* Initialize "from UCS" CES converter */
102 if (from_ucs_bices->handlers->init != NULL)
103 {
104 uc->from_ucs.data = from_ucs_bices->handlers->init (to);
105 if (uc->from_ucs.data == NULL)
106 goto error;
107 }
108 else
109 uc->from_ucs.data = (void *)&fake_data;
110
111 return uc;
112
113 error:
114 if (uc->to_ucs.data != NULL && uc->to_ucs.handlers->close != NULL)
115 uc->to_ucs.handlers->close (uc->to_ucs.data);
116
117 free ((void *)uc);
118
119 return NULL;
120 }
121
122
123 static size_t
ucs_based_conversion_close(void * data)124 ucs_based_conversion_close (
125 void *data)
126 {
127 iconv_ucs_conversion_t *uc;
128 size_t res = 0;
129
130 uc = (iconv_ucs_conversion_t *)data;
131
132 if (uc->from_ucs.handlers->close != NULL)
133 res = uc->from_ucs.handlers->close (uc->from_ucs.data);
134 if (uc->to_ucs.handlers->close != NULL)
135 res |= uc->to_ucs.handlers->close (uc->to_ucs.data);
136
137 free ((void *)data);
138
139 return res;
140 }
141
142
143 static size_t
ucs_based_conversion_convert(void * data,const unsigned char ** inbuf,size_t * inbytesleft,unsigned char ** outbuf,size_t * outbytesleft,int flags)144 ucs_based_conversion_convert (
145 void *data,
146 const unsigned char **inbuf,
147 size_t *inbytesleft,
148 unsigned char **outbuf,
149 size_t *outbytesleft,
150 int flags)
151 {
152 unsigned char outbuf1[ICONV_MB_LEN_MAX];
153 unsigned char *poutbuf1;
154 size_t res = 0;
155 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
156
157 while (*inbytesleft > 0)
158 {
159 register size_t bytes;
160 register ucs4_t ch;
161 const unsigned char *inbuf_save = *inbuf;
162 size_t inbyteslef_save = *inbytesleft;
163
164 if (*outbytesleft == 0)
165 {
166 _REENT_ERRNO (rptr) = E2BIG;
167 return (size_t)-1;
168 }
169
170 ch = uc->to_ucs.handlers->convert_to_ucs (uc->to_ucs.data,
171 inbuf, inbytesleft);
172
173 if (ch == (ucs4_t)ICONV_CES_BAD_SEQUENCE)
174 {
175 _REENT_ERRNO (rptr) = EINVAL;
176 return (size_t)-1;
177 }
178
179 if (ch == (ucs4_t)ICONV_CES_INVALID_CHARACTER)
180 {
181 _REENT_ERRNO (rptr) = EILSEQ;
182 return (size_t)-1;
183 }
184
185 if (flags & ICONV_DONT_SAVE_BIT)
186 {
187 poutbuf1 = &outbuf1[0];
188 outbuf = &poutbuf1;
189 }
190
191 bytes = uc->from_ucs.handlers->convert_from_ucs (uc->from_ucs.data, ch,
192 outbuf, outbytesleft);
193
194 if (bytes == (size_t)ICONV_CES_NOSPACE)
195 {
196 *inbuf = inbuf_save;
197 *inbytesleft = inbyteslef_save;
198 _REENT_ERRNO (rptr) = E2BIG;
199 return (size_t)-1;
200 }
201 else if (bytes == (size_t)ICONV_CES_INVALID_CHARACTER)
202 {
203 if (flags & ICONV_FAIL_BIT)
204 {
205 /* Generate error */
206 _REENT_ERRNO (rptr) = EILSEQ;
207 return (size_t)-1;
208 }
209 /*
210 * For this case SUSv3 stands: "if iconv() encounters a character in the
211 * input buffer that is valid, but for which an identical character does
212 * not exist in the target encoding, iconv() shall perform an
213 * implementation-defined conversion on this character".
214 * Don't generate error, just write default character.
215 */
216 bytes = uc->from_ucs.handlers->convert_from_ucs (
217 uc->from_ucs.data,
218 (ucs4_t)DEFAULT_CHARACTER,
219 outbuf,
220 outbytesleft);
221 if ((ssize_t)bytes < 0)
222 {
223 _REENT_ERRNO (rptr) = E2BIG;
224 return (size_t)-1;
225 }
226
227 res += 1;
228 }
229 }
230
231 return res;
232 }
233
234
235 static int
ucs_based_conversion_get_mb_cur_max(void * data,int direction)236 ucs_based_conversion_get_mb_cur_max (void *data,
237 int direction)
238 {
239 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
240
241 if (direction == 0)
242 return uc->to_ucs.handlers->get_mb_cur_max (uc->to_ucs.data);
243 else
244 return uc->from_ucs.handlers->get_mb_cur_max (uc->from_ucs.data);
245 }
246
247
248 static void
ucs_based_conversion_get_state(void * data,mbstate_t * state,int direction)249 ucs_based_conversion_get_state (void *data,
250 mbstate_t *state,
251 int direction)
252 {
253 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
254 mbstate_t nullstate = ICONV_ZERO_MB_STATE_T;
255
256 if (direction == 0)
257 {
258 if (uc->to_ucs.handlers->get_state != NULL)
259 uc->to_ucs.handlers->get_state (uc->to_ucs.data, state);
260 else
261 *state = nullstate; /* internal copy */
262 }
263 else
264 {
265 if (uc->from_ucs.handlers->get_state != NULL)
266 uc->from_ucs.handlers->get_state (uc->from_ucs.data, state);
267 else
268 *state = nullstate; /* internal copy */
269 }
270
271 return;
272 }
273
274
275 static int
ucs_based_conversion_set_state(void * data,mbstate_t * state,int direction)276 ucs_based_conversion_set_state (void *data,
277 mbstate_t *state,
278 int direction)
279 {
280 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
281
282 if (direction == 0)
283 {
284 if (uc->to_ucs.handlers->set_state != NULL)
285 return uc->to_ucs.handlers->set_state (uc->to_ucs.data, state);
286 }
287 else
288 {
289 if (uc->from_ucs.handlers->set_state != NULL)
290 return uc->from_ucs.handlers->set_state (uc->from_ucs.data, state);
291 }
292
293 return 0;
294 }
295
296 static int
ucs_based_conversion_is_stateful(void * data,int direction)297 ucs_based_conversion_is_stateful (void *data,
298 int direction)
299 {
300 iconv_ucs_conversion_t *uc = (iconv_ucs_conversion_t *)data;
301
302 if (direction == 0)
303 {
304 if (uc->to_ucs.handlers->is_stateful != NULL)
305 return uc->to_ucs.handlers->is_stateful (uc->to_ucs.data);
306 }
307 else
308 {
309 if (uc->from_ucs.handlers->is_stateful != NULL)
310 return uc->from_ucs.handlers->is_stateful (uc->from_ucs.data);
311 }
312
313 return 0;
314 }
315
316
317 /* UCS-based conversion definition object */
318 const iconv_conversion_handlers_t
319 _iconv_ucs_conversion_handlers =
320 {
321 ucs_based_conversion_open,
322 ucs_based_conversion_close,
323 ucs_based_conversion_convert,
324 ucs_based_conversion_get_state,
325 ucs_based_conversion_set_state,
326 ucs_based_conversion_get_mb_cur_max,
327 ucs_based_conversion_is_stateful
328 };
329
330
331 /*
332 * Supplementary functions.
333 */
334
/*
 * Look up an encoding name in a NULL-terminated array of names.
 *
 * The match is an exact strcmp() comparison.
 *
 * PARAMETERS:
 *   searchee - the name to look for.
 *   names    - array of name strings terminated by a NULL entry.
 *
 * RETURN:
 *   0 if "searchee" equals one of the entries, -1 otherwise.
 */
static int
find_encoding_name (const char *searchee,
                    const char **names)
{
  const char **entry;

  /* Visit each entry exactly once, stopping at the NULL terminator. */
  for (entry = names; *entry != NULL; entry++)
    if (strcmp (*entry, searchee) == 0)
      return 0;

  return -1;
}
347
348