1 /*
2  * Copyright (c) 2003-2004, Artem B. Bityuckiy
3  * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #ifndef __ICONV_UCS_CONVERSION_H__
27 #define __ICONV_UCS_CONVERSION_H__
28 
29 #include <_ansi.h>
30 #include <sys/types.h>
31 #include <wchar.h>
32 #include "local.h"
33 
/*
 * Status codes returned by the CES converter handlers
 * (convert_to_ucs / convert_from_ucs).
 *
 * The negative codes are parenthesized so that they expand safely inside
 * any expression. They are returned through unsigned-capable return types
 * (e.g. size_t), so callers should compare against the macro itself rather
 * than test for "< 0".
 */

/* Not enough space in output buffer */
#define ICONV_CES_NOSPACE 0
/*
 * Invalid input character, or (when converting from UCS) no corresponding
 * character in destination encoding
 */
#define ICONV_CES_INVALID_CHARACTER (-1)
/* Invalid or incomplete input byte sequence */
#define ICONV_CES_BAD_SEQUENCE (-2)
/* All unknown characters are marked by this code */
#define DEFAULT_CHARACTER 0x3f /* ASCII '?' */
42 
43 
44 /*
45  * iconv_to_ucs_ces_handlers_t - "to UCS" CES converter handlers.
46  *
47  * Structure contains function pointers which should be provided by
48  * "to_ucs" CES converter.
49  *
50  * ============================================================================
51  */
52 typedef struct
53 {
54   /*
55    * init - initialize CES converter.
56    *
57    * PARAMETERS:
58    *   const char *encoding - encoding name.
59    *
60    * DESCRIPTION:
61    *  Initializes CES converter. CES converter may deal with a series of
62    *  encodings, such as Table or EUC CES converters. 'encoding' parameter
63    *  indicates which encoding CES converter should use.
64    *
65    * RETURN:
66    *   Returns CES-specific data pointer if success. In case of error returns
67    *   NULL and sets current thread's/process's errno.
68    */
69   void *(*init) (
70                           const char *encoding);
71 
72   /*
73    * close - close CES converter.
74    *
75    * PARAMETERS:
76    *   void *data      - CES converter-specific data.
77    *
78    * DESCRIPTION:
79    *     Preforms CES converter closing.   *
80    * RETURN:
81    *   Returns (size_t)0 if success. In case of error returns (size_t)-1 and
82    *   sets current thread's/process's errno.
83    */
84   size_t (*close) (
85                         void *data);
86 
87   /*
88    * get_mb_cur_max - get maximum character length in bytes.
89    *
90    * PARAMETERS:
91    *   void *data     - conversion-specific data;
92    *
93    * DESCRIPTION:
94    *   Returns encoding's maximum character length.
95    */
96   int (*get_mb_cur_max) (void *data);
97 
98   /*
99    * get_state - get current shift state.
100    *
101    * PARAMETERS:
102    *   void *data   - conversion-specific data;
103    *   mbstate_t *state - mbstate_t object where shift state will be stored;
104    *
105    * DESCRIPTION:
106    *   Returns encoding's current shift sequence.
107    */
108   void (*get_state) (void *data,
109                            mbstate_t *state);
110 
111   /*
112    * set_state - set shift state.
113    *
114    * PARAMETERS:
115    *   void *data   - conversion-specific data;
116    *   mbstate_t *state - mbstate_t value to which shift state will be set.
117    *
118    * DESCRIPTION:
119    *   Sets encoding's current shift state to 'state'. if 'state'
120    *   object is zero-object - reset current shift state.
121    *   Returns 0 if '*state' object has right format, -1 else.
122    */
123   int (*set_state) (void *data,
124                          mbstate_t *state);
125 
126   /*
127    * is_stateful - is encoding stateful state.
128    *
129    * PARAMETERS:
130    *   void *data   - conversion-specific data;
131    *
132    * DESCRIPTION:
133    *   Returns 0 if encoding is stateless, else returns 1.
134    */
135   int (*is_stateful) (void *data);
136 
137   /*
138    * convert_to_ucs - convert character to UCS.
139    *
140    * PARAMETERS:
141    *   void *data               - CES converter-specific data;
142    *   const unsigned char **inbuf - buffer with input character byte sequence;
143    *   size_t *inbytesleft          - output buffer bytes count.
144    *
145    * DESCRIPTION:
146    *   Converts input characters into UCS encoding. 'inbuf' is
147    *   incremented accordingly. 'bytesleft' is decremented accordingly. Should
148    *   be provided by correspondent CES module.
149    *
150    * RETURN:
151    *   Returns resulting UCS code if success. If input character is invalid,
152    *   returns ICONV_CES_INVALID_CHARACTER. If invalid or incomplete bytes
153    *   sequence was met, returns ICONV_CES_BAD_SEQUENCE.
154    */
155   ucs4_t (*convert_to_ucs) (void *data,
156                                  const unsigned char **inbuf,
157                                  size_t *inbytesleft);
158 } iconv_to_ucs_ces_handlers_t;
159 
160 
161 /*
162  * iconv_from_ucs_ces_handlers_t - "from UCS" CES converter handlers.
163  *
164  * Structure contains function pointers which should be provided by
165  * "from_ucs" CES converter.
166  *
167  * ============================================================================
168  */
169 typedef struct
170 {
171   /* Same as in iconv_to_ucs_ces_handlers_t */
172   void *(*init) (
173                           const char *encoding);
174 
175   /* Same as in iconv_to_ucs_ces_handlers_t */
176   size_t (*close) (
177                         void *data);
178 
179   /* Same as in iconv_to_ucs_ces_handlers_t */
180   int (*get_mb_cur_max) (void *data);
181 
182   /* Same as in iconv_to_ucs_ces_handlers_t */
183   void (*get_state) (void *data,
184                            mbstate_t *state);
185 
186   /* Same as in iconv_to_ucs_ces_handlers_t */
187   int (*set_state) (void *data,
188                          mbstate_t *state);
189 
190   /* Same as in iconv_to_ucs_ces_handlers_t */
191   int (*is_stateful) (void *data);
192 
193   /*
194    * convert_from_ucs - convert UCS character to destination encoding.
195    *
196    * PARAMETERS:
197    *   void *data         - CES converter-specific data;
198    *   ucs4_t in              - input UCS-4 character;
199    *   unsigned char **outbuf - output buffer for the result;
200    *   size_t *outbytesleft   - output buffer bytes count.
201    *
202    * DESCRIPTION:
203    *   Converts input UCS characters to destination encoding and stores result
204    *   in 'outbuf' if there is sufficient free space present. 'outbuf' is
205    *   incremented accordingly. 'outbytesleft' is decremented accordingly. Should
206    *   be provided by correspondent CES module.
207    *   Output buffer always has at least 1 byte.
208    *
209    * RETURN:
210    *   Returns number of bytes that was written into output buffer if success.
211    *   If there is no enough space in output buffer, returns ICONV_CES_NOSPACE.
212    *   If there is no corresponding character in destination encoding, returns
213    *   ICONV_CES_INVALID_CHARACTER.
214    */
215   size_t (*convert_from_ucs) (void *data,
216                                    ucs4_t in,
217                                    unsigned char **outbuf,
218                                    size_t *outbytesleft);
219 } iconv_from_ucs_ces_handlers_t;
220 
221 
222 /*
223  * iconv_to_ucs_ces_desc_t - "to UCS" CES converter definition structure for
224  * usage in iconv_ucs_conversion_t conversion description structure.
225  *
226  * ============================================================================
227  */
228 typedef struct
229 {
230   /* CES converter handlers */
231   const iconv_to_ucs_ces_handlers_t *handlers;
232 
233   /* "to_ucs" CES converter-specific data. */
234   void *data;
235 } iconv_to_ucs_ces_desc_t;
236 
237 
238 /*
239  * iconv_from_ucs_ces_desc_t - "from UCS" CES converter definition structure for
240  * usage in iconv_ucs_conversion_t conversion description structure.
241  *
242  * ============================================================================
243  */
244 typedef struct
245 {
246   /* CES converter handlers */
247   const iconv_from_ucs_ces_handlers_t *handlers;
248 
249   /* "from_ucs" CES converter-specific data. */
250   void *data;
251 } iconv_from_ucs_ces_desc_t;
252 
253 
254 /*
255  * iconv_ucs_conversion_t - UCS-based conversion definition structure.
256  *
257  * Defines special type of conversion where every character is first
258  * converted into UCS-4 (UCS-2 for table-driven), and after this the
259  * resulting UCS character is converted to destination encoding.
260  * UCS-based conversion is composed of two *converters*, defined by
261  * iconv_ces_t structure. The iconv_ucs_conversion_t object is referred
262  * from iconv_conversion_t object using 'data' field.
263  *
264  * Structure contains two objects - 'to_ucs' and 'from_ucs' which define
265  * "source encoding to UCS" and "UCS to destination encoding" converters.
266  *
267  * ============================================================================
268  */
269 typedef struct
270 {
271   /* Source encoding -> CES converter. */
272   iconv_to_ucs_ces_desc_t to_ucs;
273 
274   /* UCS -> destination encoding CES converter. */
275   iconv_from_ucs_ces_desc_t from_ucs;
276 } iconv_ucs_conversion_t;
277 
278 
279 /*
280  * iconv_to_ucs_ces_t - defines "to UCS" CES converter.
281  *
282  * ============================================================================
283  */
284 typedef struct
285 {
286   /*
287    * An array of encodings names, supported by CES converter.
288    * The end of array should be marked by NULL pointer.
289    */
290   const char **names;
291 
292   /* CES converter description structure */
293   const iconv_to_ucs_ces_handlers_t *handlers;
294 } iconv_to_ucs_ces_t;
295 
296 
297 /*
298  * iconv_from_ucs_ces_t - defines "from UCS" CES converter.
299  *
300  * ============================================================================
301  */
302 typedef struct
303 {
304   /*
305    * An array of encodings names, supported by CES converter.
306    * The end of array should be marked by NULL pointer.
307    */
308   const char **names;
309 
310   /* CES converter description structure */
311   const iconv_from_ucs_ces_handlers_t *handlers;
312 } iconv_from_ucs_ces_t;
313 
314 
315 /* List of "to UCS" linked-in CES converters. */
316 extern const iconv_to_ucs_ces_t
317 _iconv_to_ucs_ces[];
318 
319 /* List of "from UCS" linked-in CES converters. */
320 extern const iconv_from_ucs_ces_t
321 _iconv_from_ucs_ces[];
322 
323 #endif /* !__ICONV_UCS_CONVERSION_H__ */
324 
325