1 /*
2  * Copyright (c) 2003-2004, Artem B. Bityuckiy
3  * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #ifndef __ICONV_UCS_CONVERSION_H__
27 #define __ICONV_UCS_CONVERSION_H__
28 
29 #include <sys/types.h>
30 #include <wchar.h>
31 #include "local.h"
32 
/*
 * Status codes which CES converter handlers return in place of a regular
 * result. The negative replacement lists are parenthesized so that each
 * macro keeps its value inside any surrounding expression.
 */

/* Not enough space in output buffer (see convert_from_ucs) */
#define ICONV_CES_NOSPACE 0
/*
 * Invalid input character (see convert_to_ucs), or no corresponding
 * character in destination encoding (see convert_from_ucs)
 */
#define ICONV_CES_INVALID_CHARACTER (-1)
/* Invalid or incomplete input byte sequence (see convert_to_ucs) */
#define ICONV_CES_BAD_SEQUENCE (-2)
/* All unknown characters are marked by this code */
#define DEFAULT_CHARACTER 0x3f /* ASCII '?' */
41 
42 
43 /*
44  * iconv_to_ucs_ces_handlers_t - "to UCS" CES converter handlers.
45  *
46  * Structure contains function pointers which should be provided by
47  * "to_ucs" CES converter.
48  *
49  * ============================================================================
50  */
51 typedef struct
52 {
53   /*
54    * init - initialize CES converter.
55    *
56    * PARAMETERS:
57    *   const char *encoding - encoding name.
58    *
59    * DESCRIPTION:
60    *  Initializes CES converter. CES converter may deal with a series of
61    *  encodings, such as Table or EUC CES converters. 'encoding' parameter
62    *  indicates which encoding CES converter should use.
63    *
64    * RETURN:
65    *   Returns CES-specific data pointer if success. In case of error returns
66    *   NULL and sets current thread's/process's errno.
67    */
68   void *(*init) (
69                           const char *encoding);
70 
71   /*
72    * close - close CES converter.
73    *
74    * PARAMETERS:
75    *   void *data      - CES converter-specific data.
76    *
77    * DESCRIPTION:
78    *     Preforms CES converter closing.   *
79    * RETURN:
80    *   Returns (size_t)0 if success. In case of error returns (size_t)-1 and
81    *   sets current thread's/process's errno.
82    */
83   size_t (*close) (
84                         void *data);
85 
86   /*
87    * get_mb_cur_max - get maximum character length in bytes.
88    *
89    * PARAMETERS:
90    *   void *data     - conversion-specific data;
91    *
92    * DESCRIPTION:
93    *   Returns encoding's maximum character length.
94    */
95   int (*get_mb_cur_max) (void *data);
96 
97   /*
98    * get_state - get current shift state.
99    *
100    * PARAMETERS:
101    *   void *data   - conversion-specific data;
102    *   mbstate_t *state - mbstate_t object where shift state will be stored;
103    *
104    * DESCRIPTION:
105    *   Returns encoding's current shift sequence.
106    */
107   void (*get_state) (void *data,
108                            mbstate_t *state);
109 
110   /*
111    * set_state - set shift state.
112    *
113    * PARAMETERS:
114    *   void *data   - conversion-specific data;
115    *   mbstate_t *state - mbstate_t value to which shift state will be set.
116    *
117    * DESCRIPTION:
118    *   Sets encoding's current shift state to 'state'. if 'state'
119    *   object is zero-object - reset current shift state.
120    *   Returns 0 if '*state' object has right format, -1 else.
121    */
122   int (*set_state) (void *data,
123                          mbstate_t *state);
124 
125   /*
126    * is_stateful - is encoding stateful state.
127    *
128    * PARAMETERS:
129    *   void *data   - conversion-specific data;
130    *
131    * DESCRIPTION:
132    *   Returns 0 if encoding is stateless, else returns 1.
133    */
134   int (*is_stateful) (void *data);
135 
136   /*
137    * convert_to_ucs - convert character to UCS.
138    *
139    * PARAMETERS:
140    *   void *data               - CES converter-specific data;
141    *   const unsigned char **inbuf - buffer with input character byte sequence;
142    *   size_t *inbytesleft          - output buffer bytes count.
143    *
144    * DESCRIPTION:
145    *   Converts input characters into UCS encoding. 'inbuf' is
146    *   incremented accordingly. 'bytesleft' is decremented accordingly. Should
147    *   be provided by correspondent CES module.
148    *
149    * RETURN:
150    *   Returns resulting UCS code if success. If input character is invalid,
151    *   returns ICONV_CES_INVALID_CHARACTER. If invalid or incomplete bytes
152    *   sequence was met, returns ICONV_CES_BAD_SEQUENCE.
153    */
154   ucs4_t (*convert_to_ucs) (void *data,
155                                  const unsigned char **inbuf,
156                                  size_t *inbytesleft);
157 } iconv_to_ucs_ces_handlers_t;
158 
159 
160 /*
161  * iconv_from_ucs_ces_handlers_t - "from UCS" CES converter handlers.
162  *
163  * Structure contains function pointers which should be provided by
164  * "from_ucs" CES converter.
165  *
166  * ============================================================================
167  */
168 typedef struct
169 {
170   /* Same as in iconv_to_ucs_ces_handlers_t */
171   void *(*init) (
172                           const char *encoding);
173 
174   /* Same as in iconv_to_ucs_ces_handlers_t */
175   size_t (*close) (
176                         void *data);
177 
178   /* Same as in iconv_to_ucs_ces_handlers_t */
179   int (*get_mb_cur_max) (void *data);
180 
181   /* Same as in iconv_to_ucs_ces_handlers_t */
182   void (*get_state) (void *data,
183                            mbstate_t *state);
184 
185   /* Same as in iconv_to_ucs_ces_handlers_t */
186   int (*set_state) (void *data,
187                          mbstate_t *state);
188 
189   /* Same as in iconv_to_ucs_ces_handlers_t */
190   int (*is_stateful) (void *data);
191 
192   /*
193    * convert_from_ucs - convert UCS character to destination encoding.
194    *
195    * PARAMETERS:
196    *   void *data         - CES converter-specific data;
197    *   ucs4_t in              - input UCS-4 character;
198    *   unsigned char **outbuf - output buffer for the result;
199    *   size_t *outbytesleft   - output buffer bytes count.
200    *
201    * DESCRIPTION:
202    *   Converts input UCS characters to destination encoding and stores result
203    *   in 'outbuf' if there is sufficient free space present. 'outbuf' is
204    *   incremented accordingly. 'outbytesleft' is decremented accordingly. Should
205    *   be provided by correspondent CES module.
206    *   Output buffer always has at least 1 byte.
207    *
208    * RETURN:
209    *   Returns number of bytes that was written into output buffer if success.
210    *   If there is no enough space in output buffer, returns ICONV_CES_NOSPACE.
211    *   If there is no corresponding character in destination encoding, returns
212    *   ICONV_CES_INVALID_CHARACTER.
213    */
214   size_t (*convert_from_ucs) (void *data,
215                                    ucs4_t in,
216                                    unsigned char **outbuf,
217                                    size_t *outbytesleft);
218 } iconv_from_ucs_ces_handlers_t;
219 
220 
221 /*
222  * iconv_to_ucs_ces_desc_t - "to UCS" CES converter definition structure for
223  * usage in iconv_ucs_conversion_t conversion description structure.
224  *
225  * ============================================================================
226  */
227 typedef struct
228 {
229   /* CES converter handlers */
230   const iconv_to_ucs_ces_handlers_t *handlers;
231 
232   /* "to_ucs" CES converter-specific data. */
233   void *data;
234 } iconv_to_ucs_ces_desc_t;
235 
236 
237 /*
238  * iconv_from_ucs_ces_desc_t - "from UCS" CES converter definition structure for
239  * usage in iconv_ucs_conversion_t conversion description structure.
240  *
241  * ============================================================================
242  */
243 typedef struct
244 {
245   /* CES converter handlers */
246   const iconv_from_ucs_ces_handlers_t *handlers;
247 
248   /* "from_ucs" CES converter-specific data. */
249   void *data;
250 } iconv_from_ucs_ces_desc_t;
251 
252 
253 /*
254  * iconv_ucs_conversion_t - UCS-based conversion definition structure.
255  *
256  * Defines special type of conversion where every character is first
257  * converted into UCS-4 (UCS-2 for table-driven), and after this the
258  * resulting UCS character is converted to destination encoding.
259  * UCS-based conversion is composed of two *converters*, defined by
260  * iconv_ces_t structure. The iconv_ucs_conversion_t object is referred
261  * from iconv_conversion_t object using 'data' field.
262  *
263  * Structure contains two objects - 'to_ucs' and 'from_ucs' which define
264  * "source encoding to UCS" and "UCS to destination encoding" converters.
265  *
266  * ============================================================================
267  */
268 typedef struct
269 {
270   /* Source encoding -> CES converter. */
271   iconv_to_ucs_ces_desc_t to_ucs;
272 
273   /* UCS -> destination encoding CES converter. */
274   iconv_from_ucs_ces_desc_t from_ucs;
275 } iconv_ucs_conversion_t;
276 
277 
278 /*
279  * iconv_to_ucs_ces_t - defines "to UCS" CES converter.
280  *
281  * ============================================================================
282  */
283 typedef struct
284 {
285   /*
286    * An array of encodings names, supported by CES converter.
287    * The end of array should be marked by NULL pointer.
288    */
289   const char **names;
290 
291   /* CES converter description structure */
292   const iconv_to_ucs_ces_handlers_t *handlers;
293 } iconv_to_ucs_ces_t;
294 
295 
296 /*
297  * iconv_from_ucs_ces_t - defines "from UCS" CES converter.
298  *
299  * ============================================================================
300  */
301 typedef struct
302 {
303   /*
304    * An array of encodings names, supported by CES converter.
305    * The end of array should be marked by NULL pointer.
306    */
307   const char **names;
308 
309   /* CES converter description structure */
310   const iconv_from_ucs_ces_handlers_t *handlers;
311 } iconv_from_ucs_ces_t;
312 
313 
314 /* List of "to UCS" linked-in CES converters. */
315 extern const iconv_to_ucs_ces_t
316 _iconv_to_ucs_ces[];
317 
318 /* List of "from UCS" linked-in CES converters. */
319 extern const iconv_from_ucs_ces_t
320 _iconv_from_ucs_ces[];
321 
322 #endif /* !__ICONV_UCS_CONVERSION_H__ */
323 
324