1#
2#Copyright (c) 2003-2004, Artem B. Bityuckiy
3#
4#Redistribution and use in source and binary forms, with or without
5#modification, are permitted provided that the following conditions
6#are met:
7#1. Redistributions of source code must retain the above copyright
8#notice, this list of conditions and the following disclaimer.
9#2. Redistributions in binary form must reproduce the above copyright
10#notice, this list of conditions and the following disclaimer in the
11#documentation and/or other materials provided with the distribution.
12#
13#THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15#IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16#ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17#FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18#DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19#OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20#HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21#LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22#OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23#SUCH DAMAGE.
24#
25#
26# This file describes dependencies between encodings, CES, CCS, etc.
27# File relates only to UCS-based conversions and is needed for automatic
28# generation of C source files and C header files.
29#
30# This configuration file consists of sections, each section consists of
31# entries.
32#
33# Use only normalized names.
34#
35
36#
37# The first section named "ENCODINGS" describes:
38# 1. CES converter corresponding to each encoding;
39# 2. Each encoding's aliases;
40# 3. CCS tables corresponding to each encoding.
41#
42SECTION ENCODINGS
43
44# ISO-10646-UCS-2. Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
45ENTRY
46ENCODING: ucs_2
47CES: ucs_2
48ALIASES: ucs2 iso_10646_ucs_2 iso10646_ucs_2 iso_10646_ucs2 iso10646_ucs2 iso10646ucs2 csUnicode
49ENTRY END
50
51# Big Endian version of ISO-10646-UCS-2 (in fact, equivalent to ucs_2).
52# Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
53ENTRY
54ENCODING: ucs_2be
55CES: ucs_2
56ALIASES: ucs2be
57ENTRY END
58
59# Little Endian version of ISO-10646-UCS-2.
60# Little Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
61ENTRY
62ENCODING: ucs_2le
63CES: ucs_2
64ALIASES: ucs2le
65ENTRY END
66
67# ISO-10646-UCS-2 in system byte order.
68# NBSP is always interpreted as NBSP (BOM isn't supported).
69# NOTE: Don't delete or rename this since it is used as widechar's
70# encoding when sizeof(wchar_t) == 2
71ENTRY
72ENCODING: ucs_2_internal
73CES: ucs_2_internal
74ALIASES: ucs2_internal ucs_2internal ucs2internal
75ENTRY END
76
77# ISO-10646-UCS-4. Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
78ENTRY
79ENCODING: ucs_4
80CES: ucs_4
81ALIASES: ucs4 iso_10646_ucs_4 iso10646_ucs_4 iso_10646_ucs4 iso10646_ucs4 iso10646ucs4
82ENTRY END
83
84# Big Endian version of ISO-10646-UCS-4 (in fact, equivalent to ucs_4).
85# Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
86ENTRY
87ENCODING: ucs_4be
88CES: ucs_4
89ALIASES: ucs4be
90ENTRY END
91
92# Little Endian version of ISO-10646-UCS-4.
93# Little Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
94ENTRY
95ENCODING: ucs_4le
96CES: ucs_4
97ALIASES: ucs4le
98ENTRY END
99
100# ISO-10646-UCS-4 in system byte order.
101# NBSP is always interpreted as NBSP (BOM isn't supported).
102# NOTE: Don't delete or rename this since it is used as widechar's
103# encoding when sizeof(wchar_t) == 4
104ENTRY
105ENCODING: ucs_4_internal
106CES: ucs_4_internal
107ALIASES: ucs4_internal ucs_4internal ucs4internal
108ENTRY END
109
110# RFC 3629 UTF-8
111ENTRY
112ENCODING: utf_8
113CES: utf_8
114ALIASES: utf8
115ENTRY END
116
117# RFC 2781 UTF-16. The very first NBSP code in stream is interpreted as BOM.
118ENTRY
119ENCODING: utf_16
120CES: utf_16
121ALIASES: utf16
122ENTRY END
123
124# Big Endian version of RFC 2781 UTF-16.
125# NBSP is always interpreted as NBSP (BOM isn't supported).
126ENTRY
127ENCODING: utf_16be
128CES: utf_16
129ALIASES: utf16be
130ENTRY END
131
132# Little Endian version of RFC 2781 UTF-16.
133# NBSP is always interpreted as NBSP (BOM isn't supported).
134ENTRY
135ENCODING: utf_16le
136CES: utf_16
137ALIASES: utf16le
138ENTRY END
139
140# 7-bit ASCII.
141ENTRY
142ENCODING: us_ascii
143CES: us_ascii
144ALIASES: ansi_x3.4_1968 ansi_x3.4_1986 iso_646.irv:1991 ascii iso646_us us ibm367 cp367 csascii
145ENTRY END
146
147# RFC 1489 Cyrillic
148ENTRY
149ENCODING: koi8_r
150CES: table
151CCS: koi8_r
152ALIASES: cskoi8r koi8r koi8
153ENTRY END
154
155# Obsoleted Ukrainian
156ENTRY
157ENCODING: koi8_ru
158CES: table
159CCS: koi8_ru
160ALIASES: koi8ru
161ENTRY END
162
163# RFC 2319 Ukrainian
164ENTRY
165ENCODING: koi8_u
166CES: table
167CCS: koi8_u
168ALIASES: koi8u
169ENTRY END
170
171# KOI8 Unified
172ENTRY
173ENCODING: koi8_uni
174CES: table
175CCS: koi8_uni
176ALIASES: koi8uni
177ENTRY END
178
179# ISO IR 111/ECMA Cyrillic.
180ENTRY
181ENCODING: iso_ir_111
182CES: table
183CCS: iso_ir_111
184ALIASES: ecma_cyrillic koi8_e koi8e csiso111ecmacyrillic
185ENTRY END
186
187# ISO 8859-1:1987 - Latin 1, West European
188ENTRY
189ENCODING: iso_8859_1
190CES: table
191CCS: iso_8859_1
192ALIASES: iso8859_1 iso88591 iso_8859_1:1987 iso_ir_100 latin1 l1 ibm819 cp819 csisolatin1
193ENTRY END
194
195# ISO 8859-2:1987 - Latin 2, East European
196ENTRY
197ENCODING: iso_8859_2
198CES: table
199CCS: iso_8859_2
200ALIASES: iso8859_2 iso88592 iso_8859_2:1987 iso_ir_101 latin2 l2 csisolatin2
201ENTRY END
202
203# ISO 8859-3:1988 - Latin 3, South European
204ENTRY
205ENCODING: iso_8859_3
206CES: table
207CCS: iso_8859_3
208ALIASES: iso_8859_3:1988 iso_ir_109 iso8859_3 latin3 l3 csisolatin3 iso88593
209ENTRY END
210
211# ISO 8859-4:1988 - Latin 4, North European
212ENTRY
213ENCODING: iso_8859_4
214CES: table
215CCS: iso_8859_4
216ALIASES: iso8859_4 iso88594 iso_8859_4:1988 iso_ir_110 latin4 l4 csisolatin4
217ENTRY END
218
219# ISO 8859-5:1988 - Cyrillic
220ENTRY
221ENCODING: iso_8859_5
222CES: table
223CCS: iso_8859_5
224ALIASES: iso8859_5 iso88595 iso_8859_5:1988 iso_ir_144 cyrillic csisolatincyrillic
225ENTRY END
226
227# ISO 8859-6:1987 - Arabic
228ENTRY
229ENCODING: iso_8859_6
230CES: table
231CCS: iso_8859_6
232ALIASES: iso_8859_6:1987 iso_ir_127 iso8859_6 ecma_114 asmo_708 arabic csisolatinarabic iso88596
233ENTRY END
234
235# ISO 8859-7:1987 - Greek
236ENTRY
237ENCODING: iso_8859_7
238CES: table
239CCS: iso_8859_7
240ALIASES: iso_8859_7:1987 iso_ir_126 iso8859_7 elot_928 ecma_118 greek greek8 csisolatingreek iso88597
241ENTRY END
242
243# ISO 8859-8:1988 - Hebrew
244ENTRY
245ENCODING: iso_8859_8
246CES: table
247CCS: iso_8859_8
248ALIASES: iso_8859_8:1988 iso_ir_138 iso8859_8 hebrew csisolatinhebrew iso88598
249ENTRY END
250
251# ISO 8859-9:1989 - Latin 5, Turkish
252ENTRY
253ENCODING: iso_8859_9
254CES: table
255CCS: iso_8859_9
256ALIASES: iso_8859_9:1989 iso_ir_148 iso8859_9 latin5 l5 csisolatin5 iso88599
257ENTRY END
258
259# ISO 8859-10:1992 - Latin 6, Nordic
260ENTRY
261ENCODING: iso_8859_10
262CES: table
263CCS: iso_8859_10
264ALIASES: iso_8859_10:1992 iso_ir_157 iso885910 latin6 l6 csisolatin6 iso8859_10
265ENTRY END
266
267# ISO 8859-11 - Thai
268ENTRY
269ENCODING: iso_8859_11
270CES: table
271CCS: iso_8859_11
272ALIASES: iso8859_11 iso885911
273ENTRY END
274
275# ISO 8859-13:1998 - Latin 7, Baltic Rim
276ENTRY
277ENCODING: iso_8859_13
278CES: table
279CCS: iso_8859_13
280ALIASES: iso_8859_13:1998 iso8859_13 iso885913
281ENTRY END
282
283# ISO 8859-14:1998 - Latin 8, Celtic
284ENTRY
285ENCODING: iso_8859_14
286CES: table
287CCS: iso_8859_14
288ALIASES: iso_8859_14:1998 iso885914 iso8859_14
289ENTRY END
290
291# ISO 8859-15:1998 - Latin 9, West Europe, successor of Latin 1
292ENTRY
293ENCODING: iso_8859_15
294CES: table
295CCS: iso_8859_15
296ALIASES: iso885915 iso_8859_15:1998 iso8859_15
297ENTRY END
298
299# Win-1250
300ENTRY
301ENCODING: win_1250
302CES: table
303CCS: win_1250
304ALIASES: cp1250
305ENTRY END
306
307# Win-1251 - Cyrillic
308ENTRY
309ENCODING: win_1251
310CES: table
311CCS: win_1251
312ALIASES: cp1251
313ENTRY END
314
315# Win-1252 - Latin 1
316ENTRY
317ENCODING: win_1252
318CES: table
319CCS: win_1252
320ALIASES: cp1252
321ENTRY END
322
323# Win-1253 - Greek
324ENTRY
325ENCODING: win_1253
326CES: table
327CCS: win_1253
328ALIASES: cp1253
329ENTRY END
330
331# Win-1254 - Turkish
332ENTRY
333ENCODING: win_1254
334CES: table
335CCS: win_1254
336ALIASES: cp1254
337ENTRY END
338
339# Win-1255 - Hebrew
340ENTRY
341ENCODING: win_1255
342CES: table
343CCS: win_1255
344ALIASES: cp1255
345ENTRY END
346
347# Win-1256 - Arabic
348ENTRY
349ENCODING: win_1256
350CES: table
351CCS: win_1256
352ALIASES: cp1256
353ENTRY END
354
355# Win-1257 - Baltic
356ENTRY
357ENCODING: win_1257
358CES: table
359CCS: win_1257
360ALIASES: cp1257
361ENTRY END
362
363# Win-1258 - Vietnamese
364ENTRY
365ENCODING: win_1258
366CES: table
367CCS: win_1258
368ALIASES: cp1258
369ENTRY END
370
371# big5 - an encoding for Traditional Chinese
372ENTRY
373ENCODING: big5
374CES: table_pcs
375CCS: big5
376ALIASES: csbig5 big_five bigfive cn_big5 cp950
377ENTRY END
378
379# IBM 775 - an updated version of CP 437 that supports Baltic languages.
380ENTRY
381ENCODING: cp775
382CES: table
383CCS: cp775
384ALIASES: ibm775 cspc775baltic
385ENTRY END
386
387# IBM 850 - an updated version of CP 437 where several Latin 1 characters have been
388# added instead of some less-often used characters like line-drawing and greek ones.
389ENTRY
390ENCODING: cp850
391CES: table
392CCS: cp850
393ALIASES: ibm850 850 cspc850multilingual
394ENTRY END
395
396# IBM 852 - an updated version of CP 437 where several Latin 2 characters have been added
397# instead of some less-often used characters like line-drawing and greek ones.
398ENTRY
399ENCODING: cp852
400CES: table
401CCS: cp852
402ALIASES: ibm852 852 cspcp852
403ENTRY END
404
405# IBM 855 - an updated version of CP 437 that supports Cyrillic.
406ENTRY
407ENCODING: cp855
408CES: table
409CCS: cp855
410ALIASES: ibm855 855 csibm855
411ENTRY END
412
413# IBM 866 - an updated version of CP 855 which follows the more logical Russian alphabet
414# ordering of the alternativny variant that is preferred by many Russian users.
415ENTRY
416ENCODING: cp866
417CES: table
418CCS: cp866
419ALIASES: 866 IBM866 CSIBM866
420ENTRY END
421
422# EUC-JP - The EUC for Japanese
423ENTRY
424ENCODING: euc_jp
425CES: euc
426CCS: jis_x0208_1990 jis_x0201_1976 jis_x0212_1990
427ALIASES: eucjp
428ENTRY END
429
430# EUC-KR - The EUC for Korean
431ENTRY
432ENCODING: euc_kr
433CES: euc
434CCS: ksx1001
435ALIASES: euckr
436ENTRY END
437
438# EUC-TW - The EUC for Traditional Chinese
439ENTRY
440ENCODING: euc_tw
441CES: euc
442CCS: cns11643_plane1 cns11643_plane2 cns11643_plane14
443ALIASES: euctw
444ENTRY END
445
446SECTION END
447
448#
449# This section is named "CES_DEPENDENCIES" and describes dependencies
450# between CES converters (some CES converters may use other CES converters).
451#
452SECTION CES_DEPENDENCIES
453
454ENTRY
455CES: table_pcs
456USED_CES: table
457ENTRY END
458
459ENTRY
460CES: euc
461USED_CES: table us_ascii
462ENTRY END
463
464SECTION END
465
466