# #Copyright (c) 2003-2004, Artem B. Bityuckiy # #Redistribution and use in source and binary forms, with or without #modification, are permitted provided that the following conditions #are met: #1. Redistributions of source code must retain the above copyright #notice, this list of conditions and the following disclaimer. #2. Redistributions in binary form must reproduce the above copyright #notice, this list of conditions and the following disclaimer in the #documentation and/or other materials provided with the distribution. # #THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND #ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE #IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE #ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE #FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS #OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) #HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT #LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY #OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF #SUCH DAMAGE. # # # This file describes dependencies between encodings, CES, CCS, etc. # File relates only to UCS-based conversions and is needed for automatic # generation of C source files and C header files. # # This configuration file consists of sections, each section consists of # entries. # # Use only normalized names. # # # The first section named "ENCODINGS" describes: # 1. CES converter corresponding to each encoding; # 2. Each encoding's aliases; # 3. CCS tables corresponding to each encoding. # SECTION ENCODINGS # ISO-10646-UCS-2. Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported). 
ENTRY ENCODING: ucs_2 CES: ucs_2 ALIASES: ucs2 iso_10646_ucs_2 iso10646_ucs_2 iso_10646_ucs2 iso10646_ucs2 iso10646ucs2 csUnicode ENTRY END # Big Endian version of ISO-10646-UCS-2 (in fact, equivalent to ucs_2). # Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: ucs_2be CES: ucs_2 ALIASES: ucs2be ENTRY END # Little Endian version of ISO-10646-UCS-2. # Little Endian, NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: ucs_2le CES: ucs_2 ALIASES: ucs2le ENTRY END # ISO-10646-UCS-2 in system byte order. # NBSP is always interpreted as NBSP (BOM isn't supported). # NOTE: Don't delete or rename this since it is used as widechar's # encoding when sizeof(wchar_t) == 2 ENTRY ENCODING: ucs_2_internal CES: ucs_2_internal ALIASES: ucs2_internal ucs_2internal ucs2internal ENTRY END # ISO-10646-UCS-4. Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: ucs_4 CES: ucs_4 ALIASES: ucs4 iso_10646_ucs_4 iso10646_ucs_4 iso_10646_ucs4 iso10646_ucs4 iso10646ucs4 ENTRY END # Big Endian version of ISO-10646-UCS-4 (in fact, equivalent to ucs_4). # Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: ucs_4be CES: ucs_4 ALIASES: ucs4be ENTRY END # Little Endian version of ISO-10646-UCS-4. # Little Endian, NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: ucs_4le CES: ucs_4 ALIASES: ucs4le ENTRY END # ISO-10646-UCS-4 in system byte order. # NBSP is always interpreted as NBSP (BOM isn't supported). # NOTE: Don't delete or rename this since it is used as widechar's # encoding when sizeof(wchar_t) == 4 ENTRY ENCODING: ucs_4_internal CES: ucs_4_internal ALIASES: ucs4_internal ucs_4internal ucs4internal ENTRY END # RFC 3629 UTF-8 ENTRY ENCODING: utf_8 CES: utf_8 ALIASES: utf8 ENTRY END # RFC 2781 UTF-16. The very first NBSP code in stream is interpreted as BOM. 
ENTRY ENCODING: utf_16 CES: utf_16 ALIASES: utf16 ENTRY END # Big Endian version of RFC 2781 UTF-16. # NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: utf_16be CES: utf_16 ALIASES: utf16be ENTRY END # Little Endian version of RFC 2781 UTF-16. # NBSP is always interpreted as NBSP (BOM isn't supported). ENTRY ENCODING: utf_16le CES: utf_16 ALIASES: utf16le ENTRY END # 7-bit ASCII. ENTRY ENCODING: us_ascii CES: us_ascii ALIASES: ansi_x3.4_1968 ansi_x3.4_1986 iso_646.irv:1991 ascii iso646_us us ibm367 cp367 csascii ENTRY END # RFC 1489 Cyrillic ENTRY ENCODING: koi8_r CES: table CCS: koi8_r ALIASES: cskoi8r koi8r koi8 ENTRY END # Obsoleted Ukrainian ENTRY ENCODING: koi8_ru CES: table CCS: koi8_ru ALIASES: koi8ru ENTRY END # RFC 2319 Ukrainian ENTRY ENCODING: koi8_u CES: table CCS: koi8_u ALIASES: koi8u ENTRY END # KOI8 Unified ENTRY ENCODING: koi8_uni CES: table CCS: koi8_uni ALIASES: koi8uni ENTRY END # ISO IR 111/ECMA Cyrillic. ENTRY ENCODING: iso_ir_111 CES: table CCS: iso_ir_111 ALIASES: ecma_cyrillic koi8_e koi8e csiso111ecmacyrillic ENTRY END # ISO 8859-1:1987 - Latin 1, West European ENTRY ENCODING: iso_8859_1 CES: table CCS: iso_8859_1 ALIASES: iso8859_1 iso88591 iso_8859_1:1987 iso_ir_100 latin1 l1 ibm819 cp819 csisolatin1 ENTRY END # ISO 8859-2:1987 - Latin 2, East European ENTRY ENCODING: iso_8859_2 CES: table CCS: iso_8859_2 ALIASES: iso8859_2 iso88592 iso_8859_2:1987 iso_ir_101 latin2 l2 csisolatin2 ENTRY END # ISO 8859-3:1988 - Latin 3, South European ENTRY ENCODING: iso_8859_3 CES: table CCS: iso_8859_3 ALIASES: iso_8859_3:1988 iso_ir_109 iso8859_3 latin3 l3 csisolatin3 iso88593 ENTRY END # ISO 8859-4:1988 - Latin 4, North European ENTRY ENCODING: iso_8859_4 CES: table CCS: iso_8859_4 ALIASES: iso8859_4 iso88594 iso_8859_4:1988 iso_ir_110 latin4 l4 csisolatin4 ENTRY END # ISO 8859-5:1988 - Cyrillic ENTRY ENCODING: iso_8859_5 CES: table CCS: iso_8859_5 ALIASES: iso8859_5 iso88595 iso_8859_5:1988 iso_ir_144 cyrillic 
csisolatincyrillic ENTRY END # ISO 8859-6:1987 - Arabic ENTRY ENCODING: iso_8859_6 CES: table CCS: iso_8859_6 ALIASES: iso_8859_6:1987 iso_ir_127 iso8859_6 ecma_114 asmo_708 arabic csisolatinarabic iso88596 ENTRY END # ISO 8859-7:1987 - Greek ENTRY ENCODING: iso_8859_7 CES: table CCS: iso_8859_7 ALIASES: iso_8859_7:1987 iso_ir_126 iso8859_7 elot_928 ecma_118 greek greek8 csisolatingreek iso88597 ENTRY END # ISO 8859-8:1988 - Hebrew ENTRY ENCODING: iso_8859_8 CES: table CCS: iso_8859_8 ALIASES: iso_8859_8:1988 iso_ir_138 iso8859_8 hebrew csisolatinhebrew iso88598 ENTRY END # ISO 8859-9:1989 - Latin 5, Turkish ENTRY ENCODING: iso_8859_9 CES: table CCS: iso_8859_9 ALIASES: iso_8859_9:1989 iso_ir_148 iso8859_9 latin5 l5 csisolatin5 iso88599 ENTRY END # ISO 8859-10:1992 - Latin 6, Nordic ENTRY ENCODING: iso_8859_10 CES: table CCS: iso_8859_10 ALIASES: iso_8859_10:1992 iso_ir_157 iso885910 latin6 l6 csisolatin6 iso8859_10 ENTRY END # ISO 8859-11 - Thai ENTRY ENCODING: iso_8859_11 CES: table CCS: iso_8859_11 ALIASES: iso8859_11 iso885911 ENTRY END # ISO 8859-13:1998 - Latin 7, Baltic Rim ENTRY ENCODING: iso_8859_13 CES: table CCS: iso_8859_13 ALIASES: iso_8859_13:1998 iso8859_13 iso885913 ENTRY END # ISO 8859-14:1998 - Latin 8, Celtic ENTRY ENCODING: iso_8859_14 CES: table CCS: iso_8859_14 ALIASES: iso_8859_14:1998 iso885914 iso8859_14 ENTRY END # ISO 8859-15:1998 - Latin 9, West Europe, successor of Latin 1 ENTRY ENCODING: iso_8859_15 CES: table CCS: iso_8859_15 ALIASES: iso885915 iso_8859_15:1998 iso8859_15 ENTRY END # Win-1250 ENTRY ENCODING: win_1250 CES: table CCS: win_1250 ALIASES: cp1250 ENTRY END # Win-1251 - Cyrillic ENTRY ENCODING: win_1251 CES: table CCS: win_1251 ALIASES: cp1251 ENTRY END # Win-1252 - Latin 1 ENTRY ENCODING: win_1252 CES: table CCS: win_1252 ALIASES: cp1252 ENTRY END # Win-1253 - Greek ENTRY ENCODING: win_1253 CES: table CCS: win_1253 ALIASES: cp1253 ENTRY END # Win-1254 - Turkish ENTRY ENCODING: win_1254 CES: table CCS: win_1254 ALIASES: 
cp1254 ENTRY END # Win-1255 - Hebrew ENTRY ENCODING: win_1255 CES: table CCS: win_1255 ALIASES: cp1255 ENTRY END # Win-1256 - Arabic ENTRY ENCODING: win_1256 CES: table CCS: win_1256 ALIASES: cp1256 ENTRY END # Win-1257 - Baltic ENTRY ENCODING: win_1257 CES: table CCS: win_1257 ALIASES: cp1257 ENTRY END # Win-1258 - Vietnamese ENTRY ENCODING: win_1258 CES: table CCS: win_1258 ALIASES: cp1258 ENTRY END # big5 - an encoding for Traditional Chinese ENTRY ENCODING: big5 CES: table_pcs CCS: big5 ALIASES: csbig5 big_five bigfive cn_big5 cp950 ENTRY END # IBM 775 - an updated version of CP 437 that supports Baltic languages. ENTRY ENCODING: cp775 CES: table CCS: cp775 ALIASES: ibm775 cspc775baltic ENTRY END # IBM 850 - an updated version of CP 437 where several Latin 1 characters have been # added instead of some less-often used characters like line-drawing and greek ones. ENTRY ENCODING: cp850 CES: table CCS: cp850 ALIASES: ibm850 850 cspc850multilingual ENTRY END # IBM 852 - an updated version of CP 437 where several Latin 2 characters have been added # instead of some less-often used characters like line-drawing and greek ones. ENTRY ENCODING: cp852 CES: table CCS: cp852 ALIASES: ibm852 852 cspcp852 ENTRY END # IBM 855 - an updated version of CP 437 that supports Cyrillic. ENTRY ENCODING: cp855 CES: table CCS: cp855 ALIASES: ibm855 855 csibm855 ENTRY END # IBM 866 - an updated version of CP 855 which follows the more logical Russian alphabet # ordering of the alternativny variant that is preferred by many Russian users. 
ENTRY ENCODING: cp866 CES: table CCS: cp866 ALIASES: 866 IBM866 CSIBM866 ENTRY END # EUC-JP - The EUC for Japanese ENTRY ENCODING: euc_jp CES: euc CCS: jis_x0208_1990 jis_x0201_1976 jis_x0212_1990 ALIASES: eucjp ENTRY END # EUC-KR - The EUC for Korean ENTRY ENCODING: euc_kr CES: euc CCS: ksx1001 ALIASES: euckr ENTRY END # EUC-TW - The EUC for Traditional Chinese ENTRY ENCODING: euc_tw CES: euc CCS: cns11643_plane1 cns11643_plane2 cns11643_plane14 ALIASES: euctw ENTRY END SECTION END # # This section is named "CES_DEPENDENCIES" and describes dependencies # between CES converters (some CES converters may use other CES converters). # SECTION CES_DEPENDENCIES ENTRY CES: table_pcs USED_CES: table ENTRY END ENTRY CES: euc USED_CES: table us_ascii ENTRY END SECTION END