#!/usr/bin/perl -w
#
#  Copyright (c) 2003-2004, Artem B. Bityuckiy, SoftMine Corporation.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
#  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
#  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
#  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
#  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
#  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#  SUCH DAMAGE.
#
use integer;
use Getopt::Std;
use IO::Seekable;
use strict;

# ##############################################################################
#
# FUNCTION PROTOTYPES AND GLOBAL DATA DECLARATION SECTION
#
# ##############################################################################

# SUPPLEMENTARY FUNCTIONS FORWARD DECLARATIONS
sub ProcessOptions();
sub Err($;$);
sub Generate8bitToUCS();
sub GenerateSpeed($);
sub Generate16bitSize($);
sub Output8bitToUCS(;$);
sub Output8bitFromUCS(;$);
sub OutputSpeed($;$);
sub OutputSize($;$);

# VARIABLES RELATING TO COMMAND-LINE OPTIONS
my $Verbose;  # Be verbose if true
my $Source;   # Output C source code instead of binary .cct file if true
my $Plane;    # Use this plane if defined
my $InFile;   # Use this file for input
my $OutFile;  # Use this file for output
my $CCSName;  # Use this CCS name
my $NoSpeed;  # Don't generate speed-optimized tables (binary files only)
my $NoSize;   # Don't generate size-optimized tables (binary files only)
my $NoBE;     # Don't generate big-endian tables (binary files only)
my $NoLE;     # Don't generate little-endian tables (binary files only)
my $NoTo;     # Don't generate "to_ucs" table (binary files only)
my $NoFrom;   # Don't generate "from_ucs" table (binary files only)
my $CCSCol;   # CCS column number in source file
my $UCSCol;   # UCS column number in source file

# DATA STRUCTURES WITH "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES
my (@FromSpeedTbl, @ToSpeedTbl, @FromSizeTbl, @ToSizeTbl);
# "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES SIZE IN BYTES
my ($FromSpeedBytes, $ToSpeedBytes, $FromSizeBytes, $ToSizeBytes) =
   (0, 0, 0, 0);

my (%CCSUCS, %UCSCCS); # CCS->UCS and UCS->CCS mappings
my $Bits = 8;          # Table bits (8 or 16);

# SPECIAL MARKER CODES
my $InvCode  = 0xFFFF; # FFFF indicates 16 bit invalid codes
my $InvBlock = 0xFFFF; # FFFF also mark empty blocks in speed-optimized tables
my $LostCode = 0x3F;   # ASCII '?' marks codes lost during CCS->UCS mapping
# To mark invalid codes in 8bit encodings 0xFF is used. CCS's 0xFF mapping is
# saved separately. $FFMap variable stores real 0xFF mapping if defined.
my $InvCode8bit = 0xFF;
my $FFMap;

# 8 Bit "From UCS" table header size (bytes)
my $Hdr8bitFromUCS = 2;
# Binary table header size (bytes)
my $HdrBinary = 8;

# At first all lost CCS codes are marked by $TmpLost to distinguish between
# code which is equivalent to $LostCode and lost codes. This is done in order to
# output $MacroLostCode instead of $LostCode in source file.
my $TmpLost = 0x1FFFF;

# VARIABLES RELATING TO C SOURCE CODE
my $MacroInvCode    = 'INVALC';
my $MacroInvBlock   = 'INVBLK';
my $MacroLostCode   = 'LOST_C';
my $MacroCCSName    = 'ICONV_CCS_%s';
my $GuardSize       = 'defined (TABLE_USE_SIZE_OPTIMIZATION)';
my $GuardToUCS      = "ICONV_TO_UCS_CCS_%s";
my $GuardFromUCS    = "ICONV_FROM_UCS_CCS_%s";
my $MacroSpeedTable = 'TABLE_SPEED_OPTIMIZED';
my $MacroSizeTable  = 'TABLE_SIZE_OPTIMIZED';
my $Macro8bitTable  = 'TABLE_8BIT';
my $Macro16bitTable = 'TABLE_16BIT';
my $MacroVer1Table  = 'TABLE_VERSION_1';
my $TypeBICCS       = 'iconv_ccs_t';
my $VarToUCSSize    = "to_ucs_size_%s";
my $VarToUCSSpeed   = "to_ucs_speed_%s";
my $VarFromUCSSize  = "from_ucs_size_%s";
my $VarFromUCSSpeed = "from_ucs_speed_%s";
my $VarBICCS        = "_iconv_ccs_%s";
# Text block that visually separates tables.
my $Separator = '=' x 70;

# ##############################################################################
#
# SCRIPT ENTRY POINT
#
# ##############################################################################

# Parse command-line options, check them and set correspondent global variables
ProcessOptions();

# Initialize global variables that depend on CCS name.
# Each name template is expanded exactly once (the original list named
# $VarToUCSSpeed twice, running sprintf on an already expanded string).
$_ = sprintf $_, $CCSName
  foreach +($VarToUCSSpeed, $VarToUCSSize,
            $VarFromUCSSpeed, $VarFromUCSSize, $VarBICCS);
$_ = sprintf $_, "\U$CCSName"
  foreach +($GuardToUCS, $GuardFromUCS, $MacroCCSName);

# Open input and output files
Err("Can't open \"$InFile\" file for reading: $!.\n", 1)
  unless open(INFILE, '<', $InFile);
Err("Can't open \"$OutFile\" file for writing: $!.\n", 1)
  unless open(OUTFILE, '>', $OutFile);
# Binary tables must not go through newline translation.
binmode OUTFILE unless $Source;

# ==============================================================================
# EXTRACT CODES MAP FROM INPUT FILE
# ==============================================================================
my $patt = qr/(0[xX])?[0-9a-fA-F]{1,8}/; # HEX digit regexp pattern.
my $ln = 0;                              # Current input line number (1-based)
while (defined(my $line = <INFILE>))
{
    $ln += 1;

    # Skip comment and empty lines, remove ending LF and CR symbols
    next if $line =~ /^#.*$/ or $line =~ /^\s*$/;
    $line =~ s/^(.*)\n$/$1/;
    $line =~ s/^(.*)\r$/$1/;
    # Remove comment and extra spaces
    $line =~ s/(.*)\s*#.*/$1/;
    $line =~ s/\s+/ /g;
    $line =~ s/(.*)\s*$/$1/;

    # Split line into individual codes
    my @codes = split / /, $line;

    # Skip line if there is no needed columns
    unless (defined $codes[$CCSCol])
    {
        print("Warning (line $ln): no CCS column, skip.\n") if $Verbose;
        next;
    }
    unless (defined $codes[$UCSCol])
    {
        print("Warning (line $ln): no UCS column, skip.\n") if $Verbose;
        next;
    }

    # Extract codes strings from needed columns
    my ($ccs, $ucs) = ($codes[$CCSCol], $codes[$UCSCol]);

    # Check that CCS and UCS code strings has right format.
    unless ($ccs =~ m/^$patt$/)
    {
        print("Warning (line $ln): $ccs CCS code isn't recognized, skip.\n")
          if $Verbose;
        next;
    }
    unless ($ucs =~ m/^($patt(,|\+))*$patt$/)
    {
        print("Warning (line $ln): $ucs UCS code isn't recognized, skip.\n")
          if $Verbose;
        next;
    }

    # Convert code to numeric format (assume hex).
    $ccs = hex $ccs;

    if ($ucs =~ m/,/ or $ucs =~ m/\+/)
    {
        # Mark CCS codes with "one to many" mappings as lost.
        # $ccs is already numeric here, so it must not go through hex() again.
        printf("Warning (line $ln): only one to one mapping is supported, "
             . "mark 0x%.4X CCS code as lost.\n", $ccs) if $Verbose;
        $ucs = $TmpLost;
    }
    else
    {
        # Convert code to numeric format
        $ucs = hex $ucs;

        # Check that UCS code isn't longer than 16 bits.
        if ($ucs > 0xFFFF)
        {
            printf("Warning (line $ln): UCS code should fit 16 bits, "
                 . "mark 0x%.4X CCS code as lost.\n", $ccs) if $Verbose;
            $ucs = $TmpLost;
        }
    }

    # If CCS value > 0xFFFF user should specify plane number.
    if ($ccs > 0xFFFF && !defined $Plane)
    {
        print("Warning (line $ln): $ccs is > 16 bit, plane number should be specified,"
            . " skip this mapping.\n") if $Verbose;
        next;
    }

    if (defined $Plane)
    {
        next if (($ccs & 0xFFFF0000) >> 16) != hex $Plane; # Skip alien plane.
        $ccs &= 0xFFFF;
    }

    # Check that reserved codes aren't used.
    if ($ccs == $InvCode or $ucs == $InvCode)
    {
        print("Warning (line $ln): $InvCode is reserved to mark invalid codes and "
            . "shouldn't be used in mappings, skip.\n") if $Verbose;
        next;
    }

    # Save mapping in UCSCCS and CCSUCS hash arrays (first mapping wins).
    $UCSCCS{$ucs} = $ccs if $ucs != $TmpLost && !defined $UCSCCS{$ucs};
    $CCSUCS{$ccs} = $ucs if !defined $CCSUCS{$ccs};

    $Bits = 16 if $ccs > 0xFF;
}

if (not %CCSUCS)
{
    Err("Error: there is no plane $Plane in \"$InFile\".\n") if defined $Plane;
    Err("Error: mapping wasn't found.\n");
}

# ==============================================================================
# GENERATE TABLE DATA
# ==============================================================================

# 8-bit encodings have a single table form which lives in the "speed" slots,
# so the "no speed-optimized tables" option only applies to 16-bit encodings.
# Otherwise the binary header would advertise no table while 8-bit data is
# still written.
my $SkipSpeed = $NoSpeed && $Bits == 16;

if ($Bits == 8)
{
    $FFMap = $CCSUCS{0xFF};
    $FFMap = $InvCode if !defined $FFMap;
}

if ($Bits == 8)
{
    Generate8bitToUCS() unless $NoTo;
}
else
{
    GenerateSpeed("to_ucs") unless $NoTo || $NoSpeed;
    Generate16bitSize("to_ucs") unless $NoTo || $NoSize;
}

GenerateSpeed("from_ucs") unless $NoFrom || $SkipSpeed;
Generate16bitSize("from_ucs") unless $NoFrom || $NoSize;

# ==============================================================================
# OUTPUT ARRAYS
# ==============================================================================
if ($Source)
{
    # OUTPUT SOURCE
    # NOTE(review): the two system headers below were lost from this copy of
    # the script; <sys/types.h> and <sys/param.h> are reconstructed - confirm
    # against the upstream script.
    print OUTFILE
"/*
 * This file was generated automatically - don't edit it.
 * File contains iconv CCS tables for $CCSName encoding.
 */

#include \"ccsbi.h\"

#if defined ($GuardToUCS) \\
 || defined ($GuardFromUCS)

#include <_ansi.h>
#include <sys/types.h>
#include <sys/param.h>
#include \"ccs.h\"
#include \"ccsnames.h\"

";

    if ($Bits == 8)
    {
        print OUTFILE
"#if (_BYTE_ORDER == _LITTLE_ENDIAN)
#  define W(word) (word) & 0xFF, (word) >> 8
#elif (_BYTE_ORDER == _BIG_ENDIAN)
#  define W(word) (word) >> 8, (word) & 0xFF
#else
#  error \"Unknown byte order.\"
#endif

";
    }

    unless ($NoTo)
    {
        if ($Bits == 8)
        {
            Output8bitToUCS();
        }
        else
        {
            OutputSpeed("to_ucs") unless $NoSpeed;
            OutputSize("to_ucs") unless $NoSize;
        }
    }
    unless ($NoFrom)
    {
        if ($Bits == 8)
        {
            Output8bitFromUCS();
        }
        else
        {
            OutputSpeed("from_ucs") unless $NoSpeed;
            OutputSize("from_ucs") unless $NoSize;
        }
    }

    # OUTPUT TABLE DESCRIPTION STRUCTURE
    print OUTFILE
"/*
 * $CCSName CCS description table.
 * $Separator
 */
const $TypeBICCS
$VarBICCS =
{
\t$MacroVer1Table, /* Table version */
\t$MacroCCSName, /* CCS name */
";
    if ($Bits == 8)
    {
        print OUTFILE
"\t$Macro8bitTable, /* Table bits */
\t0, /* Not Used */
#if defined ($GuardFromUCS)
\t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table */
#else
\t(__uint16_t *)NULL,
#endif
\t0, /* Not Used */
#if defined ($GuardToUCS)
\t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table */
#else
\t(__uint16_t *)NULL,
#endif
};\n";
    }
    else
    {
        print OUTFILE
"\t$Macro16bitTable, /* Table bits */
#if defined ($GuardFromUCS) \\
 && ($GuardSize)
\t$MacroSizeTable,
\t(__uint16_t *)&$VarFromUCSSize, /* UCS -> $CCSName table size-optimized table */
#elif defined ($GuardFromUCS) \\
 && !($GuardSize)
\t$MacroSpeedTable,
\t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table speed-optimized table */
#else
\t$MacroSpeedTable,
\t(__uint16_t *)NULL,
#endif
#if defined ($GuardToUCS) \\
 && ($GuardSize)
\t$MacroSizeTable,
\t(__uint16_t *)&$VarToUCSSize /* $CCSName -> UCS table size-optimized table */
#elif defined ($GuardToUCS) \\
 && !($GuardSize)
\t$MacroSpeedTable,
\t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table speed-optimized table */
#else
\t$MacroSpeedTable,
\t(__uint16_t *)NULL,
#endif
};\n";
    }
    print OUTFILE "\n#endif /* $GuardToUCS) || ... */\n\n";
}
else
{
    # OUTPUT BINARY TABLES DESCRIPTION STRUCTURE (ALWAYS BIG ENDIAN)
    print OUTFILE pack "n", 1;
    print OUTFILE pack "n", $Bits;
    my $len = length $CCSName;
    print OUTFILE pack "N", $len;
    print OUTFILE pack "a$len", $CCSName;

    # Pad the CCS name so that the offsets table starts 4-byte aligned.
    my $pos = $HdrBinary + $len;
    if ($pos & 3)
    {
        my $pad = 4 - ($pos & 3);
        print OUTFILE pack "a$pad", 'XXX';
        $pos += $pad;
    }

    # 16 32-bit words follow: (offset, size) pairs for 8 possible tables.
    $pos += 16*4;

    my @tables = (0) x 16;
    # Record table position/size in slot pair and advance the file position.
    my $reserve = sub
    {
        my ($slot, $bytes) = @_;
        @tables[$slot, $slot + 1] = ($pos, $bytes);
        $pos += $bytes;
    };

    $reserve->(0, $FromSpeedBytes) unless $NoFrom || $SkipSpeed || $NoBE;
    $reserve->(2, $FromSpeedBytes) unless $NoFrom || $SkipSpeed || $NoLE;
    if ($Bits == 16)
    {
        $reserve->(4, $FromSizeBytes) unless $NoFrom || $NoSize || $NoBE;
        $reserve->(6, $FromSizeBytes) unless $NoFrom || $NoSize || $NoLE;
    }
    $reserve->(8, $ToSpeedBytes)  unless $NoTo || $SkipSpeed || $NoBE;
    $reserve->(10, $ToSpeedBytes) unless $NoTo || $SkipSpeed || $NoLE;
    if ($Bits == 16)
    {
        $reserve->(12, $ToSizeBytes) unless $NoTo || $NoSize || $NoBE;
        $reserve->(14, $ToSizeBytes) unless $NoTo || $NoSize || $NoLE;
    }

    print OUTFILE pack("N", $_) foreach @tables;

    print "Total bytes for output: $pos.\n" if $Verbose;

    # OUTPUT BINARY TABLES (same order as the slots reserved above)
    unless ($NoFrom)
    {
        if ($Bits == 8)
        {
            Output8bitFromUCS("n") unless $NoBE;
            Output8bitFromUCS("v") unless $NoLE;
        }
        else
        {
            unless ($NoSpeed)
            {
                OutputSpeed("from_ucs", "n") unless $NoBE;
                OutputSpeed("from_ucs", "v") unless $NoLE;
            }
            unless ($NoSize)
            {
                OutputSize("from_ucs", "n") unless $NoBE;
                OutputSize("from_ucs", "v") unless $NoLE;
            }
        }
    }
    unless ($NoTo)
    {
        if ($Bits == 8)
        {
            Output8bitToUCS("n") unless $NoBE;
            Output8bitToUCS("v") unless $NoLE;
        }
        else
        {
            unless ($NoSpeed)
            {
                OutputSpeed("to_ucs", "n") unless $NoBE;
                OutputSpeed("to_ucs", "v") unless $NoLE;
            }
            unless ($NoSize)
            {
                OutputSize("to_ucs", "n") unless $NoBE;
                OutputSize("to_ucs", "v") unless $NoLE;
            }
        }
    }
}

close INFILE;
# Buffered write errors only surface when the handle is closed.
Err("Can't write \"$OutFile\" file: $!.\n") unless close OUTFILE;
exit 0;


# ##############################################################################
#
# SUPPLEMENTARY FUNCTIONS
#
# ##############################################################################


# =============================================================================
#
# Generate 8bit "to_ucs" table. Store table data in @ToSpeedTbl array.
# Store table size in $ToSpeedBytes scalar.
#
# Every one of the 256 entries is either the UCS code from %CCSUCS or
# $InvCode when the CCS code has no mapping.
#
# =============================================================================
sub Generate8bitToUCS()
{
    for my $code (0 .. 255)
    {
        $ToSpeedTbl[$code] =
          defined $CCSUCS{$code} ? $CCSUCS{$code} : $InvCode;
    }

    $ToSpeedBytes = 256*2;
    return;
}


# =============================================================================
#
# Generate speed-optimized table.
#
# Parameter 1:
#  "to_ucs"   - generate "to_ucs" table, store table data in @ToSpeedTbl
#               array, store table size in $ToSpeedBytes scalar.
#  "from_ucs" - generate "from_ucs" table, store table data in @FromSpeedTbl
#               array, store table size in $FromSpeedBytes scalar.
#
# Data is written to @ToSpeedTbl or @FromSpeedTbl (@map) table and has the
# following format:
# $table[0] - 256-element array (control block);
# $table[1 .. $#table] - 256-element arrays (data blocks).
#
# The control block holds, for every high byte, the position of its data
# block in the resulting array (undef for high bytes with no mapping).
# For 16-bit tables positions start right after the 256-entry control block;
# for 8-bit tables they are counted after $Hdr8bitFromUCS plus 512 bytes.
#
# =============================================================================
sub GenerateSpeed($)
{
    my ($codes, $table, $size);

    if ($_[0] eq "to_ucs")
    {
        ($codes, $table, $size) = (\%CCSUCS, \@ToSpeedTbl, \$ToSpeedBytes);
    }
    elsif ($_[0] eq "from_ucs")
    {
        ($codes, $table, $size) = (\%UCSCCS, \@FromSpeedTbl, \$FromSpeedBytes);
    }
    else
    {
        Err("Internal script error in GenerateSpeed()\n");
    }

    # Mark every high byte that has at least one mapped code
    my @used;
    foreach my $code (keys %$codes)
    {
        $used[$code >> 8] = 1;
    }

    # GENERATE FIRST 256-ELEMENT CONTROL BLOCK
    my $offset = $Bits == 16 ? 0 : 256 + $Hdr8bitFromUCS;
    foreach my $hi (0 .. 0xFF)
    {
        if ($used[$hi])
        {
            $offset += 256;
            $table->[0]->[$hi] = $offset;
        }
        else
        {
            $table->[0]->[$hi] = undef;
        }
    }

    # GENERATE DATA BLOCKS
    my $entries = 0;
    foreach my $hi (0 .. 0xFF)
    {
        next unless $used[$hi];

        $entries += 256;
        foreach my $lo (0 .. 0xFF)
        {
            $table->[$hi + 1]->[$lo] = $codes->{($hi << 8) | $lo};
        }
    }

    # Data entries take 2 bytes in 16-bit tables and 1 byte in 8-bit ones;
    # 8-bit tables carry an extra header; the control block is always 512 bytes.
    $entries *= 2 if $Bits == 16;
    $entries += $Hdr8bitFromUCS if $Bits == 8;
    $$size = $entries + 512;
}


# =============================================================================
#
# Generate 16bit size-optimized table.
#
# Parameter 1:
#  "to_ucs"   - generate "to_ucs" table, store table data in @ToSizeTbl
#               array, store table size in $ToSizeBytes scalar.
#  "from_ucs" - generate "from_ucs" table, store table data in @FromSizeTbl
#               array, store table size in $FromSizeBytes scalar.
#
# Data is written to @ToSizeTbl or @FromSizeTbl (@map) table and has the
# following format:
# $table[0] - number of ranges;
# $table[1] - number of unranged codes;
# $table[2] - unranged codes index in resulting array;
# $table[3]->[0 .. $table[0]] - array of arrays of ranges:
#    $table[3]->[x]->[0] - first code;
#    $table[3]->[x]->[1] - last code;
#    $table[3]->[x]->[2] - range index in resulting array;
# $table[4]->[0 .. $table[0]] - array of arrays of ranges content;
# $table[5]->[0 ..
#   $table[1]] - array of arrays of unranged codes;
#    $table[5]->[x]->[0] - CCS code;
#    $table[5]->[x]->[1] - UCS code;
#
# A range costs 3 header entries plus one entry per covered code; an
# unranged code costs 2 entries. Neighbouring ranges are joined (the gap is
# filled with undef) whenever the joined range is not larger than the two
# separate ones. Ranges of 1 or 2 codes are stored as unranged codes.
#
# =============================================================================
sub Generate16bitSize($)
{
    my ($map, $tbl, $bytes);

    if ($_[0] eq "to_ucs")
    {
        ($map, $tbl, $bytes) = (\%CCSUCS, \@ToSizeTbl, \$ToSizeBytes);
    }
    elsif ($_[0] eq "from_ucs")
    {
        ($map, $tbl, $bytes) = (\%UCSCCS, \@FromSizeTbl, \$FromSizeBytes);
    }
    else
    {
        Err("Internal script error Generate16bitSize()\n");
    }

    # CREATE LIST OF RANGES (runs of consecutive codes).
    my @ranges;
    foreach my $code (sort { $a <=> $b } keys %$map)
    {
        if (@ranges and $code - 1 == $ranges[-1]->[-1])
        {
            push @{$ranges[-1]}, $code;
        }
        else
        {
            push @ranges, [$code];
        }
    }

    # OPTIMIZE LIST OF RANGES.
    # Number of array entries a range of $_[0] codes needs.
    my $cost = sub { my $n = $_[0]; return $n <= 2 ? 2 * $n : $n + 3; };

    # "<" (not "==") so that an empty map does not dereference undef.
    my $r = 0; # Working range number
    while ($r < $#ranges)
    {
        my ($r1, $r2) = @ranges[$r, $r + 1];

        # Entries needed by two separate ranges vs. one joined range
        my $two = $cost->(scalar @$r1) + $cost->(scalar @$r2);
        my $one = $r2->[-1] - $r1->[0] + 1 + 3;

        if ($one > $two)
        {
            $r += 1;
            next;
        }

        # Join ranges; codes in the gap have no mapping (undef).
        my $gap = $r2->[0] - $r1->[-1] - 1;
        $ranges[$r] = [@$r1, (undef) x $gap, @$r2];
        splice @ranges, $r + 1, 1;
    }

    # SEPARATE RANGED AND UNRANGED CODES. SPLIT 2-CODES RANGES ON 2 UNRANGED.
    my (@unranged, @ranged);
    foreach my $range (@ranges)
    {
        if (@$range <= 2)
        {
            push @unranged, @$range;
        }
        else
        {
            push @ranged, $range;
        }
    }

    # COMPOSE TABLE
    my $idx = 3 + @ranged * 3; # First range data index
    $$bytes = $idx * 2;

    @$tbl = ();
    $tbl->[0] = scalar @ranged;   # Number of ranges
    $tbl->[1] = scalar @unranged; # Number of unranged codes
    $tbl->[3] = [];
    $tbl->[4] = [];

    foreach my $range (@ranged)
    {
        # Range descriptor and range content
        push @{$tbl->[3]}, [$range->[0], $range->[-1], $idx];
        push @{$tbl->[4]}, [map { defined $_ ? $map->{$_} : undef } @$range];

        $idx += @$range;
        $$bytes += @$range * 2;
    }

    $tbl->[2] = $idx; # Array index of unranged codes

    # Unranged codes list: (code, mapped code) pairs
    $tbl->[5] = [map { [$_, $map->{$_}] } @unranged];
    $$bytes += @unranged * 4;
}


# =============================================================================
#
# Output 8bit "to UCS" table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub Output8bitToUCS(;$)
{
    my $endian = $_[0];

    if ($Verbose)
    {
        my $e = defined $endian
              ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
        print "Output$e 8-bit $CCSName -> UCS table ($ToSpeedBytes bytes).\n";
    }

    if ($Source)
    {
        # Output heading information
        print OUTFILE
"/*
 * 8-bit $CCSName -> UCS table ($ToSpeedBytes bytes).
 * $Separator
 */
#if defined ($GuardToUCS)

static const __uint16_t
${VarToUCSSpeed}\[] =
{\n\t";

        my $count = 0;
        foreach my $code (@ToSpeedTbl)
        {
            my $text = $code == $InvCode ? "$MacroInvCode,"
                     : $code == $TmpLost ? "$MacroLostCode,"
                     :                     sprintf("0x%.4X,", $code);
            print OUTFILE $text;
            # Eight codes per output line
            print OUTFILE "\n\t" unless ++$count % 8;
        }
        print OUTFILE "\n};\n\n#endif /* $GuardToUCS */\n\n";
    }
    else
    {
        foreach my $code (@ToSpeedTbl)
        {
            print OUTFILE pack($endian, $code == $TmpLost ? $LostCode : $code);
        }
    }
}


# =============================================================================
#
# Output 8bit "from UCS" table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# Layout: 16-bit real 0xFF mapping, 256 16-bit heading block entries, then
# 256-byte data blocks.
#
# =============================================================================
sub Output8bitFromUCS(;$)
{
    my $endian = $_[0];

    if ($Verbose)
    {
        my $e = defined $endian
              ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
        print "Output$e 8-bit UCS -> $CCSName table ($FromSpeedBytes bytes).\n";
    }

    # A lost (or missing) 0xFF mapping has no UCS code: store it as invalid
    # instead of letting the 17-bit $TmpLost marker leak into the output.
    my $ffmap = (defined $FFMap && $FFMap != $TmpLost) ? $FFMap : $InvCode;

    my @blocks = grep { defined $FromSpeedTbl[$_] } 1 .. $#FromSpeedTbl;
    my @heading = @{ $FromSpeedTbl[0] || [] };

    if ($Source)
    {
        print OUTFILE
"/*
 * 8-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
 * $Separator
 */

#if defined ($GuardFromUCS)

static const unsigned char
${VarFromUCSSpeed}\[] =
{
";
        # SAVE 0xFF MAPPING.
        printf OUTFILE "\tW(0x%.4X), /* Real 0xFF mapping. 0xFF is used "
                     . "to mark invalid codes */\n", $ffmap;

        # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
        print OUTFILE "\t/* Heading Block */";
        for my $i (0 .. 255)
        {
            print OUTFILE "\n\t" unless $i % 4;
            if (defined $heading[$i])
            {
                printf OUTFILE "W(0x%.4X),", $heading[$i];
            }
            else
            {
                print OUTFILE "W($MacroInvBlock),";
            }
        }

        # OUTPUT DATA BLOCKS
        my $index = 512 + $Hdr8bitFromUCS;
        foreach my $blk (@blocks)
        {
            printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
            $index += 256;
            for my $i (0 .. 255)
            {
                print OUTFILE "\n\t" unless $i % 8;
                my $code = $FromSpeedTbl[$blk]->[$i];
                $code = !defined $code    ? $InvCode8bit
                      : $code == $TmpLost ? $LostCode
                      :                     $code;
                printf OUTFILE "0x%.2X,", $code;
            }
        }
        print OUTFILE "\n};\n\n#endif /* $GuardFromUCS */\n\n";
    }
    else
    {
        # SAVE 0xFF MAPPING.
        print OUTFILE pack($endian, $ffmap);

        # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
        print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
          foreach @heading;

        # OUTPUT DATA BLOCKS
        foreach my $blk (@blocks)
        {
            for my $i (0 .. 255)
            {
                my $code = $FromSpeedTbl[$blk]->[$i];
                $code = !defined $code    ? $InvCode8bit
                      : $code == $TmpLost ? $LostCode
                      :                     $code;
                # print, not printf: packed bytes are data, not a format.
                print OUTFILE pack('C', $code);
            }
        }
    }
}


# =============================================================================
#
# Output 16bit Speed-optimized table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1:
#  "to_ucs"   - Output "to_ucs" table.
#  "from_ucs" - Output "from_ucs" table.
# Parameter 2: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub OutputSpeed($;$)
{
    my $endian = $_[1];
    my ($tbl, $direction, $bytes, $guard, $var);

    my $optimiz = $Bits == 16 ? " speed-optimized" : "";
    my $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";

    if ($_[0] eq "to_ucs")
    {
        ($tbl, $bytes, $guard, $var) =
          (\@ToSpeedTbl, $ToSpeedBytes, $GuardToUCS, $VarToUCSSpeed);
        $direction = " $CCSName -> UCS";
    }
    elsif ($_[0] eq "from_ucs")
    {
        ($tbl, $bytes, $guard, $var) =
          (\@FromSpeedTbl, $FromSpeedBytes, $GuardFromUCS, $VarFromUCSSpeed);
        $direction = " UCS -> $CCSName";
    }
    else
    {
        Err("Internal script error Output16bitSpeed()\n");
    }

    printf "Output%s 16-bit%s%s table (%d bytes).\n",
           $e, $direction, $optimiz, $bytes if $Verbose;

    my @blocks = grep { defined $tbl->[$_] } 1 .. $#$tbl;
    my @heading = @{ $tbl->[0] || [] };

    if ($Source)
    {
        print OUTFILE
"/*
 * 16-bit$direction speed-optimized table ($bytes bytes).
 * $Separator
 */

#if defined ($guard) \\
 && !($GuardSize)

static const __uint16_t
${var}\[] =
{
";
        # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
        print OUTFILE "\t/* Heading Block */";
        for my $i (0 .. 255)
        {
            print OUTFILE "\n\t" unless $i % 8;
            if (defined $heading[$i])
            {
                printf OUTFILE "0x%.4X,", $heading[$i];
            }
            else
            {
                print OUTFILE "$MacroInvBlock,";
            }
        }

        # OUTPUT OTHER BLOCKS
        my $index = 256;
        foreach my $blk (@blocks)
        {
            printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
            $index += 256;
            for my $i (0 .. 255)
            {
                print OUTFILE "\n\t" unless $i % 8;
                my $code = $tbl->[$blk]->[$i];
                my $text = !defined $code    ? $MacroInvCode
                         : $code == $TmpLost ? $MacroLostCode
                         :                     sprintf("0x%.4X", $code);
                print OUTFILE $text, ",";
            }
        }

        print OUTFILE "\n};\n\n#endif /* $guard && !$GuardSize */\n\n";
    }
    else
    {
        # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
        print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
          foreach @heading;

        # OUTPUT OTHER BLOCKS
        foreach my $blk (@blocks)
        {
            for my $i (0 .. 255)
            {
                my $code = $tbl->[$blk]->[$i];
                $code = !defined $code    ? $InvCode
                      : $code == $TmpLost ? $LostCode
                      :                     $code;
                print OUTFILE pack($endian, $code);
            }
        }
    }
}


# =============================================================================
#
# Output 16bit Size-optimized table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1:
#  "to_ucs"   - Output "to_ucs" table.
#  "from_ucs" - Output "from_ucs" table.
# Parameter 2: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub OutputSize($;$)
{
    my $endian = $_[1];
    my ($tbl, $direction, $bytes, $guard, $var);

    my $optimiz = $Bits == 16 ? " size-optimized" : "";
    my $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";

    if ($_[0] eq "to_ucs")
    {
        ($tbl, $bytes, $guard, $var) =
          (\@ToSizeTbl, $ToSizeBytes, $GuardToUCS, $VarToUCSSize);
        $direction = " $CCSName -> UCS";
    }
    elsif ($_[0] eq "from_ucs")
    {
        ($tbl, $bytes, $guard, $var) =
          (\@FromSizeTbl, $FromSizeBytes, $GuardFromUCS, $VarFromUCSSize);
        $direction = " UCS -> $CCSName";
    }
    else
    {
        Err("Internal script error Output16bitSize()\n");
    }

    printf "Output%s 16-bit%s%s table (%d bytes).\n",
           $e, $direction, $optimiz, $bytes if $Verbose;

    my @ranges   = @{ $tbl->[3] || [] }; # [first, last, index] descriptors
    my @contents = @{ $tbl->[4] || [] }; # Codes of every range
    my @unranged = @{ $tbl->[5] || [] }; # [code, mapped code] pairs

    if ($Source)
    {
        print OUTFILE
"/*
 * 16-bit$direction size-optimized table ($bytes bytes).
 * $Separator
 */

#if defined ($guard) \\
 && ($GuardSize)

static const __uint16_t
${var}\[] =
{
";
        # OUTPUT FIRST 3 ELEMENTS
        printf OUTFILE "\t0x%.4X, /* Ranges number */\n", $tbl->[0];
        printf OUTFILE "\t0x%.4X, /* Unranged codes number */\n", $tbl->[1];
        printf OUTFILE "\t0x%.4X, /* First unranged code index */\n", $tbl->[2];

        # OUTPUT RANGES
        my $idx = 3; # Array index of the element being output
        print OUTFILE "\t/* Ranges list: first code, last Code, array index. */\n";
        foreach my $range (@ranges)
        {
            printf OUTFILE "\t/* Array index: 0x%.4X */ 0x%.4X, 0x%.4X, 0x%.4X,\n",
                           $idx, $range->[0], $range->[1], $range->[2];
            $idx += 3;
        }

        # OUTPUT RANGES CONTENT
        print OUTFILE "\t/* Ranges content */";
        for my $num (0 .. $#ranges)
        {
            my ($first, $last) = @{$ranges[$num]}[0, 1];
            printf OUTFILE "\n\t/* Range 0x%.4X - 0x%.4X, array index: 0x%.4X */",
                           $first, $last, $idx;
            $idx += $last - $first + 1;

            my @codes = @{ $contents[$num] || [] };
            for my $elt (0 .. $#codes)
            {
                print OUTFILE "\n\t" unless $elt % 8;
                my $code = $codes[$elt];
                my $text = !defined $code    ? "$MacroInvCode,"
                         : $code == $TmpLost ? "$MacroLostCode,"
                         :                     sprintf("0x%.4X,", $code);
                print OUTFILE $text;
            }
        }

        # OUTPUT UNRANGED CODES
        printf OUTFILE "\n\t/* Unranged codes (%d codes) */", scalar @unranged;
        foreach my $pair (@unranged)
        {
            # Lost codes carry the 17-bit $TmpLost marker; emit the macro.
            my $mapped = $pair->[1] == $TmpLost
                       ? $MacroLostCode : sprintf("0x%.4X", $pair->[1]);
            printf OUTFILE "\n\t/* Array index: 0x%.4X */ 0x%.4X,%s,",
                           $idx, $pair->[0], $mapped;
            $idx += 2; # Every unranged code takes two array entries
        }

        print OUTFILE "\n};\n\n#endif /* $guard && $GuardSize */\n\n";
    }
    else
    {
        # print, not printf, everywhere below: packed bytes may contain '%'
        # and must never be interpreted as a format string.

        # OUTPUT FIRST 3 ELEMENTS
        print OUTFILE pack($endian, $tbl->[$_]) foreach 0 .. 2;

        # OUTPUT RANGES
        foreach my $range (@ranges)
        {
            print OUTFILE pack($endian, $range->[$_]) foreach 0 .. 2;
        }

        # OUTPUT RANGES CONTENT
        foreach my $content (@contents)
        {
            foreach my $code (@$content)
            {
                my $value = !defined $code    ? $InvCode
                          : $code == $TmpLost ? $LostCode
                          :                     $code;
                print OUTFILE pack($endian, $value);
            }
        }

        # OUTPUT UNRANGED CODES
        foreach my $pair (@unranged)
        {
            my $mapped = $pair->[1] == $TmpLost ? $LostCode : $pair->[1];
            print OUTFILE pack($endian, $pair->[0]), pack($endian, $mapped);
        }
    }
}


# =============================================================================
#
# Parse command line options, validate them and set the correspondent
# global variables. Exits through Err() when options are inconsistent.
#
# =============================================================================
sub ProcessOptions()
{
    my $help_opt    = 'h'; # Print help option
    my $input_opt   = 'i'; # Input file name option
    my $output_opt  = 'o'; # Output file name option
    my $source_opt  = 'S'; # Generate C source file option
    my $enc_opt     = 'N'; # Encoding name
    my $plane_opt   = 'p'; # Plane number
    my $verbose_opt = 'v'; # Verbose output
    my $ccscol_opt  = 'x'; # Encoding's column number
    my $ucscol_opt  = 'y'; # UCS column number
    my $nosize_opt  = 'l'; # Don't generate size-optimized tables
    my $nospeed_opt = 'b'; # Don't generate speed-optimized tables
    my $nobe_opt    = 'B'; # Don't generate big-endian tables
    my $nole_opt    = 'L'; # Don't generate little-endian tables
    my $noto_opt    = 't'; # Don't generate "to_ucs" table
    my $nofrom_opt  = 'f'; # Don't generate "from_ucs" table

    my %args; # Command line arguments found by getopts()

    my $getopts_string =
        "$help_opt$source_opt$enc_opt:$verbose_opt$input_opt:$output_opt:$plane_opt:"
      . "$nosize_opt$nospeed_opt$nobe_opt$nole_opt$noto_opt$nofrom_opt$ccscol_opt:"
      . "$ucscol_opt:";

    getopts($getopts_string, \%args) || Err("getopts() failed: $!.\n", 1);

    # Print usage rules and exit.
    # NOTE(review): the option list of this help text was lost from this copy
    # of the script and is reconstructed from the option comments above -
    # confirm the wording against the upstream script.
    if ($args{$help_opt})
    {
        print <<END
Usage:
     -$help_opt - this help message;
     -$input_opt - input file name (required);
     -$output_opt - output file name;
     -$enc_opt - CCS or encoding name;
     -$plane_opt - plane number (high 16 bits) to use (in hex);
     -$source_opt - generate C source file;
     -$nosize_opt - generate speed-optimized tables only;
     -$nospeed_opt - generate size-optimized tables only;
     -$nobe_opt - generate Little Endian tables only;
     -$nole_opt - generate Big Endian tables only;
     -$noto_opt - generate "from_ucs" table only;
     -$nofrom_opt - generate "to_ucs" table only;
     -$verbose_opt - verbose output;
     -$ccscol_opt - encoding's column number;
     -$ucscol_opt - UCS column number.

If output file name isn't specified, <infile>.c (for sources) or
<infile>.cct (for binaries) is assumed.
If encoding name isn't specified <infile> is assumed.
<infile> is normalized (small letters, "-" are substituted by "_") input file
name base (no extension). For example, for Koi8-r.txt input file, <infile>
is koi8_r.
END
;
        exit 0;
    }

    $Verbose = $args{$verbose_opt};
    $Source  = $args{$source_opt};
    $NoSpeed = $args{$nospeed_opt};
    $NoSize  = $args{$nosize_opt};
    $NoBE    = $args{$nobe_opt};
    $NoLE    = $args{$nole_opt};
    $NoFrom  = $args{$nofrom_opt};
    $NoTo    = $args{$noto_opt};
    $CCSCol  = $args{$ccscol_opt};
    $UCSCol  = $args{$ucscol_opt};
    $Plane   = $args{$plane_opt};
    $InFile  = $args{$input_opt};
    $OutFile = $args{$output_opt};
    $CCSName = $args{$enc_opt};

    Err("Error: input file isn't defined. Use -$help_opt for help.\n", 1)
      unless $InFile;

    # Normalized input file name base: directory and extension are dropped,
    # letters are lowercased, "-" are substituted by "_".
    my $infile_base = sub
    {
        my $name = $InFile;
        $name =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L$2/;
        $name =~ tr/-/_/;
        return $name;
    };

    unless ($OutFile)
    {
        # Construct output file name
        $OutFile = $infile_base->() . ($Source ? ".c" : ".cct");
    }

    unless ($CCSName)
    {
        # Construct CCS name
        $CCSName = $infile_base->();
    }

    Err("-$nosize_opt option can't be used with -$nospeed_opt option "
      . "simultaniously.\n", 1) if $NoSpeed && $NoSize;

    Err("-$nobe_opt option can't be used with -$nole_opt option "
      . "simultaniously.\n", 1) if $NoBE && $NoLE;

    Err("-$noto_opt option can't be used with -$nofrom_opt option "
      . "simultaniously.\n", 1) if $NoTo && $NoFrom;

    Err("-$nosize_opt, -$nospeed_opt, -$nobe_opt -$nole_opt "
      . "-$noto_opt and -$nofrom_opt "
      . "options can't be used with -$source_opt option.\n"
      . "Source code always contains both speed- and size-optimized "
      . "tables in System Endian. Use -$help_opt for help.\n", 1)
      if $Source and $NoSpeed || $NoSize || $NoBE || $NoLE || $NoTo || $NoFrom;

    # Column numbers are 1-based on the command line and 0-based internally.
    if (!$CCSCol && !$UCSCol)
    {
        $CCSCol = 0;
        $UCSCol = 1;
    }
    elsif ($CCSCol && $UCSCol)
    {
        Err("Column number should be > 0\n", 1)
          if $CCSCol !~ /^\d+$/ or $UCSCol !~ /^\d+$/
          or $CCSCol <= 0 or $UCSCol <= 0;
        $CCSCol -= 1;
        $UCSCol -= 1;
    }
    else
    {
        Err("Please, define both CCS and UCS column numbers\n", 1);
    }

    if ($Verbose)
    {
        print "Use $InFile file for input.\n",
              "Use $OutFile file for output.\n",
              "Use $CCSName as CCS name.\n";
        print "Generate C source file.\n" if $Source;
        print "Generate binary file.\n" if !$Source;
        printf "Use plane N 0x%.4X.\n", hex $Plane if defined $Plane;
        print "Use column N $CCSCol for $CCSName.\n";
        print "Use column N $UCSCol for UCS.\n";
        print "Don't generate size-optimized tables.\n" if $NoSize;
        print "Don't generate speed-optimized tables.\n" if $NoSpeed;
        print "Don't generate big-endian tables.\n" if $NoBE;
        print "Don't generate little-endian tables.\n" if $NoLE;
        print "Don't generate \"to_ucs\" table.\n" if $NoTo;
        print "Don't generate \"from_ucs\" table.\n" if $NoFrom;
    }

    return;
}


# =============================================================================
#
# Print error message, close all and exit with status 1.
#
# Parameter 1: error message
# Parameter 2: don't delete output file if true
#
# =============================================================================
sub Err($;$)
{
    print STDERR "$_[0]";

    # Handles may not be opened yet when option parsing fails.
    close INFILE  if defined fileno(INFILE);
    close OUTFILE if defined fileno(OUTFILE);

    unlink $OutFile if !$_[1] && defined $OutFile;
    exit 1;
}