#! /bin/sh -f

# Copyright (c) 2018 Thomas Wolff <towo@towo.net>

# Generate a table for Unicode case conversion; entries:
# struct caseconv_entry defined in towctrans_l.c
#
# Input:  UnicodeData.txt, taken from the current directory or else from
#         /usr/share/unicode/ucd/.
# Output: caseconv.t in the current directory.
# Exit:   1 if UnicodeData.txt cannot be read or if no table data was
#         generated (an existing caseconv.t is left untouched in that case).
# Note:   the compact mode relies on "uniq --group", a GNU coreutils option.

if [ -r UnicodeData.txt ]
then	UnicodeData=UnicodeData.txt
elif [ -r /usr/share/unicode/ucd/UnicodeData.txt ]
then	UnicodeData=/usr/share/unicode/ucd/UnicodeData.txt
else	echo UnicodeData.txt not found >&2
	exit 1
fi

LC_ALL=C
export LC_ALL

# true:  emit run-length compressed entries {first, diff, mode, delta}
# false: emit one {code, upper - code, lower - code} entry per character
compact=true

# The table is built in a temporary file and only moved into place once the
# pipeline has produced output.  The exit status of a pipeline is that of its
# last stage, so a failing earlier stage would otherwise go unnoticed and
# leave an empty caseconv.t behind.
out=caseconv.t
tmp=$out.$$
trap 'rm -f "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# Sample UnicodeData.txt records (field 13: upper, 14: lower, 15: title):
#0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
#0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041
#0130;LATIN CAPITAL LETTER I WITH DOT ABOVE;Lu;0;L;0049 0307;;;;N;LATIN CAPITAL LETTER I DOT;;;0069;
#01C4;LATIN CAPITAL LETTER DZ WITH CARON;Lu;0;L;<compat> 0044 017D;;;;N;LATIN CAPITAL LETTER D Z HACEK;;;01C6;01C5
#01C5;LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
#01C6;LATIN SMALL LETTER DZ WITH CARON;Ll;0;L;<compat> 0064 017E;;;;N;LATIN SMALL LETTER D Z HACEK;;01C4;;01C5

# Stage 1: strip carriage returns.
# Stage 2: keep only records in which at least one of the three trailing
#          fields is non-empty (one sed expression per field) and rewrite
#          them as: src CODE upper "U" lower "L" title "T"
#          "-e t" skips to the next record after a successful rewrite;
#          "-e d" drops every record that matched none of the expressions.
tr -d '\015' < "$UnicodeData" |
sed \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;][^;]*\);\([^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;][^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;]*\);\([^;][^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e d |
(#src 01C5 upper "01C4" lower "01C6" title "01C5"
if $compact
then
	# Stage 3a: prepend the definition of src() to the record stream and
	# run the result with sh, so that every record becomes a call of src.
	(
	cat <<\/EOS
# src CODE upper "U" lower "L" title "T"
# Print one classification line for CODE:
#   " 0xCODE TOUP delta"   only an upper case mapping, at distance delta
#   " 0xCODE TOLO delta"   only a lower case mapping, at distance delta
#   " 0xCODE TO1 ODDCAP"   adjacent pair, capital (odd code) comes first
#   " 0xCODE TO1 EVENCAP"  adjacent pair, capital (even code) comes first
#   " 0xCODE TOBOTH 0"     lower case at +1 and upper case at -1
#   "'#error' U+CODE ..."  combination not handled by this generator
# Nothing is printed if both distances are 0.  The title field is not used.
src () {
	if [ -n "$3" ]
	then	tohi=$(( 0x0$3 - 0x0$1 ))
	else	tohi=0
	fi
	if [ -n "$5" ]
	then	tolo=$(( 0x0$5 - 0x0$1 ))
	else	tolo=0
	fi
	case "$tolo.$tohi" in
	0.0)	true;;
	0.*)
		case "$1.$tohi" in
		*[02468ACE].1)	echo "'#error' U+$1 ODDSML";;
		*[02468ACE].-1)	echo " 0x$1 TO1 ODDCAP";;
		*[13579BDF].1)	echo "'#error' U+$1 EVENSML";;
		*[13579BDF].-1)	echo " 0x$1 TO1 EVENCAP";;
		*)	echo " 0x$1 TOUP $tohi";;
		esac;;
	*.0)
		case "$1.$tolo" in
		*[02468ACE].1)	echo " 0x$1 TO1 EVENCAP";;
		*[02468ACE].-1)	echo "'#error' U+$1 EVENSML";;
		*[13579BDF].1)	echo " 0x$1 TO1 ODDCAP";;
		*[13579BDF].-1)	echo "'#error' U+$1 ODDSML";;
		*)	echo " 0x$1 TOLO $tolo";;
		esac;;
	*)	case "$tolo.$tohi" in
		1.-1)	echo " 0x$1 TOBOTH 0";;
		*)	echo "'#error' U+$1";;
		esac;;
	esac
}
/EOS
	cat
	) | sh |
	# Stage 3b: group successive lines that agree in everything but the
	# first field (the code point).  uniq writes an empty line after each
	# group; sed turns that into a "range" call and every other line into
	# an "item" call.
	uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," |
	# Stage 3c: prepend item() and range() and run the calls with sh,
	# merging runs of consecutive code points into single table entries.
	(
	cat <<\/EOS
first=
diff=-1
# Largest $diff emitted in one entry; presumably the capacity of the
# matching field of struct caseconv_entry (to be confirmed there).
max=255
# Flush the pending run, if there is one, as " {first, diff, mode, delta},"
# and start over.
range () {
	# $diff == $(($last - $first))
	if [ "$diff" -ge 0 ]
	then	# we have items at all
		echo " {$first, $diff, $v2, $v3},"
	fi
	first=
	diff=-1
}
# item 0xCODE MODE DELTA  or  item '#error' text...
# Extend the pending run by CODE; flush first if the run is already at its
# maximum length or if CODE does not directly follow the previous code.
# #error lines are passed through to the output.
item () {
	if [ "$1" = "#error" ]
	then	echo "$*"
		return
	fi

	if [ "$diff" -eq "$max" ]
	then	range
	elif [ -n "$first" ]
	then	if [ $(( $1 )) -ne $(( ${last-0} + 1 )) ]
		then	range
		fi
	fi

	if [ -z "$first" ]
	then	first=$1
		v2=$2
		v3=$3
	fi

	last=$1
	diff=$(( $diff + 1 ))
}
/EOS
	cat
	) | sh
elif false
then
	# Disabled alternative: one entry per character, with the differences
	# evaluated to decimal numbers by a generated list of echo commands.
	sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/ {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
	    -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/' \
	    -e 's/\(0x[0-9A-F][0-9A-F]*\) - \(0x[0-9A-F][0-9A-F]*\)/$((`printf %d \1` - `printf %d \2`))/g' \
	    -e 's/^/echo "/' -e 's/$/"/' |
	sh
else
	# Non-compact mode: one entry per character; the differences are left
	# as "0xU - 0xC" expressions, a missing mapping becomes 0.
	sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/ {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
	    -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/'
fi
) > "$tmp"

# Install the table only if something was generated.
if [ -s "$tmp" ]
then	mv -f "$tmp" "$out" || exit 1
else	echo "$out: no case conversion entries generated" >&2
	exit 1
fi