1#! /bin/sh -f
2
3# Copyright (c) 2018 Thomas Wolff <towo@towo.net>
4
5# generate a table for Unicode case conversion; entries:
6# struct caseconv_entry defined in towctrans_l.c
7
# Locate the Unicode Character Database file: a readable copy in the
# current directory wins, otherwise the copy below /usr/share is used.
# Without either there is nothing to generate from, so give up.
UnicodeData=
for candidate in UnicodeData.txt /usr/share/unicode/ucd/UnicodeData.txt
do	if [ -r "$candidate" ]
	then	UnicodeData=$candidate
		break
	fi
done
if [ -z "$UnicodeData" ]
then	echo UnicodeData.txt not found >&2
	exit 1
fi
15
# Output format switch, run as a command ("true" or "false") further down:
# true selects the compact, range-compressed table format.
compact=true

# Export the C locale so that every tool started from here runs in it.
export LC_ALL=C
20
21#0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
22#0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041
23#0130;LATIN CAPITAL LETTER I WITH DOT ABOVE;Lu;0;L;0049 0307;;;;N;LATIN CAPITAL LETTER I DOT;;;0069;
24#01C4;LATIN CAPITAL LETTER DZ WITH CARON;Lu;0;L;<compat> 0044 017D;;;;N;LATIN CAPITAL LETTER D Z HACEK;;;01C6;01C5
25#01C5;LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
26#01C6;LATIN SMALL LETTER DZ WITH CARON;Ll;0;L;<compat> 0064 017E;;;;N;LATIN SMALL LETTER D Z HACEK;;01C4;;01C5
27
# generate_caseconv: turn UnicodeData.txt records into case conversion
# table entries.
#   stdin   records in UnicodeData.txt format (15 ;-separated fields),
#           with or without a trailing CR
#   stdout  one C initializer line per table entry
#   global  compact (read): command name, "true" selects the
#           range-compressed format, anything failing the plain one
#
# The compact format is produced by two shell scripts generated on the fly:
# each stage prepends function definitions to its input, whose lines are
# calls of these functions, and feeds the result to sh.
#   1. src   classifies every mapping record as "  0xCODE KIND ARG"
#   2. uniq -f1 --group=append (GNU) ends every run of lines with equal
#      KIND and ARG with an empty line; sed turns the empty lines into
#      "range" calls and all other lines into "item" calls
#   3. item/range merge consecutive code points of a run into one entry
generate_caseconv () {
  # Keep only the records having an upper, lower or title mapping (fields
  # 13 to 15) and rewrite them as
  #   src 01C5 upper "01C4" lower "01C6" title "01C5"
  # The three substitutions differ only in which of the mappings has to be
  # non-empty; "t" ends the cycle after a match, "d" drops all other lines.
  tr -d '\015' |
  sed \
    -e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;][^;]*\);\([^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
    -e t \
    -e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;][^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
    -e t \
    -e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;]*\);\([^;][^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
    -e t \
    -e d |
  (
  if $compact
  then
    (
    cat <<\/EOS
  # src CODE upper UP lower LO title TI
  # Print the classification line of one mapping record. CODE, UP and LO
  # are hex code points without prefix, UP and LO may be empty; the title
  # mapping is not evaluated.
  src () {
    # signed distance from CODE to its upper/lower mapping, 0 if there is none
    if [ -n "$3" ]
    then	tohi=$(( 0x0$3 - 0x0$1 ))
    else	tohi=0
    fi
    if [ -n "$5" ]
    then	tolo=$(( 0x0$5 - 0x0$1 ))
    else	tolo=0
    fi
    case "$tolo.$tohi" in
    0.0)	true;;	# neither upper nor lower mapping: no entry
    0.*)
	# upper mapping only. A capital directly below CODE is a TO1 pair,
	# named after the parity of the capital; a capital directly above
	# CODE is reported as '#error'.
	case "$1.$tohi" in
	*[02468ACE].1)	echo "'#error' U+$1 ODDSML";;
	*[02468ACE].-1)	echo "  0x$1 TO1 ODDCAP";;
	*[13579BDF].1)	echo "'#error' U+$1 EVENSML";;
	*[13579BDF].-1)	echo "  0x$1 TO1 EVENCAP";;
	*)		echo "  0x$1 TOUP $tohi";;
	esac;;
    *.0)
	# lower mapping only. A small letter directly above CODE is a TO1
	# pair; a small letter directly below CODE is reported as '#error'.
	case "$1.$tolo" in
	*[02468ACE].1)	echo "  0x$1 TO1 EVENCAP";;
	*[02468ACE].-1)	echo "'#error' U+$1 EVENSML";;
	*[13579BDF].1)	echo "  0x$1 TO1 ODDCAP";;
	*[13579BDF].-1)	echo "'#error' U+$1 ODDSML";;
	*)		echo "  0x$1 TOLO $tolo";;
	esac;;
    *)	# both mappings: only lower at +1 together with upper at -1 is
	# supported, anything else is reported as '#error'
	case "$tolo.$tohi" in
	1.-1)		echo "  0x$1 TOBOTH 0";;
	*)		echo "'#error' U+$1";;
	esac;;
    esac
  }
/EOS
    cat
    ) | sh |
    uniq -f1 --group=append |
    sed -e 's,^$,range,' -e t -e 's,^,item ,' |
    (
    cat <<\/EOS
  # State of the range being collected:
  #   first   first code point, empty while no range is open
  #   last    code point added most recently
  #   diff    last - first, -1 while no range is open
  #   v2, v3  KIND and ARG of the range, taken from its first item
  first=
  diff=-1
  # a range is closed as soon as its diff has reached this value
  # NOTE(review): presumably the limit of the diff field of
  # struct caseconv_entry - confirm in towctrans_l.c
  max=255
  # range: print the collected range, if any, as "  {first, diff, v2, v3},"
  # and start over with no range open.
  range () {
	# $diff == $(($last - $first))
	if [ "$diff" -ge 0 ]
	then	# we have items at all
		echo "  {$first, $diff, $v2, $v3},"
	fi
	first=
	diff=-1
  }
  # item 0xCODE KIND ARG: add a code point to the open range; the range is
  # closed first if it is full or if CODE does not directly follow $last.
  # item '#error' ...: print the arguments and leave the range untouched.
  item () {
	if [ "$1" = "#error" ]
	then	echo "$*"
		return
	fi

	if [ "$diff" -eq "$max" ]
	then	range
	elif [ -n "$first" ]
	then	if [ $(( $1 )) -ne $(( ${last-0} + 1 )) ]
		then	range
		fi
	fi

	if [ -z "$first" ]
	then	first=$1
		v2=$2
		v3=$3
	fi

	last=$1
	diff=$(( $diff + 1 ))
  }
/EOS
    cat
    ) | sh
  else
    # plain format: one entry per code point with the offsets left as
    # C expressions, e.g. "  {0x0041, 0, 0x0061 - 0x0041},"; the offset of
    # a missing mapping becomes 0
    sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/  {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
        -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/'
  fi
  )
}

generate_caseconv < "$UnicodeData" > caseconv.t
131