#! /bin/sh
# Copyright(c) 2018 Thomas Wolff <towo@towo.net>

# Generate list of wide characters, with convex closure.
#
# Inputs:   EastAsianWidth.txt, UnicodeData.txt and Blocks.txt in the current
#           directory; each is symlinked from /usr/share/unicode/ucd when it
#           is not readable here.
# Output:   wide.t, holding the first line of EastAsianWidth.txt and of
#           Blocks.txt as // comments, followed by the output of
#           `uniset ... c` for the wide set plus the selected blocks.
# Requires: the `uniset` tool, found via PATH or in the current directory.
# Exit:     1 if an input file cannot be linked, uniset is missing, or the
#           character counts cannot be determined.

# NOTE(review): not referenced anywhere else in this script; kept unchanged.
skipcheck=false

# Make sure the Unicode data files are available.
# NOTE(review): UnicodeData.txt is not read directly by this script;
# presumably uniset needs it - confirm before dropping it.
for ucdfile in EastAsianWidth.txt UnicodeData.txt Blocks.txt
do
  if [ ! -r "$ucdfile" ]
  then
    ln -s "/usr/share/unicode/ucd/$ucdfile" . || exit 1
  fi
done

PATH="$PATH:." # for uniset

# Fail early instead of producing an empty or bogus table after a long run.
command -v uniset > /dev/null 2>&1 || {
  echo "uniset not found (searched PATH and the current directory)" >&2
  exit 1
}

# Remove the intermediate files on every exit path, including interruption
# of the long-running block check below.
cleanup () {
  rm -f wide.na wide.fw wide.blocks
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Split EastAsianWidth.txt into code points/ranges whose width property
# starts with N, A or H (wide.na) and those with W or F (wide.fw).
# The patterns accept optional white space around the ';' separator and drop
# the rest of the line, so that the 'a' of property value "Na" is not left
# glued to the hexadecimal code point (e.g. "0020..007Ea").
sed -e 's,^\([0-9A-F][0-9A-F.]*\)[[:space:]]*;[[:space:]]*[NAH].*,\1,' \
    -e t -e d EastAsianWidth.txt > wide.na
sed -e 's,^\([0-9A-F][0-9A-F.]*\)[[:space:]]*;[[:space:]]*[WF].*,\1,' \
    -e t -e d EastAsianWidth.txt > wide.fw

# count UNISET_ARG...
# Run `uniset UNISET_ARG... nr` and print what follows the last ':' of its
# output, with surrounding white space trimmed.
count () {
  uniset "$@" nr |
    sed -e 's,.*:,,' -e 's,^[[:space:]]*,,' -e 's,[[:space:]]*$,,'
}

nrfw=$(count +wide.fw)
echo "FW $nrfw"
nrna=$(count +wide.na)
echo "NAH $nrna"

if [ -z "$nrfw" ] || [ -z "$nrna" ]
then
  echo "could not determine character counts from uniset" >&2
  exit 1
fi

extrablocks="2E80-303E"

# check all blocks

# includes RANGE SET TOTAL
#   RANGE  code point range START-END
#   SET    suffix of the set file wide.SET (fw or na)
#   TOTAL  count of the complete set wide.SET
# Succeeds if the count of wide.SET with RANGE removed differs from TOTAL,
# i.e. if RANGE holds at least one character of the set.
# Leaves the reduced count in the global variable nr.
includes () {
  nr=$(count "+wide.$2" "-$1")
  test "$nr" != "$3"
}

echo "adding compact closure of wide ranges, this may take ~10min"
# $extrablocks and the sed output are intentionally unquoted: every
# white-space separated START-END item is one range to check.
for range in $extrablocks \
  $(sed -e 's,^\([0-9A-F]*\)\.\.\([0-9A-F]*\).*,\1-\2,' -e t -e d Blocks.txt)
do
  # $* (the script arguments) is left unquoted to keep the progress message
  # exactly as before.
  echo checking "$range" $* >&2
  # Keep a range only if it contains wide characters and no N/A/H ones.
  if includes "$range" fw "$nrfw" && ! includes "$range" na "$nrna"
  then
    echo "$range"
  fi
done > wide.blocks

(
  sed -e "s,^,//," -e 1q EastAsianWidth.txt
  sed -e "s,^,//," -e 1q Blocks.txt
  # Unquoted on purpose: one +RANGE argument per line of wide.blocks.
  uniset $(sed -e 's,^,+,' wide.blocks) +wide.fw c
) > wide.t