1 /*
2 Copyright (c) 2016 Corinna Vinschen <corinna@vinschen.de>
3 Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling
4 */
5 /* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
6 #include <_ansi.h>
7 #include <wctype.h>
8 #include <stdint.h>
9 //#include <errno.h>
10 #include "local.h"
11
12 /*
13 struct caseconv_entry describes the case conversion behaviour
14 of a range of Unicode characters.
15 It was designed to be compact for a minimal table size.
16 The range is first...first + diff.
17 Conversion behaviour for a character c in the respective range:
18 mode == TOLO towlower (c) = c + delta
19 mode == TOUP towupper (c) = c + delta
20 mode == TOBOTH (titling case characters)
21 towlower (c) = c + 1
22 towupper (c) = c - 1
23 mode == TO1 capital/small letters are alternating
24 delta == EVENCAP even codes are capital
25 delta == ODDCAP odd codes are capital
26 (this correlates with an even/odd first range value
27 as of Unicode 10.0 but we do not rely on this)
28 As of Unicode 10.0, the following field lengths are sufficient
29 first: 17 bits
30 diff: 8 bits
31 delta: 17 bits
32 mode: 2 bits
33 The reserve of 4 bits (to limit the struct to 6 bytes)
34 is currently added to the 'first' field;
35 should a future Unicode version make it necessary to expand the others,
36 the 'first' field could be reduced as needed, or larger ranges could
37 be split up (reduce limit max=255 e.g. to max=127 or max=63 in
38 script mkcaseconv, check increasing table size).
39 */
/* Conversion modes stored in the 'mode' field of a caseconv_entry.  */
enum
{
  TO1 = 0,	/* capital/small letters alternate within the range */
  TOLO = 1,	/* towlower (c) = c + delta */
  TOUP = 2,	/* towupper (c) = c + delta */
  TOBOTH = 3	/* titling case: towlower (c) = c + 1, towupper (c) = c - 1 */
};

/* Values of the 'delta' field when mode == TO1.  */
enum
{
  EVENCAP = 0,	/* even codes are capital */
  ODDCAP = 1	/* odd codes are capital */
};
42 static struct caseconv_entry {
43 uint_least32_t first: 21;
44 uint_least32_t diff: 8;
45 uint_least32_t mode: 2;
46 int_least32_t delta: 17;
47 }
48 #ifdef _HAVE_BITFIELDS_IN_PACKED_STRUCTS
49 __attribute__((packed))
50 #endif
51 caseconv_table [] = {
52 #include "caseconv.t"
53 };
/* Inclusive bounds of the code point range described by entry CE.
   The argument is parenthesized so that any expression yielding an
   entry (e.g. *ptr or *(table + i)) expands correctly.  */
#define first(ce) ((wint_t) (ce).first)
#define last(ce) ((wint_t) ((ce).first + (ce).diff))
56
57 /* auxiliary function for binary search in interval properties table */
58 static const struct caseconv_entry *
bisearch(wint_t ucs,const struct caseconv_entry * table,int max)59 bisearch (wint_t ucs, const struct caseconv_entry *table, int max)
60 {
61 int min = 0;
62 int mid;
63
64 if (ucs < first(table[0]) || ucs > last(table[max]))
65 return 0;
66 while (max >= min)
67 {
68 mid = (min + max) / 2;
69 if (ucs > last(table[mid]))
70 min = mid + 1;
71 else if (ucs < first(table[mid]))
72 max = mid - 1;
73 else
74 return &table[mid];
75 }
76 return 0;
77 }
78
79 static wint_t
toulower(wint_t c)80 toulower (wint_t c)
81 {
82 const struct caseconv_entry * cce =
83 bisearch(c, caseconv_table,
84 sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
85
86 if (cce)
87 switch (cce->mode)
88 {
89 case TOLO:
90 return c + cce->delta;
91 case TOBOTH:
92 return c + 1;
93 case TO1:
94 switch (cce->delta)
95 {
96 case EVENCAP:
97 if (!(c & 1))
98 return c + 1;
99 break;
100 case ODDCAP:
101 if (c & 1)
102 return c + 1;
103 break;
104 default:
105 break;
106 }
107 default:
108 break;
109 }
110
111 return c;
112 }
113
114 static wint_t
touupper(wint_t c)115 touupper (wint_t c)
116 {
117 const struct caseconv_entry * cce =
118 bisearch(c, caseconv_table,
119 sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
120
121 if (cce)
122 switch (cce->mode)
123 {
124 case TOUP:
125 return c + cce->delta;
126 case TOBOTH:
127 return c - 1;
128 case TO1:
129 switch (cce->delta)
130 {
131 case EVENCAP:
132 if (c & 1)
133 return c - 1;
134 break;
135 case ODDCAP:
136 if (!(c & 1))
137 return c - 1;
138 break;
139 default:
140 break;
141 }
142 default:
143 break;
144 }
145
146 return c;
147 }
148
149 wint_t
towctrans_l(wint_t c,wctrans_t w,struct __locale_t * locale)150 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
151 {
152 (void) locale;
153 #ifdef _MB_CAPABLE
154 wint_t u = _jp2uc_l (c, locale);
155 #else
156 wint_t u = c;
157 #endif
158 wint_t res;
159 if (w == WCT_TOLOWER)
160 res = toulower (u);
161 else if (w == WCT_TOUPPER)
162 res = touupper (u);
163 else
164 {
165 // skipping the errno setting that was previously involved
166 // by delegating to towctrans; it was causing trouble (cygwin crash)
167 // and there is no errno specified for towctrans
168 return c;
169 }
170 if (res != u)
171 #ifdef _MB_CAPABLE
172 return _uc2jp_l (res, locale);
173 #else
174 return res;
175 #endif
176 else
177 return c;
178 }
179