1 /*
2 Copyright (c) 2016 Corinna Vinschen <corinna@vinschen.de>
3 Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling
4 */
5 /* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
6 #include <_ansi.h>
7 #include <wctype.h>
8 #include <stdint.h>
9 //#include <errno.h>
10 #include "local.h"
11
12 /*
13 struct caseconv_entry describes the case conversion behaviour
14 of a range of Unicode characters.
15 It was designed to be compact for a minimal table size.
16 The range is first...first + diff.
17 Conversion behaviour for a character c in the respective range:
18 mode == TOLO towlower (c) = c + delta
19 mode == TOUP towupper (c) = c + delta
20 mode == TOBOTH (titling case characters)
21 towlower (c) = c + 1
22 towupper (c) = c - 1
23 mode == TO1 capital/small letters are alternating
24 delta == EVENCAP even codes are capital
25 delta == ODDCAP odd codes are capital
26 (this correlates with an even/odd first range value
27 as of Unicode 10.0 but we do not rely on this)
28 As of Unicode 10.0, the following field lengths are sufficient
29 first: 17 bits
30 diff: 8 bits
31 delta: 17 bits
32 mode: 2 bits
33 The reserve of 4 bits (to limit the struct to 6 bytes)
34 is currently added to the 'first' field;
35 should a future Unicode version make it necessary to expand the others,
36 the 'first' field could be reduced as needed, or larger ranges could
37 be split up (reduce limit max=255 e.g. to max=127 or max=63 in
38 script mkcaseconv, check increasing table size).
39 */
/* Conversion modes stored in the 'mode' field of a caseconv_entry.  */
enum
{
  TO1 = 0,	/* capital/small letters alternate within the range */
  TOLO = 1,	/* towlower (c) = c + delta */
  TOUP = 2,	/* towupper (c) = c + delta */
  TOBOTH = 3	/* titling case: towlower (c) = c + 1, towupper (c) = c - 1 */
};

/* For mode TO1, the 'delta' field tells which code parity is capital.  */
enum
{
  EVENCAP = 0,	/* even codes are capital */
  ODDCAP = 1	/* odd codes are capital */
};
42 static const struct caseconv_entry {
43 uint_least32_t first: 21;
44 uint_least32_t diff: 8;
45 uint_least32_t mode: 2;
46 #ifdef __MSP430__
47 /*
48 * MSP430 has 20-bit integers which the compiler attempts to use and
49 * fails. Waste some memory to fix that.
50 */
51 int_least32_t delta;
52 #else
53 int_least32_t delta: 17;
54 #endif
55 }
56 #ifdef _HAVE_BITFIELDS_IN_PACKED_STRUCTS
57 __attribute__((packed))
58 #endif
59 caseconv_table [] = {
60 #include "caseconv.t"
61 };
/*
   Bounds of the code point range described by table entry ce,
   converted to wint_t for comparison with a character.
   The argument is parenthesized so that any expression yielding an
   entry (e.g. *ptr) can be passed, not only postfix expressions.
*/
#define first(ce) ((wint_t) (ce).first)
#define last(ce) ((wint_t) ((ce).first + (ce).diff))
64
65 /* auxiliary function for binary search in interval properties table */
66 static const struct caseconv_entry *
bisearch(wint_t ucs,const struct caseconv_entry * table,size_t max)67 bisearch (wint_t ucs, const struct caseconv_entry *table, size_t max)
68 {
69 size_t min = 0;
70 size_t mid;
71
72 if (ucs < first(table[0]) || ucs > last(table[max]))
73 return 0;
74 while (max >= min)
75 {
76 mid = (min + max) / 2;
77 if (ucs > last(table[mid]))
78 min = mid + 1;
79 else if (ucs < first(table[mid]))
80 max = mid - 1;
81 else
82 return &table[mid];
83 }
84 return 0;
85 }
86
87 static wint_t
toulower(wint_t c)88 toulower (wint_t c)
89 {
90 const struct caseconv_entry * cce =
91 bisearch(c, caseconv_table,
92 sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
93
94 if (cce)
95 switch (cce->mode)
96 {
97 case TOLO:
98 return c + cce->delta;
99 case TOBOTH:
100 return c + 1;
101 case TO1:
102 switch (cce->delta)
103 {
104 case EVENCAP:
105 if (!(c & 1))
106 return c + 1;
107 break;
108 case ODDCAP:
109 if (c & 1)
110 return c + 1;
111 break;
112 default:
113 break;
114 }
115 default:
116 break;
117 }
118
119 return c;
120 }
121
122 static wint_t
touupper(wint_t c)123 touupper (wint_t c)
124 {
125 const struct caseconv_entry * cce =
126 bisearch(c, caseconv_table,
127 sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
128
129 if (cce)
130 switch (cce->mode)
131 {
132 case TOUP:
133 return c + cce->delta;
134 case TOBOTH:
135 return c - 1;
136 case TO1:
137 switch (cce->delta)
138 {
139 case EVENCAP:
140 if (c & 1)
141 return c - 1;
142 break;
143 case ODDCAP:
144 if (!(c & 1))
145 return c - 1;
146 break;
147 default:
148 break;
149 }
150 default:
151 break;
152 }
153
154 return c;
155 }
156
157 wint_t
towctrans_l(wint_t c,wctrans_t w,struct __locale_t * locale)158 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
159 {
160 (void) locale;
161 #ifdef _MB_CAPABLE
162 wint_t u = _jp2uc_l (c, locale);
163 #else
164 wint_t u = c;
165 #endif
166 wint_t res;
167 if (w == WCT_TOLOWER)
168 res = toulower (u);
169 else if (w == WCT_TOUPPER)
170 res = touupper (u);
171 else
172 {
173 // skipping the errno setting that was previously involved
174 // by delegating to towctrans; it was causing trouble (cygwin crash)
175 // and there is no errno specified for towctrans
176 return c;
177 }
178 if (res != u)
179 #ifdef _MB_CAPABLE
180 return _uc2jp_l (res, locale);
181 #else
182 return res;
183 #endif
184 else
185 return c;
186 }
187