1 /*
2 Copyright (c) 2016 Corinna Vinschen <corinna@vinschen.de>
3 Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling
4  */
5 /* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
6 #include <_ansi.h>
7 #include <wctype.h>
8 #include <stdint.h>
9 //#include <errno.h>
10 #include "local.h"
11 
12 /*
13    struct caseconv_entry describes the case conversion behaviour
14    of a range of Unicode characters.
15    It was designed to be compact for a minimal table size.
16    The range is first...first + diff.
17    Conversion behaviour for a character c in the respective range:
18      mode == TOLO	towlower (c) = c + delta
19      mode == TOUP	towupper (c) = c + delta
20      mode == TOBOTH	(titling case characters)
21 			towlower (c) = c + 1
22 			towupper (c) = c - 1
23      mode == TO1	capital/small letters are alternating
24 	delta == EVENCAP	even codes are capital
25 	delta == ODDCAP		odd codes are capital
26 			(this correlates with an even/odd first range value
27 			as of Unicode 10.0 but we do not rely on this)
28    As of Unicode 10.0, the following field lengths are sufficient
29 	first: 17 bits
30 	diff: 8 bits
31 	delta: 17 bits
32 	mode: 2 bits
33    The reserve of 4 bits (to limit the struct to 6 bytes)
34    is currently added to the 'first' field;
35    should a future Unicode version make it necessary to expand the others,
36    the 'first' field could be reduced as needed, or larger ranges could
37    be split up (reduce limit max=255 e.g. to max=127 or max=63 in
38    script mkcaseconv, check increasing table size).
39  */
/* Conversion modes stored in caseconv_entry.mode:
     TO1	capital and small letters alternate within the range;
		which parity is capital is given by delta (see below)
     TOLO	towlower (c) = c + delta
     TOUP	towupper (c) = c + delta
     TOBOTH	titling case characters:
		towlower (c) = c + 1, towupper (c) = c - 1  */
enum {TO1, TOLO, TOUP, TOBOTH};
/* Values of caseconv_entry.delta when mode == TO1:
     EVENCAP	even codes are capital
     ODDCAP	odd codes are capital  */
enum {EVENCAP, ODDCAP};
/* One entry describes the case conversion of the inclusive code point
   range first ... first + diff.  The bit-field widths add up to
   21 + 8 + 2 + 17 = 48 bits so that a packed entry occupies 6 bytes.
   The initializers pulled in from caseconv.t are positional, so the
   order of the members must not be changed.  */
static const struct caseconv_entry {
  /* First code point of the range.  */
  uint_least32_t first: 21;
  /* Number of code points following 'first' in the range,
     i.e. the last code point is first + diff.  */
  uint_least32_t diff: 8;
  /* One of TO1, TOLO, TOUP, TOBOTH.  */
  uint_least32_t mode: 2;
  /* For TOLO/TOUP: signed offset added to the character.
     For TO1: EVENCAP or ODDCAP.  Not consulted for TOBOTH.  */
#ifdef __MSP430__
  /*
   * MSP430 has 20-bit integers which the compiler attempts to use and
   * fails. Waste some memory to fix that.
   */
  int_least32_t delta;
#else
  int_least32_t delta: 17;
#endif
}
/* Pack the struct only where the toolchain is known to support
   bit-fields inside packed structs.  */
#ifdef _HAVE_BITFIELDS_IN_PACKED_STRUCTS
__attribute__((packed))
#endif
/* The table itself; its contents come from caseconv.t.  It is searched
   with bisearch, which relies on the entries being in ascending order
   of code point.  */
caseconv_table [] = {
#include "caseconv.t"
};
/* Accessors for the inclusive code point range covered by a
   struct caseconv_entry value CE: first (CE) is the lowest code point,
   last (CE) the highest (first + diff).  The argument is parenthesized
   so that any expression of struct type works, e.g. first (*p), not
   only postfix expressions such as table[i].  Note that last ()
   evaluates its argument twice, so CE must be free of side effects.  */
#define first(ce)	((wint_t) (ce).first)
#define last(ce)	((wint_t) ((ce).first + (ce).diff))
64 
65 /* auxiliary function for binary search in interval properties table */
66 static const struct caseconv_entry *
bisearch(wint_t ucs,const struct caseconv_entry * table,size_t max)67 bisearch (wint_t ucs, const struct caseconv_entry *table, size_t max)
68 {
69   size_t min = 0;
70   size_t mid;
71 
72   if (ucs < first(table[0]) || ucs > last(table[max]))
73     return 0;
74   while (max >= min)
75     {
76       mid = (min + max) / 2;
77       if (ucs > last(table[mid]))
78 	min = mid + 1;
79       else if (ucs < first(table[mid]))
80 	max = mid - 1;
81       else
82 	return &table[mid];
83     }
84   return 0;
85 }
86 
87 static wint_t
toulower(wint_t c)88 toulower (wint_t c)
89 {
90   const struct caseconv_entry * cce =
91     bisearch(c, caseconv_table,
92              sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
93 
94   if (cce)
95     switch (cce->mode)
96       {
97       case TOLO:
98 	return c + cce->delta;
99       case TOBOTH:
100 	return c + 1;
101       case TO1:
102 	switch (cce->delta)
103 	  {
104 	  case EVENCAP:
105 	    if (!(c & 1))
106 	      return c + 1;
107 	    break;
108 	  case ODDCAP:
109 	    if (c & 1)
110 	      return c + 1;
111 	    break;
112 	  default:
113 	    break;
114 	  }
115 	default:
116 	  break;
117       }
118 
119   return c;
120 }
121 
122 static wint_t
touupper(wint_t c)123 touupper (wint_t c)
124 {
125   const struct caseconv_entry * cce =
126     bisearch(c, caseconv_table,
127              sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
128 
129   if (cce)
130     switch (cce->mode)
131       {
132       case TOUP:
133 	return c + cce->delta;
134       case TOBOTH:
135 	return c - 1;
136       case TO1:
137 	switch (cce->delta)
138 	  {
139 	  case EVENCAP:
140 	    if (c & 1)
141 	      return c - 1;
142 	    break;
143 	  case ODDCAP:
144 	    if (!(c & 1))
145 	      return c - 1;
146 	    break;
147 	  default:
148 	    break;
149 	  }
150       default:
151 	break;
152       }
153 
154   return c;
155 }
156 
157 wint_t
towctrans_l(wint_t c,wctrans_t w,struct __locale_t * locale)158 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
159 {
160   (void) locale;
161 #ifdef _MB_CAPABLE
162   wint_t u = _jp2uc_l (c, locale);
163 #else
164   wint_t u = c;
165 #endif
166   wint_t res;
167   if (w == WCT_TOLOWER)
168     res = toulower (u);
169   else if (w == WCT_TOUPPER)
170     res = touupper (u);
171   else
172     {
173       // skipping the errno setting that was previously involved
174       // by delegating to towctrans; it was causing trouble (cygwin crash)
175       // and there is no errno specified for towctrans
176       return c;
177     }
178   if (res != u)
179 #ifdef _MB_CAPABLE
180     return _uc2jp_l (res, locale);
181 #else
182     return res;
183 #endif
184   else
185     return c;
186 }
187