1 /*
2 Copyright (c) 2016 Corinna Vinschen <corinna@vinschen.de>
3 Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling
4  */
5 /* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
6 #include <_ansi.h>
7 #include <wctype.h>
8 #include <stdint.h>
9 //#include <errno.h>
10 #include "local.h"
11 
12 /*
13    struct caseconv_entry describes the case conversion behaviour
14    of a range of Unicode characters.
15    It was designed to be compact for a minimal table size.
16    The range is first...first + diff.
17    Conversion behaviour for a character c in the respective range:
18      mode == TOLO	towlower (c) = c + delta
19      mode == TOUP	towupper (c) = c + delta
20      mode == TOBOTH	(titling case characters)
21 			towlower (c) = c + 1
22 			towupper (c) = c - 1
23      mode == TO1	capital/small letters are alternating
24 	delta == EVENCAP	even codes are capital
25 	delta == ODDCAP		odd codes are capital
26 			(this correlates with an even/odd first range value
27 			as of Unicode 10.0 but we do not rely on this)
28    As of Unicode 10.0, the following field lengths are sufficient
29 	first: 17 bits
30 	diff: 8 bits
31 	delta: 17 bits
32 	mode: 2 bits
33    The reserve of 4 bits (to limit the struct to 6 bytes)
34    is currently added to the 'first' field;
35    should a future Unicode version make it necessary to expand the others,
36    the 'first' field could be reduced as needed, or larger ranges could
37    be split up (reduce limit max=255 e.g. to max=127 or max=63 in
38    script mkcaseconv, check increasing table size).
39  */
40 enum {TO1, TOLO, TOUP, TOBOTH};
41 enum {EVENCAP, ODDCAP};
42 static struct caseconv_entry {
43   uint_least32_t first: 21;
44   uint_least32_t diff: 8;
45   uint_least32_t mode: 2;
46   int_least32_t delta: 17;
47 }
48 #ifdef _HAVE_BITFIELDS_IN_PACKED_STRUCTS
49 __attribute__((packed))
50 #endif
51 caseconv_table [] = {
52 #include "caseconv.t"
53 };
/* Lowest and highest code covered by the caseconv_entry CE, as wint_t.
   CE is parenthesized so that any expression of struct type (for example
   *ptr) can be passed; note that last() evaluates CE twice.  */
#define first(ce)	((wint_t) (ce).first)
#define last(ce)	((wint_t) ((ce).first + (ce).diff))
56 
57 /* auxiliary function for binary search in interval properties table */
58 static const struct caseconv_entry *
bisearch(wint_t ucs,const struct caseconv_entry * table,int max)59 bisearch (wint_t ucs, const struct caseconv_entry *table, int max)
60 {
61   int min = 0;
62   int mid;
63 
64   if (ucs < first(table[0]) || ucs > last(table[max]))
65     return 0;
66   while (max >= min)
67     {
68       mid = (min + max) / 2;
69       if (ucs > last(table[mid]))
70 	min = mid + 1;
71       else if (ucs < first(table[mid]))
72 	max = mid - 1;
73       else
74 	return &table[mid];
75     }
76   return 0;
77 }
78 
79 static wint_t
toulower(wint_t c)80 toulower (wint_t c)
81 {
82   const struct caseconv_entry * cce =
83     bisearch(c, caseconv_table,
84              sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
85 
86   if (cce)
87     switch (cce->mode)
88       {
89       case TOLO:
90 	return c + cce->delta;
91       case TOBOTH:
92 	return c + 1;
93       case TO1:
94 	switch (cce->delta)
95 	  {
96 	  case EVENCAP:
97 	    if (!(c & 1))
98 	      return c + 1;
99 	    break;
100 	  case ODDCAP:
101 	    if (c & 1)
102 	      return c + 1;
103 	    break;
104 	  default:
105 	    break;
106 	  }
107 	default:
108 	  break;
109       }
110 
111   return c;
112 }
113 
114 static wint_t
touupper(wint_t c)115 touupper (wint_t c)
116 {
117   const struct caseconv_entry * cce =
118     bisearch(c, caseconv_table,
119              sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
120 
121   if (cce)
122     switch (cce->mode)
123       {
124       case TOUP:
125 	return c + cce->delta;
126       case TOBOTH:
127 	return c - 1;
128       case TO1:
129 	switch (cce->delta)
130 	  {
131 	  case EVENCAP:
132 	    if (c & 1)
133 	      return c - 1;
134 	    break;
135 	  case ODDCAP:
136 	    if (!(c & 1))
137 	      return c - 1;
138 	    break;
139 	  default:
140 	    break;
141 	  }
142       default:
143 	break;
144       }
145 
146   return c;
147 }
148 
149 wint_t
towctrans_l(wint_t c,wctrans_t w,struct __locale_t * locale)150 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
151 {
152   (void) locale;
153 #ifdef _MB_CAPABLE
154   wint_t u = _jp2uc_l (c, locale);
155 #else
156   wint_t u = c;
157 #endif
158   wint_t res;
159   if (w == WCT_TOLOWER)
160     res = toulower (u);
161   else if (w == WCT_TOUPPER)
162     res = touupper (u);
163   else
164     {
165       // skipping the errno setting that was previously involved
166       // by delegating to towctrans; it was causing trouble (cygwin crash)
167       // and there is no errno specified for towctrans
168       return c;
169     }
170   if (res != u)
171 #ifdef _MB_CAPABLE
172     return _uc2jp_l (res, locale);
173 #else
174     return res;
175 #endif
176   else
177     return c;
178 }
179