1 /* Routine to translate between Japanese characters and Unicode */
2
3 /* Copyright (c) 2002 Red Hat Incorporated.
4 All rights reserved.
5 Modified (m) 2017 Thomas Wolff: consider locale, add dummy uc2jp
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 The name of Red Hat Incorporated may not be used to endorse
18 or promote products derived from this software without specific
19 prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
25 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <picolibc.h>
34
35 #ifdef _MB_CAPABLE
36 /* Under Cygwin, the incoming wide character is already given in UTF due
37 to the requirements of the underlying OS. */
38
39 #include <string.h>
40 #include <wctype.h>
41 #include "local.h"
42
/* Japanese encoding types supported.  These values are passed as the
   `type' argument of __jp2uc and __uc2jp to tell them which encoding the
   character is in.  */
#define JP_JIS 1	/* selected when the locale charset is "JIS" */
#define JP_SJIS 2	/* selected when the locale charset is "SJIS" */
#define JP_EUCJP 3	/* selected when the locale charset is "EUCJP" */
47
48 /* Japanese to Unicode conversion routine */
49 #include "jp2uc.h"
50
51 static wint_t
__jp2uc(wint_t c,int type)52 __jp2uc (wint_t c, int type)
53 {
54 int index, adj;
55 unsigned char byte1, byte2;
56 wint_t ret;
57
58 /* we actually use tables of EUCJP to Unicode. For JIS, we simply
59 note that EUCJP is essentially JIS with the top bits on in each
60 byte and translate to EUCJP. For SJIS, we do a translation to EUCJP before
61 accessing the tables. */
62 switch (type)
63 {
64 case JP_JIS:
65 byte1 = (c >> 8) + 0x80;
66 byte2 = (c & 0xff) + 0x80;
67 break;
68 case JP_EUCJP:
69 byte1 = (c >> 8);
70 byte2 = (c & 0xff);
71 break;
72 case JP_SJIS:
73 byte1 = c >> 8;
74 byte2 = c & 0xff;
75 if (byte2 <= 0x9e)
76 {
77 adj = 0xa1 - 0x22;
78 byte2 = (byte2 - 31) + 0xa1;
79 }
80 else
81 {
82 adj = 0xa1 - 0x21;
83 byte2 = (byte2 - 126) + 0xa1;
84 }
85 if (byte1 <= 0x9f)
86 byte1 = ((byte1 - 112) << 1) + adj;
87 else
88 byte1 = ((byte1 - 176) << 1) + adj;
89 break;
90 default:
91 return WEOF;
92 }
93
94 /* find conversion in jp2uc arrays */
95
96 /* handle larger ranges first */
97 if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= (wint_t) 0xcfd3)
98 {
99 index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
100 return b02cf[index];
101 }
102 else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= (wint_t) 0xf4a6)
103 {
104 index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
105 return d02f4[index];
106 }
107
108 /* handle smaller ranges here */
109 switch (byte1)
110 {
111 case 0xA1:
112 return (wint_t)a1[byte2 - 0xa1];
113 case 0xA2:
114 ret = a2[byte2 - 0xa1];
115 if (ret != 0)
116 return (wint_t)ret;
117 break;
118 case 0xA3:
119 if (a3[byte2 - 0xa1])
120 return (wint_t)(0xff00 + (byte2 - 0xa0));
121 break;
122 case 0xA4:
123 if (byte2 <= 0xf3)
124 return (wint_t)(0x3000 + (byte2 - 0x60));
125 break;
126 case 0xA5:
127 if (byte2 <= 0xf6)
128 return (wint_t)(0x3000 + byte2);
129 break;
130 case 0xA6:
131 ret = 0;
132 if (byte2 <= 0xd8)
133 ret = (wint_t)a6[byte2 - 0xa1];
134 if (ret != 0)
135 return ret;
136 break;
137 case 0xA7:
138 ret = 0;
139 if (byte2 <= 0xf1)
140 ret = (wint_t)a7[byte2 - 0xa1];
141 if (ret != 0)
142 return ret;
143 break;
144 case 0xA8:
145 if (byte2 <= 0xc0)
146 return (wint_t)a8[byte2 - 0xa1];
147 break;
148 default:
149 return WEOF;
150 }
151
152 return WEOF;
153 }
154
/*
 * Unicode to Japanese conversion routine.
 *
 * Placeholder: the back-conversion from Unicode to the Japanese encodings
 * is not implemented (it is needed for towupper/towlower), so the
 * character is handed back unchanged whatever `type' is.
 */
static wint_t
__uc2jp (wint_t c, int type)
{
  /* `type' is accepted only to mirror __jp2uc; nothing depends on it. */
  (void) type;

  return c;
}
164
165 /* Japanese to Unicode conversion interface */
166 wint_t
_jp2uc_l(wint_t c,struct __locale_t * l)167 _jp2uc_l (wint_t c, struct __locale_t * l)
168 {
169 const char * cs = l ? __locale_charset(l) : __current_locale_charset();
170 if (0 == strcmp (cs, "JIS"))
171 c = __jp2uc (c, JP_JIS);
172 else if (0 == strcmp (cs, "SJIS"))
173 c = __jp2uc (c, JP_SJIS);
174 else if (0 == strcmp (cs, "EUCJP"))
175 c = __jp2uc (c, JP_EUCJP);
176 return c;
177 }
178
/*
 * Japanese to Unicode conversion for the current locale: forwards to
 * _jp2uc_l with a null locale pointer.
 */
wint_t
_jp2uc (wint_t c)
{
  wint_t converted = _jp2uc_l (c, NULL);

  return converted;
}
185
186 /* Unicode to Japanese conversion interface */
187 wint_t
_uc2jp_l(wint_t c,struct __locale_t * l)188 _uc2jp_l (wint_t c, struct __locale_t * l)
189 {
190 const char * cs = l ? __locale_charset(l) : __current_locale_charset();
191 if (0 == strcmp (cs, "JIS"))
192 c = __uc2jp (c, JP_JIS);
193 else if (0 == strcmp (cs, "SJIS"))
194 c = __uc2jp (c, JP_SJIS);
195 else if (0 == strcmp (cs, "EUCJP"))
196 c = __uc2jp (c, JP_EUCJP);
197 return c;
198 }
199
200 #endif /* _MB_CAPABLE */
201