1 /* Routine to translate between Japanese characters and Unicode */
2
3 /* Copyright (c) 2002 Red Hat Incorporated.
4 All rights reserved.
5 Modified (m) 2017 Thomas Wolff: consider locale, add dummy uc2jp
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 The name of Red Hat Incorporated may not be used to endorse
18 or promote products derived from this software without specific
19 prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
25 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <newlib.h>
34
35 #ifdef _MB_CAPABLE
36 /* Under Cygwin, the incoming wide character is already given in UTF due
37 to the requirements of the underlying OS. */
38 #ifndef __CYGWIN__
39
40 #include <_ansi.h>
41 #include <string.h>
42 #include <wctype.h>
43 #include "local.h"
44
45 /* Japanese encoding types supported */
46 #define JP_JIS 1
47 #define JP_SJIS 2
48 #define JP_EUCJP 3
49
50 /* Japanese to Unicode conversion routine */
51 #include "jp2uc.h"
52
53 static wint_t
__jp2uc(wint_t c,int type)54 __jp2uc (wint_t c, int type)
55 {
56 int index, adj;
57 unsigned char byte1, byte2;
58 wint_t ret;
59
60 /* we actually use tables of EUCJP to Unicode. For JIS, we simply
61 note that EUCJP is essentially JIS with the top bits on in each
62 byte and translate to EUCJP. For SJIS, we do a translation to EUCJP before
63 accessing the tables. */
64 switch (type)
65 {
66 case JP_JIS:
67 byte1 = (c >> 8) + 0x80;
68 byte2 = (c & 0xff) + 0x80;
69 break;
70 case JP_EUCJP:
71 byte1 = (c >> 8);
72 byte2 = (c & 0xff);
73 break;
74 case JP_SJIS:
75 byte1 = c >> 8;
76 byte2 = c & 0xff;
77 if (byte2 <= 0x9e)
78 {
79 adj = 0xa1 - 0x22;
80 byte2 = (byte2 - 31) + 0xa1;
81 }
82 else
83 {
84 adj = 0xa1 - 0x21;
85 byte2 = (byte2 - 126) + 0xa1;
86 }
87 if (byte1 <= 0x9f)
88 byte1 = ((byte1 - 112) << 1) + adj;
89 else
90 byte1 = ((byte1 - 176) << 1) + adj;
91 break;
92 default:
93 return WEOF;
94 }
95
96 /* find conversion in jp2uc arrays */
97
98 /* handle larger ranges first */
99 if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= (wint_t) 0xcfd3)
100 {
101 index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
102 return b02cf[index];
103 }
104 else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= (wint_t) 0xf4a6)
105 {
106 index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
107 return d02f4[index];
108 }
109
110 /* handle smaller ranges here */
111 switch (byte1)
112 {
113 case 0xA1:
114 return (wint_t)a1[byte2 - 0xa1];
115 case 0xA2:
116 ret = a2[byte2 - 0xa1];
117 if (ret != 0)
118 return (wint_t)ret;
119 break;
120 case 0xA3:
121 if (a3[byte2 - 0xa1])
122 return (wint_t)(0xff00 + (byte2 - 0xa0));
123 break;
124 case 0xA4:
125 if (byte2 <= 0xf3)
126 return (wint_t)(0x3000 + (byte2 - 0x60));
127 break;
128 case 0xA5:
129 if (byte2 <= 0xf6)
130 return (wint_t)(0x3000 + byte2);
131 break;
132 case 0xA6:
133 ret = 0;
134 if (byte2 <= 0xd8)
135 ret = (wint_t)a6[byte2 - 0xa1];
136 if (ret != 0)
137 return ret;
138 break;
139 case 0xA7:
140 ret = 0;
141 if (byte2 <= 0xf1)
142 ret = (wint_t)a7[byte2 - 0xa1];
143 if (ret != 0)
144 return ret;
145 break;
146 case 0xA8:
147 if (byte2 <= 0xc0)
148 return (wint_t)a8[byte2 - 0xa1];
149 break;
150 default:
151 return WEOF;
152 }
153
154 return WEOF;
155 }
156
157 /* Unicode to Japanese conversion routine */
/* Unicode to Japanese conversion routine.
   Placeholder only: the reverse mapping is not implemented, so C is
   handed back unchanged whatever TYPE is.  (Back-conversion would be
   needed for towupper/towlower.)  */
static wint_t
__uc2jp (wint_t c, int type)
{
  wint_t unconverted = c;

  (void) type;
  return unconverted;
}
166
167 /* Japanese to Unicode conversion interface */
168 wint_t
_jp2uc_l(wint_t c,struct __locale_t * l)169 _jp2uc_l (wint_t c, struct __locale_t * l)
170 {
171 const char * cs = l ? __locale_charset(l) : __current_locale_charset();
172 if (0 == strcmp (cs, "JIS"))
173 c = __jp2uc (c, JP_JIS);
174 else if (0 == strcmp (cs, "SJIS"))
175 c = __jp2uc (c, JP_SJIS);
176 else if (0 == strcmp (cs, "EUCJP"))
177 c = __jp2uc (c, JP_EUCJP);
178 return c;
179 }
180
/* Japanese to Unicode conversion using the current locale: forwards C
   to _jp2uc_l with a null locale pointer.  */
wint_t
_jp2uc (wint_t c)
{
  struct __locale_t *no_locale = 0;

  return _jp2uc_l (c, no_locale);
}
187
188 /* Unicode to Japanese conversion interface */
189 wint_t
_uc2jp_l(wint_t c,struct __locale_t * l)190 _uc2jp_l (wint_t c, struct __locale_t * l)
191 {
192 const char * cs = l ? __locale_charset(l) : __current_locale_charset();
193 if (0 == strcmp (cs, "JIS"))
194 c = __uc2jp (c, JP_JIS);
195 else if (0 == strcmp (cs, "SJIS"))
196 c = __uc2jp (c, JP_SJIS);
197 else if (0 == strcmp (cs, "EUCJP"))
198 c = __uc2jp (c, JP_EUCJP);
199 return c;
200 }
201
202 #endif /* !__CYGWIN__ */
203 #endif /* _MB_CAPABLE */
204