1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4
5 INDEX
6 setlocale
7 INDEX
8 localeconv
9
10 SYNOPSIS
11 #include <locale.h>
12 char *setlocale(int <[category]>, const char *<[locale]>);
13 lconv *localeconv(void);
14
15 DESCRIPTION
16 <<setlocale>> is the facility defined by ANSI C to condition the
17 execution environment for international collating and formatting
18 information; <<localeconv>> reports on the settings of the current
19 locale.
20
21 This is a minimal implementation, supporting only the required <<"POSIX">>
22 and <<"C">> values for <[locale]>; strings representing other locales are not
23 honored unless _MB_CAPABLE is defined.
24
25 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
26 the form
27
28 language[_TERRITORY][.charset][@@modifier]
29
30 <<"language">> is a two character string per ISO 639, or, if not available
31 for a given language, a three character string per ISO 639-3.
32 <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
33 <<"modifier">> see below.
34
In addition to the POSIX specifier, the following extension is supported
for backward compatibility with older implementations using newlib:
<<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales that use a charset other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale
but uses the UTF-8 charset.
42
43 The following charsets are recognized:
44 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
45 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
46 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
47 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
48 1256, 1257, 1258].
49
50 Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
51 are equivalent. Charset names with dashes can also be written without
52 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
53 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
54
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows codepages.
57 Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
58 only newlib for Cygwin is built with full charset support by default.
59 Under Cygwin, this implementation additionally supports the charsets
60 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
61 <<"Big5">>. Cygwin does not support <<"JIS">>.
62
63 Cygwin additionally supports locales from the file
64 /usr/share/locale/locale.alias.
65
66 (<<"">> is also accepted; if given, the settings are read from the
67 corresponding LC_* environment variables and $LANG according to POSIX rules.)
68
69 This implementation also supports the modifiers <<"cjknarrow">> and
70 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
71 handle characters from the "CJK Ambiguous Width" category of characters
72 described at http://www.unicode.org/reports/tr11/#Ambiguous.
73 These characters have a width of 1 for singlebyte charsets and UTF-8,
74 and a width of 2 for multibyte charsets other than UTF-8. Specifying
75 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
76
77 This implementation also supports the modifier <<"cjksingle">>
78 to enforce single-width character properties.
79
80 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
81 pointer to the string representing the current locale. The acceptable
82 values for <[category]> are defined in `<<locale.h>>' as macros
83 beginning with <<"LC_">>.
84
85 <<localeconv>> returns a pointer to a structure (also defined in
86 `<<locale.h>>') describing the locale-specific conventions currently
87 in effect.
88
89 RETURNS
90 A successful call to <<setlocale>> returns a pointer to a string
91 associated with the specified category for the new locale. The string
92 returned by <<setlocale>> is such that a subsequent call using that
93 string will restore that category (or all categories in case of LC_ALL),
94 to that state. The application shall not modify the string returned
95 which may be overwritten by a subsequent call to <<setlocale>>.
96 On error, <<setlocale>> returns <<NULL>>.
97
98 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
99 which describes the formatting and collating conventions in effect (in
100 this implementation, always those of the C locale).
101
102 PORTABILITY
103 ANSI C requires <<setlocale>>, but the only locale required across all
104 implementations is the C locale.
105
106 NOTES
107 There is no ISO-8859-12 codepage. It's also refused by this implementation.
108
109 No supporting OS subroutines are required.
110 */
111
112 /* Parts of this code are originally taken from FreeBSD. */
113 /*
114 * Copyright (c) 1996 - 2002 FreeBSD Project
115 * Copyright (c) 1991, 1993
116 * The Regents of the University of California. All rights reserved.
117 *
118 * This code is derived from software contributed to Berkeley by
119 * Paul Borman at Krystal Technologies.
120 *
121 * Redistribution and use in source and binary forms, with or without
122 * modification, are permitted provided that the following conditions
123 * are met:
124 * 1. Redistributions of source code must retain the above copyright
125 * notice, this list of conditions and the following disclaimer.
126 * 2. Redistributions in binary form must reproduce the above copyright
127 * notice, this list of conditions and the following disclaimer in the
128 * documentation and/or other materials provided with the distribution.
129 * 4. Neither the name of the University nor the names of its contributors
130 * may be used to endorse or promote products derived from this software
131 * without specific prior written permission.
132 *
133 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
134 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
135 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
136 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
137 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
138 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
139 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
140 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
141 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
142 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
143 * SUCH DAMAGE.
144 */
145
146 #define _DEFAULT_SOURCE
147 #define _PICOLIBC_CTYPE_SMALL 0
148 #include <newlib.h>
149 #include <errno.h>
150 #include <string.h>
151 #include <limits.h>
152 #include <stdlib.h>
153 #include <wchar.h>
154 #include "setlocale.h"
155 #include "../ctype/ctype_.h"
156 #include "../stdlib/local.h"
157
/* Legacy exported variable, defined on Cygwin only.  It has to stay
   available as an exported symbol for backward compatibility.
   __loadlocale stores the new maximum multibyte length into it when
   LC_CTYPE changes; nothing else in this file reads it.  Applications
   compiled after 2010 don't use it anymore. */
#ifdef __CYGWIN__
int __EXPORT __mb_cur_max = 6;
#endif
164
/* Current-locale pointer, only present when locale info support is
   compiled in.  It starts out pointing at the global locale object.
   NOTE(review): NEWLIB_THREAD_LOCAL presumably makes this a per-thread
   variable -- confirm against its definition. */
#ifdef __HAVE_LOCALE_INFO__
NEWLIB_THREAD_LOCAL struct __locale_t *_locale = &__global_locale;
#endif
168
/* Exported pointer, initially NULL.  It is not referenced anywhere else
   in this file.  NOTE(review): presumably a path to locale data kept for
   compatibility with the FreeBSD-derived code -- confirm with users of
   this symbol. */
char *_PathLocale = NULL;
170
#ifdef _MB_CAPABLE
/*
 * Environment variable names, indexed by locale category, as looked up
 * with getenv() in __get_locale_env.  Index 0 is the LC_ALL override.
 *
 * The table and the strings it points to are never modified, so both
 * levels are const-qualified; getenv() takes a const char * anyway.
 */
static const char *const categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
185
/*
 * Default locale per POSIX.  A target can override it by defining
 * DEFAULT_LOCALE before this point (e.g. on the compiler command line).
 * It is used below both as the initial LC_CTYPE name of the global
 * locale and as the fallback returned by __get_locale_env.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE "C"
#endif
192
#ifdef _MB_CAPABLE
/*
 * Locale name that __get_locale_env falls back to when neither LC_ALL,
 * the category's own LC_* variable nor LANG is set to a non-empty value.
 * The object is static const, so it is fixed at build time; define
 * DEFAULT_LOCALE to change it.
 */
static const char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Read-only locale object describing the plain "C" locale: every
 * category is named "C" and the ASCII conversion functions are used.
 *
 * NOTE(review): the per-field remarks below are inferred from the
 * initializer order and from how __loadlocale uses the structure;
 * confirm against the definition of struct __locale_t in setlocale.h.
 */
const struct __locale_t __C_locale =
{
  /* Per-category locale names, LC_ALL slot first. */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* wchar_t <-> multibyte conversion functions. */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang: no special CJK width handling. */
  0,
  /* Presumably the character classification table pointer. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv data: "." followed by empty strings, and
     CHAR_MAX for every numeric member. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max, ctype_codeset and message_codeset, in that
     order (these are the members __loadlocale writes in this
     configuration). */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* One pair of pointers per category; the first points at the built-in
     "C" data for that category, the second is NULL. */
  {
    { NULL, NULL },			/* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },	/* LC_COLLATE */
#else
    { NULL, NULL },			/* LC_COLLATE */
#endif
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
#endif /* _MB_CAPABLE */
235
/*
 * The mutable global locale object that setlocale operates on.
 *
 * It differs from a plain "C" locale in two visible ways: the LC_CTYPE
 * name (third entry of the name array) is DEFAULT_LOCALE, and on Cygwin
 * the UTF-8 conversion functions and UTF-8 ctype data are used instead
 * of the ASCII ones.
 *
 * NOTE(review): the per-field remarks below are inferred from the
 * initializer order and from how __loadlocale uses the structure;
 * confirm against the definition of struct __locale_t in setlocale.h.
 */
struct __locale_t __global_locale =
{
  /* Per-category locale names, LC_ALL slot first. */
  { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
  /* wchar_t <-> multibyte conversion functions. */
#ifdef __CYGWIN__
  __utf8_wctomb,
  __utf8_mbtowc,
#else
  __ascii_wctomb,
  __ascii_mbtowc,
#endif
  /* Presumably cjk_lang: no special CJK width handling. */
  0,
  /* Presumably the character classification table pointer. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv data: "." followed by empty strings, and
     CHAR_MAX for every numeric member. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max, ctype_codeset and message_codeset, in that
     order. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* One pair of pointers per category; the first points at the built-in
     data for that category, the second is NULL. */
  {
    { NULL, NULL },			/* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },	/* LC_COLLATE */
    { &_C_utf8_ctype_locale, NULL },	/* LC_CTYPE */
#else
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
#endif
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
276
#ifdef _MB_CAPABLE
/* Buffer holding the string that setlocale (LC_ALL, NULL) returns.  It
   is rebuilt by currentlocale () and is sized for one name of at most
   ENCODING_LEN characters plus a "/" separator and a terminator per
   category.

   Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
= "C";
/* Rebuilds global_locale_string from the global locale's category names
   and returns it; defined after setlocale. */
static char *currentlocale (void);

#endif /* _MB_CAPABLE */
286
/*
 * Select or query the global locale.
 *
 * category  one of the LC_* values; LC_ALL addresses all categories.
 * locale    NULL to query, "" to take the names from the environment,
 *           otherwise a locale name.  For LC_ALL the name may also be a
 *           '/'-separated list with one name per category, in the format
 *           produced by currentlocale ().
 *
 * Returns the name now in effect for the category (for LC_ALL the
 * combined string), or NULL on failure.  In the multibyte-capable build
 * an invalid category or an over-long name sets errno to EINVAL.
 *
 * Without _MB_CAPABLE only "C", "POSIX" and "" are accepted, the
 * category is not looked at, and the result is always "C".
 *
 * The multibyte-capable build works on function-static scratch buffers
 * and on the global locale object.
 */
char *
setlocale (
       int category,
       const char *locale)
{
  (void) category;
#ifndef _MB_CAPABLE
  if (locale != NULL
      && strcmp (locale, "POSIX") != 0
      && strcmp (locale, "C") != 0
      && strcmp (locale, "") != 0)
    return NULL;
  return "C";
#else /* _MB_CAPABLE */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int i, j, len, saverr;
  const char *env, *r;
  char *ret;

  if (category < LC_ALL || category >= _LC_LAST)
    goto invalid;

  /* Pure query: hand back the stored name. */
  if (locale == NULL)
    {
      if (category == LC_ALL)
	return global_locale_string;
      return __get_global_locale ()->categories[category];
    }

  /* Every category starts out with the name currently in effect. */
  for (i = 1; i < _LC_LAST; ++i)
    strcpy (new_categories[i], __get_global_locale ()->categories[i]);

  /* Overwrite the requested categories according to the argument. */
  if (*locale == '\0')
    {
      /* Empty name: consult the environment, either for the one
	 requested category or, for LC_ALL, for each of them in turn. */
      int first = (category == LC_ALL) ? 1 : category;
      int last = (category == LC_ALL) ? _LC_LAST - 1 : category;

      for (i = first; i <= last; ++i)
	{
	  env = __get_locale_env (i);
	  if (strlen (env) > ENCODING_LEN)
	    goto invalid;
	  strcpy (new_categories[i], env);
	}
    }
  else if (category != LC_ALL)
    {
      /* One explicit name for one category. */
      if (strlen (locale) > ENCODING_LEN)
	goto invalid;
      strcpy (new_categories[category], locale);
    }
  else if ((r = strchr (locale, '/')) == NULL)
    {
      /* One explicit name applied to all categories. */
      if (strlen (locale) > ENCODING_LEN)
	goto invalid;
      for (i = 1; i < _LC_LAST; ++i)
	strcpy (new_categories[i], locale);
    }
  else
    {
      /* Composite "name/name/..." string.  R points at the first slash;
	 move it to the last slash of that run. */
      i = 1;
      while (r[1] == '/')
	++r;
      if (r[1] == '\0')
	goto invalid;			/* Hmm, just slashes... */

      for (;;)
	{
	  if (i == _LC_LAST)
	    break;			/* Too many slashes... */
	  len = r - locale;
	  if (len > ENCODING_LEN)
	    goto invalid;
	  strlcpy (new_categories[i], locale, len + 1);
	  i++;
	  /* Step over the separator(s) to the start of the next name,
	     then find where that name ends. */
	  while (*r == '/')
	    r++;
	  locale = r;
	  while (*r && *r != '/')
	    r++;
	  if (*locale == '\0')
	    break;
	}

      /* Fewer names than categories: repeat the last one given. */
      for (; i < _LC_LAST; i++)
	strcpy (new_categories[i], new_categories[i - 1]);
    }

  /* Single category: load it and refresh the combined string. */
  if (category != LC_ALL)
    {
      ret = __loadlocale (__get_global_locale (), category,
			  new_categories[category]);
      currentlocale ();
      return ret;
    }

  /* LC_ALL: load category by category, remembering the old names so
     that a failure can be rolled back. */
  for (i = 1; i < _LC_LAST; ++i)
    {
      strcpy (saved_categories[i], __get_global_locale ()->categories[i]);
      if (__loadlocale (__get_global_locale (), i, new_categories[i])
	  != NULL)
	continue;

      /* Restore the categories already switched; if even that fails
	 for one of them, fall back to "C" for it.  The errno value of
	 the original failure is what the caller gets to see. */
      saverr = _REENT_ERRNO(p);
      for (j = 1; j < i; j++)
	{
	  strcpy (new_categories[j], saved_categories[j]);
	  if (__loadlocale (__get_global_locale (), j, new_categories[j])
	      == NULL)
	    {
	      strcpy (new_categories[j], "C");
	      __loadlocale (__get_global_locale (), j, new_categories[j]);
	    }
	}
      _REENT_ERRNO(p) = saverr;
      return NULL;
    }
  return currentlocale ();

 invalid:
  _REENT_ERRNO(p) = EINVAL;
  return NULL;
#endif /* _MB_CAPABLE */
}
440
441 #ifdef _MB_CAPABLE
442 static char *
currentlocale(void)443 currentlocale (void)
444 {
445 int i;
446
447 strcpy (global_locale_string, __get_global_locale ()->categories[1]);
448
449 for (i = 2; i < _LC_LAST; ++i)
450 if (strcmp (__get_global_locale ()->categories[1],
451 __get_global_locale ()->categories[i]))
452 {
453 for (i = 2; i < _LC_LAST; ++i)
454 {
455 (void)strcat(global_locale_string, "/");
456 (void)strcat(global_locale_string,
457 __get_global_locale ()->categories[i]);
458 }
459 break;
460 }
461 return global_locale_string;
462 }
463
464 extern void __set_ctype (struct __locale_t *, const char *charset);
465
466 char *
__loadlocale(struct __locale_t * loc,int category,char * new_locale)467 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
468 {
469 /* At this point a full-featured system would just load the locale
470 specific data from the locale files.
471 What we do here for now is to check the incoming string for correctness.
472 The string must be in one of the allowed locale strings, either
473 one in POSIX-style, or one in the old newlib style to maintain
474 backward compatibility. If the local string is correct, the charset
475 is extracted and stored in ctype_codeset or message_charset
476 dependent on the cateogry. */
477 char *locale = NULL;
478 char charset[ENCODING_LEN + 1] = {};
479 long val = 0;
480 char *end, *c = NULL;
481 int mbc_max;
482 wctomb_p l_wctomb;
483 mbtowc_p l_mbtowc;
484 int cjksingle = 0;
485 int cjknarrow = 0;
486 int cjkwide = 0;
487
488 /* Avoid doing everything twice if nothing has changed.
489
490 duplocale relies on this test to go wrong so the locale is actually
491 duplicated when required. Any change here has to be synced with a
492 matching change in duplocale. */
493 if (!strcmp (new_locale, loc->categories[category]))
494 return loc->categories[category];
495
496 int ret = 0;
497
498 #ifdef __CYGWIN__
499 /* This additional code handles the case that the incoming locale string
500 is not valid. If so, it calls the function __set_locale_from_locale_alias,
501 which is only available on Cygwin right now. The function reads the
502 file /usr/share/locale/locale.alias. The file contains locale aliases
503 and their replacement locale. For instance, the alias "french" is
504 translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
505 "th_TH.TIS-620". If successful, the function returns with a pointer
506 to the second argument, which is a buffer in which the replacement locale
507 gets stored. Otherwise the function returns NULL. */
508 char tmp_locale[ENCODING_LEN + 1];
509
510 restart:
511 if (!locale)
512 locale = new_locale;
513 else if (locale != tmp_locale)
514 {
515 locale = __set_locale_from_locale_alias (locale, tmp_locale);
516 if (!locale)
517 return NULL;
518 }
519 # define FAIL goto restart
520 #else
521 locale = new_locale;
522 # define FAIL return NULL
523 #endif
524
525 /* "POSIX" is translated to "C", as on Linux. */
526 if (!strcmp (locale, "POSIX"))
527 strcpy (locale, "C");
528 if (!strcmp (locale, "C")) /* Default "C" locale */
529 strcpy (charset, "ASCII");
530 else if (locale[0] == 'C'
531 && (locale[1] == '-' /* Old newlib style */
532 || locale[1] == '.')) /* Extension for the C locale to allow
533 specifying different charsets while
534 sticking to the C locale in terms
535 of sort order, etc. Proposed in
536 the Debian project. */
537 {
538 char *chp;
539
540 c = locale + 2;
541 strcpy (charset, c);
542 if ((chp = strchr (charset, '@')))
543 /* Strip off modifier */
544 *chp = '\0';
545 c += strlen (charset);
546 }
547 else /* POSIX style */
548 {
549 c = locale;
550
551 /* Don't use ctype macros here, they might be localized. */
552 /* Language */
553 if (c[0] < 'a' || c[0] > 'z'
554 || c[1] < 'a' || c[1] > 'z')
555 FAIL;
556 c += 2;
557 /* Allow three character Language per ISO 639-3 */
558 if (c[0] >= 'a' && c[0] <= 'z')
559 ++c;
560 if (c[0] == '_')
561 {
562 /* Territory */
563 ++c;
564 if (c[0] < 'A' || c[0] > 'Z'
565 || c[1] < 'A' || c[1] > 'Z')
566 FAIL;
567 c += 2;
568 }
569 if (c[0] == '.')
570 {
571 /* Charset */
572 char *chp;
573
574 ++c;
575 strcpy (charset, c);
576 if ((chp = strchr (charset, '@')))
577 /* Strip off modifier */
578 *chp = '\0';
579 c += strlen (charset);
580 }
581 else if (c[0] == '\0' || c[0] == '@')
582 /* End of string or just a modifier */
583 #ifdef __CYGWIN__
584 /* The Cygwin-only function __set_charset_from_locale checks
585 for the default charset which is connected to the given locale.
586 The function uses Windows functions in turn so it can't be easily
587 adapted to other targets. However, if any other target provides
588 equivalent functionality, preferrably using the same function name
589 it would be sufficient to change the guarding #ifdef. */
590 __set_charset_from_locale (locale, charset);
591 #else
592 strcpy (charset, "ISO-8859-1");
593 #endif
594 else
595 /* Invalid string */
596 FAIL;
597 }
598 if (c && c[0] == '@')
599 {
600 /* Modifier "cjksingle" is recognized to enforce single-width mode. */
601 /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
602 behaviour of wcwidth() and wcswidth() for East Asian languages.
603 For details see the comment at the end of this function. */
604 if (!strcmp (c + 1, "cjksingle"))
605 cjksingle = 1;
606 else if (!strcmp (c + 1, "cjknarrow"))
607 cjknarrow = 1;
608 else if (!strcmp (c + 1, "cjkwide"))
609 cjkwide = 1;
610 }
611 /* We only support this subset of charsets. */
612 switch (charset[0])
613 {
614 case 'U':
615 case 'u':
616 if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
617 FAIL;
618 strcpy (charset, "UTF-8");
619 mbc_max = 6;
620 l_wctomb = __utf8_wctomb;
621 l_mbtowc = __utf8_mbtowc;
622 break;
623 #ifndef __CYGWIN__
624 /* Cygwin does not support JIS at all. */
625 case 'J':
626 case 'j':
627 if (strcasecmp (charset, "JIS"))
628 FAIL;
629 strcpy (charset, "JIS");
630 mbc_max = 8;
631 l_wctomb = __jis_wctomb;
632 l_mbtowc = __jis_mbtowc;
633 break;
634 #endif /* !__CYGWIN__ */
635 case 'E':
636 case 'e':
637 if (strncasecmp (charset, "EUC", 3))
638 FAIL;
639 c = charset + 3;
640 if (*c == '-')
641 ++c;
642 if (!strcasecmp (c, "JP"))
643 {
644 strcpy (charset, "EUCJP");
645 mbc_max = 3;
646 l_wctomb = __eucjp_wctomb;
647 l_mbtowc = __eucjp_mbtowc;
648 }
649 #ifdef __CYGWIN__
650 /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
651 implementation requires Windows support. */
652 else if (!strcasecmp (c, "KR"))
653 {
654 strcpy (charset, "EUCKR");
655 mbc_max = 2;
656 l_wctomb = __kr_wctomb;
657 l_mbtowc = __kr_mbtowc;
658 }
659 else if (!strcasecmp (c, "CN"))
660 {
661 strcpy (charset, "EUCCN");
662 mbc_max = 2;
663 l_wctomb = __gbk_wctomb;
664 l_mbtowc = __gbk_mbtowc;
665 }
666 #endif /* __CYGWIN__ */
667 else
668 FAIL;
669 break;
670 case 'S':
671 case 's':
672 if (strcasecmp (charset, "SJIS"))
673 FAIL;
674 strcpy (charset, "SJIS");
675 mbc_max = 2;
676 l_wctomb = __sjis_wctomb;
677 l_mbtowc = __sjis_mbtowc;
678 break;
679 case 'I':
680 case 'i':
681 /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
682 ISO-8859-12. This code also recognizes the aliases without dashes. */
683 if (strncasecmp (charset, "ISO", 3))
684 FAIL;
685 c = charset + 3;
686 if (*c == '-')
687 ++c;
688 if (strncasecmp (c, "8859", 4))
689 FAIL;
690 c += 4;
691 if (*c == '-')
692 ++c;
693 val = strtol (c, &end, 10);
694 if (val < 1 || val > 16 || val == 12 || *end)
695 FAIL;
696 strcpy (charset, "ISO-8859-");
697 c = charset + 9;
698 if (val > 10)
699 *c++ = '1';
700 *c++ = val % 10 + '0';
701 *c = '\0';
702 mbc_max = 1;
703 #ifdef _MB_EXTENDED_CHARSETS_ISO
704 l_wctomb = __iso_wctomb (val);
705 l_mbtowc = __iso_mbtowc (val);
706 #else /* !_MB_EXTENDED_CHARSETS_ISO */
707 l_wctomb = __ascii_wctomb;
708 l_mbtowc = __ascii_mbtowc;
709 #endif /* _MB_EXTENDED_CHARSETS_ISO */
710 break;
711 case 'C':
712 case 'c':
713 if (charset[1] != 'P' && charset[1] != 'p')
714 FAIL;
715 memcpy (charset, "CP", 2);
716 val = strtol (charset + 2, &end, 10);
717 if (*end)
718 FAIL;
719 switch (val)
720 {
721 case 437:
722 case 720:
723 case 737:
724 case 775:
725 case 850:
726 case 852:
727 case 855:
728 case 857:
729 case 858:
730 case 862:
731 case 866:
732 case 874:
733 case 1125:
734 case 1250:
735 case 1251:
736 case 1252:
737 case 1253:
738 case 1254:
739 case 1255:
740 case 1256:
741 case 1257:
742 case 1258:
743 mbc_max = 1;
744 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
745 l_wctomb = __cp_wctomb (val);
746 l_mbtowc = __cp_mbtowc (val);
747 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
748 l_wctomb = __ascii_wctomb;
749 l_mbtowc = __ascii_mbtowc;
750 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
751 break;
752 case 932:
753 mbc_max = 2;
754 l_wctomb = __sjis_wctomb;
755 l_mbtowc = __sjis_mbtowc;
756 break;
757 default:
758 FAIL;
759 }
760 break;
761 case 'K':
762 case 'k':
763 /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
764 if (strncasecmp (charset, "KOI8", 4))
765 FAIL;
766 c = charset + 4;
767 if (*c == '-')
768 ++c;
769 if (*c == 'R' || *c == 'r')
770 {
771 val = 20866;
772 strcpy (charset, "CP20866");
773 }
774 else if (*c == 'U' || *c == 'u')
775 {
776 val = 21866;
777 strcpy (charset, "CP21866");
778 }
779 else if (*c == 'T' || *c == 't')
780 {
781 val = 103;
782 strcpy (charset, "CP103");
783 }
784 else
785 FAIL;
786 mbc_max = 1;
787 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
788 l_wctomb = __cp_wctomb (val);
789 l_mbtowc = __cp_mbtowc (val);
790 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
791 l_wctomb = __ascii_wctomb;
792 l_mbtowc = __ascii_mbtowc;
793 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
794 break;
795 case 'A':
796 case 'a':
797 if (strcasecmp (charset, "ASCII"))
798 FAIL;
799 strcpy (charset, "ASCII");
800 mbc_max = 1;
801 l_wctomb = __ascii_wctomb;
802 l_mbtowc = __ascii_mbtowc;
803 break;
804 case 'G':
805 case 'g':
806 #ifdef __CYGWIN__
807 /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
808 requires Windows support. */
809 if (!strcasecmp (charset, "GBK")
810 || !strcasecmp (charset, "GB2312"))
811 {
812 strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
813 mbc_max = 2;
814 l_wctomb = __gbk_wctomb;
815 l_mbtowc = __gbk_mbtowc;
816 }
817 else if (!strcasecmp (charset, "GB18030"))
818 {
819 strcpy (charset, "GB18030");
820 mbc_max = 4;
821 l_wctomb = __gb18030_wctomb;
822 l_mbtowc = __gb18030_mbtowc;
823 }
824 else
825 #endif /* __CYGWIN__ */
826 /* GEORGIAN-PS and the alias without dash */
827 if (!strncasecmp (charset, "GEORGIAN", 8))
828 {
829 c = charset + 8;
830 if (*c == '-')
831 ++c;
832 if (strcasecmp (c, "PS"))
833 FAIL;
834 val = 101;
835 strcpy (charset, "CP101");
836 mbc_max = 1;
837 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
838 l_wctomb = __cp_wctomb (val);
839 l_mbtowc = __cp_mbtowc (val);
840 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
841 l_wctomb = __ascii_wctomb;
842 l_mbtowc = __ascii_mbtowc;
843 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
844 }
845 else
846 FAIL;
847 break;
848 case 'P':
849 case 'p':
850 /* PT154 */
851 if (strcasecmp (charset, "PT154"))
852 FAIL;
853 val = 102;
854 strcpy (charset, "CP102");
855 mbc_max = 1;
856 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
857 l_wctomb = __cp_wctomb (val);
858 l_mbtowc = __cp_mbtowc (val);
859 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
860 l_wctomb = __ascii_wctomb;
861 l_mbtowc = __ascii_mbtowc;
862 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
863 break;
864 case 'T':
865 case 't':
866 if (strncasecmp (charset, "TIS", 3))
867 FAIL;
868 c = charset + 3;
869 if (*c == '-')
870 ++c;
871 if (strcmp (c, "620"))
872 FAIL;
873 val = 874;
874 strcpy (charset, "CP874");
875 mbc_max = 1;
876 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
877 l_wctomb = __cp_wctomb (val);
878 l_mbtowc = __cp_mbtowc (val);
879 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
880 l_wctomb = __ascii_wctomb;
881 l_mbtowc = __ascii_mbtowc;
882 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
883 break;
884 #ifdef __CYGWIN__
885 /* Newlib does not provide Big5 and Cygwin's implementation
886 requires Windows support. */
887 case 'B':
888 case 'b':
889 if (strcasecmp (charset, "BIG5"))
890 FAIL;
891 strcpy (charset, "BIG5");
892 mbc_max = 2;
893 l_wctomb = __big5_wctomb;
894 l_mbtowc = __big5_mbtowc;
895 break;
896 #endif /* __CYGWIN__ */
897 default:
898 FAIL;
899 }
900 switch (category)
901 {
902 case LC_CTYPE:
903 #ifndef __HAVE_LOCALE_INFO__
904 strcpy (loc->ctype_codeset, charset);
905 loc->mb_cur_max[0] = mbc_max;
906 #endif
907 #ifdef __CYGWIN__
908 __mb_cur_max = mbc_max; /* Only for backward compat */
909 #endif
910 loc->wctomb = l_wctomb;
911 loc->mbtowc = l_mbtowc;
912 __set_ctype (loc, charset);
913 /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
914 /* Determine the width for the "CJK Ambiguous Width" category of
915 characters. This is used in wcwidth(). Assume single width for
916 single-byte charsets, and double width for multi-byte charsets
917 other than UTF-8. For UTF-8, use single width.
918 Single width can also be forced with the "@cjknarrow" modifier.
919 Double width can also be forced with the "@cjkwide" modifier.
920 */
921 loc->cjk_lang = cjkwide ||
922 (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
923 if (cjksingle)
924 loc->cjk_lang = -1; /* Disable CJK dual-width */
925 #ifdef __CYGWIN__
926 ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
927 mbc_max);
928 #endif /* __CYGWIN__ */
929 break;
930 #ifdef __CYGWIN__
931 /* Right now only Cygwin supports a __messages_load_locale function at all. */
932 case LC_MESSAGES:
933 #ifdef __HAVE_LOCALE_INFO__
934 ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
935 if (!ret)
936 #else
937 strcpy (loc->message_codeset, charset);
938 #endif /* __HAVE_LOCALE_INFO__ */
939 break;
940 #endif
941 #ifdef __HAVE_LOCALE_INFO__
942 #ifdef __CYGWIN__
943 /* Right now only Cygwin supports a __collate_load_locale function at all. */
944 case LC_COLLATE:
945 ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
946 break;
947 /* Right now only Cygwin supports a __monetary_load_locale function at all. */
948 case LC_MONETARY:
949 ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
950 break;
951 /* Right now only Cygwin supports a __numeric_load_locale function at all. */
952 case LC_NUMERIC:
953 ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
954 break;
955 /* Right now only Cygwin supports a __time_load_locale function at all. */
956 case LC_TIME:
957 ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
958 break;
959 #endif
960 #endif /* __HAVE_LOCALE_INFO__ */
961 default:
962 break;
963 }
964 #ifdef __HAVE_LOCALE_INFO__
965 if (ret)
966 FAIL;
967 #endif /* __HAVE_LOCALE_INFO__ */
968 return strcpy(loc->categories[category], new_locale);
969 }
970
971 const char *
__get_locale_env(int category)972 __get_locale_env (int category)
973 {
974 const char *env;
975
976 /* 1. check LC_ALL. */
977 env = getenv (categories[0]);
978
979 /* 2. check LC_* */
980 if (env == NULL || !*env)
981 env = getenv (categories[category]);
982
983 /* 3. check LANG */
984 if (env == NULL || !*env)
985 env = getenv ("LANG");
986
987 /* 4. if none is set, fall to default locale */
988 if (env == NULL || !*env)
989 env = __default_locale;
990
991 return env;
992 }
993 #endif /* _MB_CAPABLE */
994
995 size_t
__locale_mb_cur_max(void)996 __locale_mb_cur_max (void)
997 {
998 #ifdef __HAVE_LOCALE_INFO__
999 return __get_current_ctype_locale ()->mb_cur_max[0];
1000 #else
1001 return __get_current_locale ()->mb_cur_max[0];
1002 #endif
1003 }
1004
1005 #ifdef __HAVE_LOCALE_INFO__
1006 const char *
__locale_ctype_ptr_l(struct __locale_t * locale)1007 __locale_ctype_ptr_l (struct __locale_t *locale)
1008 {
1009 return locale->ctype_ptr;
1010 }
1011
1012 const char *
__locale_ctype_ptr(void)1013 __locale_ctype_ptr (void)
1014 {
1015 return __get_current_locale ()->ctype_ptr;
1016 }
1017 #endif /* __HAVE_LOCALE_INFO__ */
1018