1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4
5 INDEX
6 setlocale
7 INDEX
8 localeconv
9
10 SYNOPSIS
11 #include <locale.h>
12 char *setlocale(int <[category]>, const char *<[locale]>);
13 lconv *localeconv(void);
14
15 DESCRIPTION
16 <<setlocale>> is the facility defined by ANSI C to condition the
17 execution environment for international collating and formatting
18 information; <<localeconv>> reports on the settings of the current
19 locale.
20
21 This is a minimal implementation, supporting only the required <<"POSIX">>
22 and <<"C">> values for <[locale]>; strings representing other locales are not
23 honored unless _MB_CAPABLE is defined.
24
25 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
26 the form
27
28 language[_TERRITORY][.charset][@@modifier]
29
30 <<"language">> is a two character string per ISO 639, or, if not available
31 for a given language, a three character string per ISO 639-3.
32 <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
33 <<"modifier">> see below.
34
35 Additionally to the POSIX specifier, the following extension is supported
36 for backward compatibility with older implementations using newlib:
37 <<"C-charset">>.
38 Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
39 to specify language neutral locales while using other charsets than ASCII,
40 for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
41 but uses the UTF-8 charset.
42
43 The following charsets are recognized:
44 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
45 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
46 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
47 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
48 1256, 1257, 1258].
49
50 Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
51 are equivalent. Charset names with dashes can also be written without
52 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
53 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
54
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows Codepages.
57 Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
58 only newlib for Cygwin is built with full charset support by default.
59 Under Cygwin, this implementation additionally supports the charsets
60 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
61 <<"Big5">>. Cygwin does not support <<"JIS">>.
62
63 Cygwin additionally supports locales from the file
64 /usr/share/locale/locale.alias.
65
66 (<<"">> is also accepted; if given, the settings are read from the
67 corresponding LC_* environment variables and $LANG according to POSIX rules.)
68
69 This implementation also supports the modifiers <<"cjknarrow">> and
70 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
71 handle characters from the "CJK Ambiguous Width" category of characters
72 described at http://www.unicode.org/reports/tr11/#Ambiguous.
73 These characters have a width of 1 for singlebyte charsets and UTF-8,
74 and a width of 2 for multibyte charsets other than UTF-8. Specifying
75 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
76
77 This implementation also supports the modifier <<"cjksingle">>
78 to enforce single-width character properties.
79
80 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
81 pointer to the string representing the current locale. The acceptable
82 values for <[category]> are defined in `<<locale.h>>' as macros
83 beginning with <<"LC_">>.
84
85 <<localeconv>> returns a pointer to a structure (also defined in
86 `<<locale.h>>') describing the locale-specific conventions currently
87 in effect.
88
89 RETURNS
90 A successful call to <<setlocale>> returns a pointer to a string
91 associated with the specified category for the new locale. The string
92 returned by <<setlocale>> is such that a subsequent call using that
93 string will restore that category (or all categories in case of LC_ALL),
94 to that state. The application shall not modify the string returned
95 which may be overwritten by a subsequent call to <<setlocale>>.
96 On error, <<setlocale>> returns <<NULL>>.
97
98 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
99 which describes the formatting and collating conventions in effect (in
100 this implementation, always those of the C locale).
101
102 PORTABILITY
103 ANSI C requires <<setlocale>>, but the only locale required across all
104 implementations is the C locale.
105
106 NOTES
107 There is no ISO-8859-12 codepage. It's also refused by this implementation.
108
109 No supporting OS subroutines are required.
110 */
111
112 /* Parts of this code are originally taken from FreeBSD. */
113 /*
114 * Copyright (c) 1996 - 2002 FreeBSD Project
115 * Copyright (c) 1991, 1993
116 * The Regents of the University of California. All rights reserved.
117 *
118 * This code is derived from software contributed to Berkeley by
119 * Paul Borman at Krystal Technologies.
120 *
121 * Redistribution and use in source and binary forms, with or without
122 * modification, are permitted provided that the following conditions
123 * are met:
124 * 1. Redistributions of source code must retain the above copyright
125 * notice, this list of conditions and the following disclaimer.
126 * 2. Redistributions in binary form must reproduce the above copyright
127 * notice, this list of conditions and the following disclaimer in the
128 * documentation and/or other materials provided with the distribution.
129 * 4. Neither the name of the University nor the names of its contributors
130 * may be used to endorse or promote products derived from this software
131 * without specific prior written permission.
132 *
133 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
134 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
135 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
136 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
137 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
138 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
139 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
140 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
141 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
142 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
143 * SUCH DAMAGE.
144 */
145
146 #define _DEFAULT_SOURCE
147 #include <newlib.h>
148 #include <errno.h>
149 #include <string.h>
150 #include <limits.h>
151 #include <stdlib.h>
152 #include <wchar.h>
153 #include "setlocale.h"
154 #include "../ctype/ctype_.h"
155 #include "../stdlib/local.h"
156
#ifdef __CYGWIN__
/* Legacy exported symbol, retained only for backward compatibility.
   setlocale still updates it, but it is otherwise ignored; applications
   compiled after 2010 no longer use it. */
int __EXPORT __mb_cur_max = 6;
#endif

#ifdef __HAVE_LOCALE_INFO__
/* Starts out referring to the global locale object. */
NEWLIB_THREAD_LOCAL struct __locale_t *_locale = &__global_locale;
#endif

char *_PathLocale = NULL;
169
#ifdef _MB_CAPABLE
/*
 * Name of the environment variable that belongs to each locale
 * category, indexed by the category constant.  Handed to getenv().
 */
static char *categories[_LC_LAST] = {
  [LC_ALL]      = "LC_ALL",
  [LC_COLLATE]  = "LC_COLLATE",
  [LC_CTYPE]    = "LC_CTYPE",
  [LC_MONETARY] = "LC_MONETARY",
  [LC_NUMERIC]  = "LC_NUMERIC",
  [LC_TIME]     = "LC_TIME",
  [LC_MESSAGES] = "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
184
/*
 * The default locale mandated by POSIX.  A target may predefine
 * DEFAULT_LOCALE to substitute its own.
 */
#if !defined (DEFAULT_LOCALE)
# define DEFAULT_LOCALE "C"
#endif
191
#ifdef _MB_CAPABLE
/*
 * Locale name __get_locale_env falls back to when neither LC_ALL, the
 * category's own variable, nor LANG is set.  It is deliberately
 * writable so that an outside mechanism can replace it, for instance
 * with a default loaded from a file.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/* Constant locale object in which every category is named "C". */
const struct __locale_t __C_locale =
{
  /* Locale name of each category. */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* Wide-character <-> multibyte conversion functions. */
  __ascii_wctomb,
  __ascii_mbtowc,
  0,
  DEFAULT_CTYPE_PTR,
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  {
    { NULL, NULL },                     /* LC_ALL */
# ifdef __CYGWIN__
    { &_C_collate_locale, NULL },       /* LC_COLLATE */
# else
    { NULL, NULL },                     /* LC_COLLATE */
# endif
    { &_C_ctype_locale, NULL },         /* LC_CTYPE */
    { &_C_monetary_locale, NULL },      /* LC_MONETARY */
    { &_C_numeric_locale, NULL },       /* LC_NUMERIC */
    { &_C_time_locale, NULL },          /* LC_TIME */
    { &_C_messages_locale, NULL },      /* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
#endif /* _MB_CAPABLE */
234
235 struct __locale_t __global_locale =
236 {
237 { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
238 #ifdef __CYGWIN__
239 __utf8_wctomb,
240 __utf8_mbtowc,
241 #else
242 __ascii_wctomb,
243 __ascii_mbtowc,
244 #endif
245 0,
246 DEFAULT_CTYPE_PTR,
247 {
248 ".", "", "", "", "", "", "", "", "", "",
249 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
250 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
251 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
252 CHAR_MAX, CHAR_MAX
253 },
254 #ifndef __HAVE_LOCALE_INFO__
255 "\1",
256 "ASCII",
257 "ASCII",
258 #else /* __HAVE_LOCALE_INFO__ */
259 {
260 { NULL, NULL }, /* LC_ALL */
261 #ifdef __CYGWIN__
262 { &_C_collate_locale, NULL }, /* LC_COLLATE */
263 { &_C_utf8_ctype_locale, NULL }, /* LC_CTYPE */
264 #else
265 { NULL, NULL }, /* LC_COLLATE */
266 { &_C_ctype_locale, NULL }, /* LC_CTYPE */
267 #endif
268 { &_C_monetary_locale, NULL }, /* LC_MONETARY */
269 { &_C_numeric_locale, NULL }, /* LC_NUMERIC */
270 { &_C_time_locale, NULL }, /* LC_TIME */
271 { &_C_messages_locale, NULL }, /* LC_MESSAGES */
272 },
273 #endif /* __HAVE_LOCALE_INFO__ */
274 };
275
#ifdef _MB_CAPABLE
static char *currentlocale (void);

/* Storage for the string returned by setlocale (LC_ALL, NULL); it is
   rebuilt by currentlocale.  The name (formerly current_locale_string)
   stresses that it describes the *global* locale only.  There's no
   equivalent functionality for uselocale.  Sized for one name per
   category plus a "/" separator and a terminator each. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
        = "C";

#endif /* _MB_CAPABLE */
285
/*
 * Select or query the global locale.
 *
 * category  One of the LC_* constants.
 * locale    NULL to query the current setting, "" to take the setting
 *           from the environment (see __get_locale_env), otherwise a
 *           locale name.  With _MB_CAPABLE and LC_ALL the name may also
 *           be a '/'-separated list with one entry per category, which
 *           is the format currentlocale produces.
 *
 * Returns the locale string now associated with category, or NULL on
 * failure.  With _MB_CAPABLE, errno is set to EINVAL for an out-of-range
 * category and for names longer than ENCODING_LEN.  The returned string
 * may live in static storage that a later call overwrites.
 *
 * Without _MB_CAPABLE only "C", "POSIX" and "" are accepted and the
 * result is always "C".
 */
char *
setlocale (
       int category,
       const char *locale)
{
  (void) category;
#ifndef _MB_CAPABLE
  if (locale)
    {
      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
          && strcmp (locale, ""))
        return NULL;
    }
  return "C";
#else /* _MB_CAPABLE */
  /* Static scratch buffers: requested names, and the previous names
     kept for rolling back a failed LC_ALL change. */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int i, j, len, saverr;
  const char *env, *r;
  char *ret;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      errno = EINVAL;
      return NULL;
    }

  /* Pure query: no state is changed. */
  if (locale == NULL)
    return category != LC_ALL ? __get_global_locale ()->categories[category]
                              : global_locale_string;

  /*
   * Default to the current locale for everything.
   */
  for (i = 1; i < _LC_LAST; ++i)
    strcpy (new_categories[i], __get_global_locale ()->categories[i]);

  /*
   * Now go fill up new_categories from the locale argument
   */
  if (!*locale)
    {
      /* Empty string: consult the environment. */
      if (category == LC_ALL)
        {
          for (i = 1; i < _LC_LAST; ++i)
            {
              env = __get_locale_env (i);
              if (strlen (env) > ENCODING_LEN)
                {
                  errno = EINVAL;
                  return NULL;
                }
              strcpy (new_categories[i], env);
            }
        }
      else
        {
          env = __get_locale_env (category);
          if (strlen (env) > ENCODING_LEN)
            {
              errno = EINVAL;
              return NULL;
            }
          strcpy (new_categories[category], env);
        }
    }
  else if (category != LC_ALL)
    {
      if (strlen (locale) > ENCODING_LEN)
        {
          errno = EINVAL;
          return NULL;
        }
      strcpy (new_categories[category], locale);
    }
  else
    {
      if ((r = strchr (locale, '/')) == NULL)
        {
          /* A single name applies to every category. */
          if (strlen (locale) > ENCODING_LEN)
            {
              errno = EINVAL;
              return NULL;
            }
          for (i = 1; i < _LC_LAST; ++i)
            strcpy (new_categories[i], locale);
        }
      else
        {
          /* '/'-separated list: one name per category, in order. */
          for (i = 1; r[1] == '/'; ++r)
            ;
          if (!r[1])
            {
              errno = EINVAL;
              return NULL;  /* Hmm, just slashes... */
            }
          do
            {
              if (i == _LC_LAST)
                break;  /* Too many slashes... */
              if ((len = r - locale) > ENCODING_LEN)
                {
                  errno = EINVAL;
                  return NULL;
                }
              strlcpy (new_categories[i], locale, len + 1);
              i++;
              while (*r == '/')
                r++;
              locale = r;
              while (*r && *r != '/')
                r++;
            }
          while (*locale);
          /* Fewer names than categories: repeat the last one given. */
          while (i < _LC_LAST)
            {
              strcpy (new_categories[i], new_categories[i-1]);
              i++;
            }
        }
    }

  if (category != LC_ALL)
    {
      ret = __loadlocale (__get_global_locale (), category,
                          new_categories[category]);
      /* Refresh the combined LC_ALL string. */
      currentlocale ();
      return ret;
    }

  for (i = 1; i < _LC_LAST; ++i)
    {
      strcpy (saved_categories[i], __get_global_locale ()->categories[i]);
      if (__loadlocale (__get_global_locale (), i, new_categories[i]) == NULL)
        {
          /* Roll back the categories already switched, falling back to
             "C" where even the old name no longer loads, and report the
             error of the load that failed. */
          saverr = errno;
          for (j = 1; j < i; j++)
            {
              strcpy (new_categories[j], saved_categories[j]);
              if (__loadlocale (__get_global_locale (), j, new_categories[j])
                  == NULL)
                {
                  strcpy (new_categories[j], "C");
                  __loadlocale (__get_global_locale (), j, new_categories[j]);
                }
            }
          errno = saverr;
          return NULL;
        }
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
439
440 #ifdef _MB_CAPABLE
441 static char *
currentlocale(void)442 currentlocale (void)
443 {
444 int i;
445
446 strcpy (global_locale_string, __get_global_locale ()->categories[1]);
447
448 for (i = 2; i < _LC_LAST; ++i)
449 if (strcmp (__get_global_locale ()->categories[1],
450 __get_global_locale ()->categories[i]))
451 {
452 for (i = 2; i < _LC_LAST; ++i)
453 {
454 (void)strcat(global_locale_string, "/");
455 (void)strcat(global_locale_string,
456 __get_global_locale ()->categories[i]);
457 }
458 break;
459 }
460 return global_locale_string;
461 }
462
463 extern void __set_ctype (struct __locale_t *, const char *charset);
464
465 char *
__loadlocale(struct __locale_t * loc,int category,char * new_locale)466 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
467 {
468 /* At this point a full-featured system would just load the locale
469 specific data from the locale files.
470 What we do here for now is to check the incoming string for correctness.
471 The string must be in one of the allowed locale strings, either
472 one in POSIX-style, or one in the old newlib style to maintain
473 backward compatibility. If the local string is correct, the charset
474 is extracted and stored in ctype_codeset or message_charset
475 dependent on the cateogry. */
476 char *locale = NULL;
477 char charset[ENCODING_LEN + 1];
478 long val = 0;
479 char *end, *c = NULL;
480 int mbc_max;
481 wctomb_p l_wctomb;
482 mbtowc_p l_mbtowc;
483 int cjksingle = 0;
484 int cjknarrow = 0;
485 int cjkwide = 0;
486
487 /* Avoid doing everything twice if nothing has changed.
488
489 duplocale relies on this test to go wrong so the locale is actually
490 duplicated when required. Any change here has to be synced with a
491 matching change in duplocale. */
492 if (!strcmp (new_locale, loc->categories[category]))
493 return loc->categories[category];
494
495 int ret = 0;
496
497 #ifdef __CYGWIN__
498 /* This additional code handles the case that the incoming locale string
499 is not valid. If so, it calls the function __set_locale_from_locale_alias,
500 which is only available on Cygwin right now. The function reads the
501 file /usr/share/locale/locale.alias. The file contains locale aliases
502 and their replacement locale. For instance, the alias "french" is
503 translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
504 "th_TH.TIS-620". If successful, the function returns with a pointer
505 to the second argument, which is a buffer in which the replacement locale
506 gets stored. Otherwise the function returns NULL. */
507 char tmp_locale[ENCODING_LEN + 1];
508
509 restart:
510 if (!locale)
511 locale = new_locale;
512 else if (locale != tmp_locale)
513 {
514 locale = __set_locale_from_locale_alias (locale, tmp_locale);
515 if (!locale)
516 return NULL;
517 }
518 # define FAIL goto restart
519 #else
520 locale = new_locale;
521 # define FAIL return NULL
522 #endif
523
524 /* "POSIX" is translated to "C", as on Linux. */
525 if (!strcmp (locale, "POSIX"))
526 strcpy (locale, "C");
527 if (!strcmp (locale, "C")) /* Default "C" locale */
528 strcpy (charset, "ASCII");
529 else if (locale[0] == 'C'
530 && (locale[1] == '-' /* Old newlib style */
531 || locale[1] == '.')) /* Extension for the C locale to allow
532 specifying different charsets while
533 sticking to the C locale in terms
534 of sort order, etc. Proposed in
535 the Debian project. */
536 {
537 char *chp;
538
539 c = locale + 2;
540 strcpy (charset, c);
541 if ((chp = strchr (charset, '@')))
542 /* Strip off modifier */
543 *chp = '\0';
544 c += strlen (charset);
545 }
546 else /* POSIX style */
547 {
548 c = locale;
549
550 /* Don't use ctype macros here, they might be localized. */
551 /* Language */
552 if (c[0] < 'a' || c[0] > 'z'
553 || c[1] < 'a' || c[1] > 'z')
554 FAIL;
555 c += 2;
556 /* Allow three character Language per ISO 639-3 */
557 if (c[0] >= 'a' && c[0] <= 'z')
558 ++c;
559 if (c[0] == '_')
560 {
561 /* Territory */
562 ++c;
563 if (c[0] < 'A' || c[0] > 'Z'
564 || c[1] < 'A' || c[1] > 'Z')
565 FAIL;
566 c += 2;
567 }
568 if (c[0] == '.')
569 {
570 /* Charset */
571 char *chp;
572
573 ++c;
574 strcpy (charset, c);
575 if ((chp = strchr (charset, '@')))
576 /* Strip off modifier */
577 *chp = '\0';
578 c += strlen (charset);
579 }
580 else if (c[0] == '\0' || c[0] == '@')
581 /* End of string or just a modifier */
582 #ifdef __CYGWIN__
583 /* The Cygwin-only function __set_charset_from_locale checks
584 for the default charset which is connected to the given locale.
585 The function uses Windows functions in turn so it can't be easily
586 adapted to other targets. However, if any other target provides
587 equivalent functionality, preferrably using the same function name
588 it would be sufficient to change the guarding #ifdef. */
589 __set_charset_from_locale (locale, charset);
590 #else
591 strcpy (charset, "ISO-8859-1");
592 #endif
593 else
594 /* Invalid string */
595 FAIL;
596 }
597 if (c && c[0] == '@')
598 {
599 /* Modifier "cjksingle" is recognized to enforce single-width mode. */
600 /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
601 behaviour of wcwidth() and wcswidth() for East Asian languages.
602 For details see the comment at the end of this function. */
603 if (!strcmp (c + 1, "cjksingle"))
604 cjksingle = 1;
605 else if (!strcmp (c + 1, "cjknarrow"))
606 cjknarrow = 1;
607 else if (!strcmp (c + 1, "cjkwide"))
608 cjkwide = 1;
609 }
610 /* We only support this subset of charsets. */
611 switch (charset[0])
612 {
613 case 'U':
614 case 'u':
615 if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
616 FAIL;
617 strcpy (charset, "UTF-8");
618 mbc_max = 6;
619 l_wctomb = __utf8_wctomb;
620 l_mbtowc = __utf8_mbtowc;
621 break;
622 #ifndef __CYGWIN__
623 /* Cygwin does not support JIS at all. */
624 case 'J':
625 case 'j':
626 if (strcasecmp (charset, "JIS"))
627 FAIL;
628 strcpy (charset, "JIS");
629 mbc_max = 8;
630 l_wctomb = __jis_wctomb;
631 l_mbtowc = __jis_mbtowc;
632 break;
633 #endif /* !__CYGWIN__ */
634 case 'E':
635 case 'e':
636 if (strncasecmp (charset, "EUC", 3))
637 FAIL;
638 c = charset + 3;
639 if (*c == '-')
640 ++c;
641 if (!strcasecmp (c, "JP"))
642 {
643 strcpy (charset, "EUCJP");
644 mbc_max = 3;
645 l_wctomb = __eucjp_wctomb;
646 l_mbtowc = __eucjp_mbtowc;
647 }
648 #ifdef __CYGWIN__
649 /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
650 implementation requires Windows support. */
651 else if (!strcasecmp (c, "KR"))
652 {
653 strcpy (charset, "EUCKR");
654 mbc_max = 2;
655 l_wctomb = __kr_wctomb;
656 l_mbtowc = __kr_mbtowc;
657 }
658 else if (!strcasecmp (c, "CN"))
659 {
660 strcpy (charset, "EUCCN");
661 mbc_max = 2;
662 l_wctomb = __gbk_wctomb;
663 l_mbtowc = __gbk_mbtowc;
664 }
665 #endif /* __CYGWIN__ */
666 else
667 FAIL;
668 break;
669 case 'S':
670 case 's':
671 if (strcasecmp (charset, "SJIS"))
672 FAIL;
673 strcpy (charset, "SJIS");
674 mbc_max = 2;
675 l_wctomb = __sjis_wctomb;
676 l_mbtowc = __sjis_mbtowc;
677 break;
678 case 'I':
679 case 'i':
680 /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
681 ISO-8859-12. This code also recognizes the aliases without dashes. */
682 if (strncasecmp (charset, "ISO", 3))
683 FAIL;
684 c = charset + 3;
685 if (*c == '-')
686 ++c;
687 if (strncasecmp (c, "8859", 4))
688 FAIL;
689 c += 4;
690 if (*c == '-')
691 ++c;
692 val = strtol (c, &end, 10);
693 if (val < 1 || val > 16 || val == 12 || *end)
694 FAIL;
695 strcpy (charset, "ISO-8859-");
696 c = charset + 9;
697 if (val > 10)
698 *c++ = '1';
699 *c++ = val % 10 + '0';
700 *c = '\0';
701 mbc_max = 1;
702 #ifdef _MB_EXTENDED_CHARSETS_ISO
703 l_wctomb = __iso_wctomb (val);
704 l_mbtowc = __iso_mbtowc (val);
705 #else /* !_MB_EXTENDED_CHARSETS_ISO */
706 l_wctomb = __ascii_wctomb;
707 l_mbtowc = __ascii_mbtowc;
708 #endif /* _MB_EXTENDED_CHARSETS_ISO */
709 break;
710 case 'C':
711 case 'c':
712 if (charset[1] != 'P' && charset[1] != 'p')
713 FAIL;
714 memcpy (charset, "CP", 2);
715 val = strtol (charset + 2, &end, 10);
716 if (*end)
717 FAIL;
718 switch (val)
719 {
720 case 437:
721 case 720:
722 case 737:
723 case 775:
724 case 850:
725 case 852:
726 case 855:
727 case 857:
728 case 858:
729 case 862:
730 case 866:
731 case 874:
732 case 1125:
733 case 1250:
734 case 1251:
735 case 1252:
736 case 1253:
737 case 1254:
738 case 1255:
739 case 1256:
740 case 1257:
741 case 1258:
742 mbc_max = 1;
743 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
744 l_wctomb = __cp_wctomb (val);
745 l_mbtowc = __cp_mbtowc (val);
746 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
747 l_wctomb = __ascii_wctomb;
748 l_mbtowc = __ascii_mbtowc;
749 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
750 break;
751 case 932:
752 mbc_max = 2;
753 l_wctomb = __sjis_wctomb;
754 l_mbtowc = __sjis_mbtowc;
755 break;
756 default:
757 FAIL;
758 }
759 break;
760 case 'K':
761 case 'k':
762 /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
763 if (strncasecmp (charset, "KOI8", 4))
764 FAIL;
765 c = charset + 4;
766 if (*c == '-')
767 ++c;
768 if (*c == 'R' || *c == 'r')
769 {
770 val = 20866;
771 strcpy (charset, "CP20866");
772 }
773 else if (*c == 'U' || *c == 'u')
774 {
775 val = 21866;
776 strcpy (charset, "CP21866");
777 }
778 else if (*c == 'T' || *c == 't')
779 {
780 val = 103;
781 strcpy (charset, "CP103");
782 }
783 else
784 FAIL;
785 mbc_max = 1;
786 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
787 l_wctomb = __cp_wctomb (val);
788 l_mbtowc = __cp_mbtowc (val);
789 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
790 l_wctomb = __ascii_wctomb;
791 l_mbtowc = __ascii_mbtowc;
792 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
793 break;
794 case 'A':
795 case 'a':
796 if (strcasecmp (charset, "ASCII"))
797 FAIL;
798 strcpy (charset, "ASCII");
799 mbc_max = 1;
800 l_wctomb = __ascii_wctomb;
801 l_mbtowc = __ascii_mbtowc;
802 break;
803 case 'G':
804 case 'g':
805 #ifdef __CYGWIN__
806 /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
807 requires Windows support. */
808 if (!strcasecmp (charset, "GBK")
809 || !strcasecmp (charset, "GB2312"))
810 {
811 strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
812 mbc_max = 2;
813 l_wctomb = __gbk_wctomb;
814 l_mbtowc = __gbk_mbtowc;
815 }
816 else if (!strcasecmp (charset, "GB18030"))
817 {
818 strcpy (charset, "GB18030");
819 mbc_max = 4;
820 l_wctomb = __gb18030_wctomb;
821 l_mbtowc = __gb18030_mbtowc;
822 }
823 else
824 #endif /* __CYGWIN__ */
825 /* GEORGIAN-PS and the alias without dash */
826 if (!strncasecmp (charset, "GEORGIAN", 8))
827 {
828 c = charset + 8;
829 if (*c == '-')
830 ++c;
831 if (strcasecmp (c, "PS"))
832 FAIL;
833 val = 101;
834 strcpy (charset, "CP101");
835 mbc_max = 1;
836 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
837 l_wctomb = __cp_wctomb (val);
838 l_mbtowc = __cp_mbtowc (val);
839 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
840 l_wctomb = __ascii_wctomb;
841 l_mbtowc = __ascii_mbtowc;
842 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
843 }
844 else
845 FAIL;
846 break;
847 case 'P':
848 case 'p':
849 /* PT154 */
850 if (strcasecmp (charset, "PT154"))
851 FAIL;
852 val = 102;
853 strcpy (charset, "CP102");
854 mbc_max = 1;
855 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
856 l_wctomb = __cp_wctomb (val);
857 l_mbtowc = __cp_mbtowc (val);
858 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
859 l_wctomb = __ascii_wctomb;
860 l_mbtowc = __ascii_mbtowc;
861 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
862 break;
863 case 'T':
864 case 't':
865 if (strncasecmp (charset, "TIS", 3))
866 FAIL;
867 c = charset + 3;
868 if (*c == '-')
869 ++c;
870 if (strcmp (c, "620"))
871 FAIL;
872 val = 874;
873 strcpy (charset, "CP874");
874 mbc_max = 1;
875 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
876 l_wctomb = __cp_wctomb (val);
877 l_mbtowc = __cp_mbtowc (val);
878 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
879 l_wctomb = __ascii_wctomb;
880 l_mbtowc = __ascii_mbtowc;
881 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
882 break;
883 #ifdef __CYGWIN__
884 /* Newlib does not provide Big5 and Cygwin's implementation
885 requires Windows support. */
886 case 'B':
887 case 'b':
888 if (strcasecmp (charset, "BIG5"))
889 FAIL;
890 strcpy (charset, "BIG5");
891 mbc_max = 2;
892 l_wctomb = __big5_wctomb;
893 l_mbtowc = __big5_mbtowc;
894 break;
895 #endif /* __CYGWIN__ */
896 default:
897 FAIL;
898 }
899 switch (category)
900 {
901 case LC_CTYPE:
902 #ifndef __HAVE_LOCALE_INFO__
903 strcpy (loc->ctype_codeset, charset);
904 loc->mb_cur_max[0] = mbc_max;
905 #endif
906 #ifdef __CYGWIN__
907 __mb_cur_max = mbc_max; /* Only for backward compat */
908 #endif
909 loc->wctomb = l_wctomb;
910 loc->mbtowc = l_mbtowc;
911 __set_ctype (loc, charset);
912 /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
913 /* Determine the width for the "CJK Ambiguous Width" category of
914 characters. This is used in wcwidth(). Assume single width for
915 single-byte charsets, and double width for multi-byte charsets
916 other than UTF-8. For UTF-8, use single width.
917 Single width can also be forced with the "@cjknarrow" modifier.
918 Double width can also be forced with the "@cjkwide" modifier.
919 */
920 loc->cjk_lang = cjkwide ||
921 (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
922 if (cjksingle)
923 loc->cjk_lang = -1; /* Disable CJK dual-width */
924 #ifdef __CYGWIN__
925 ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
926 mbc_max);
927 #endif /* __CYGWIN__ */
928 break;
929 #ifdef __CYGWIN__
930 /* Right now only Cygwin supports a __messages_load_locale function at all. */
931 case LC_MESSAGES:
932 #ifdef __HAVE_LOCALE_INFO__
933 ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
934 if (!ret)
935 #else
936 strcpy (loc->message_codeset, charset);
937 #endif /* __HAVE_LOCALE_INFO__ */
938 break;
939 #endif
940 #ifdef __HAVE_LOCALE_INFO__
941 #ifdef __CYGWIN__
942 /* Right now only Cygwin supports a __collate_load_locale function at all. */
943 case LC_COLLATE:
944 ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
945 break;
946 /* Right now only Cygwin supports a __monetary_load_locale function at all. */
947 case LC_MONETARY:
948 ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
949 break;
950 /* Right now only Cygwin supports a __numeric_load_locale function at all. */
951 case LC_NUMERIC:
952 ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
953 break;
954 /* Right now only Cygwin supports a __time_load_locale function at all. */
955 case LC_TIME:
956 ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
957 break;
958 #endif
959 #endif /* __HAVE_LOCALE_INFO__ */
960 default:
961 break;
962 }
963 #ifdef __HAVE_LOCALE_INFO__
964 if (ret)
965 FAIL;
966 #endif /* __HAVE_LOCALE_INFO__ */
967 return strcpy(loc->categories[category], new_locale);
968 }
969
970 const char *
__get_locale_env(int category)971 __get_locale_env (int category)
972 {
973 const char *env;
974
975 /* 1. check LC_ALL. */
976 env = getenv (categories[0]);
977
978 /* 2. check LC_* */
979 if (env == NULL || !*env)
980 env = getenv (categories[category]);
981
982 /* 3. check LANG */
983 if (env == NULL || !*env)
984 env = getenv ("LANG");
985
986 /* 4. if none is set, fall to default locale */
987 if (env == NULL || !*env)
988 env = __default_locale;
989
990 return env;
991 }
992 #endif /* _MB_CAPABLE */
993
994 size_t
__locale_mb_cur_max(void)995 __locale_mb_cur_max (void)
996 {
997 #ifdef __HAVE_LOCALE_INFO__
998 return __get_current_ctype_locale ()->mb_cur_max[0];
999 #else
1000 return __get_current_locale ()->mb_cur_max[0];
1001 #endif
1002 }
1003
1004 #ifdef __HAVE_LOCALE_INFO__
1005 const char *
__locale_ctype_ptr_l(struct __locale_t * locale)1006 __locale_ctype_ptr_l (struct __locale_t *locale)
1007 {
1008 return locale->ctype_ptr;
1009 }
1010
1011 const char *
__locale_ctype_ptr(void)1012 __locale_ctype_ptr (void)
1013 {
1014 return __get_current_locale ()->ctype_ptr;
1015 }
1016 #endif /* __HAVE_LOCALE_INFO__ */
1017