1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4
5 INDEX
6 setlocale
7 INDEX
8 localeconv
9
10 SYNOPSIS
11 #include <locale.h>
12 char *setlocale(int <[category]>, const char *<[locale]>);
struct lconv *localeconv(void);
14
15 DESCRIPTION
16 <<setlocale>> is the facility defined by ANSI C to condition the
17 execution environment for international collating and formatting
18 information; <<localeconv>> reports on the settings of the current
19 locale.
20
21 This is a minimal implementation, supporting only the required <<"POSIX">>
22 and <<"C">> values for <[locale]>; strings representing other locales are not
23 honored unless _MB_CAPABLE is defined.
24
25 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
26 the form
27
28 language[_TERRITORY][.charset][@@modifier]
29
30 <<"language">> is a two character string per ISO 639, or, if not available
31 for a given language, a three character string per ISO 639-3.
32 <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
33 <<"modifier">> see below.
34
35 Additionally to the POSIX specifier, the following extension is supported
36 for backward compatibility with older implementations using newlib:
37 <<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales while using charsets other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
42
43 The following charsets are recognized:
44 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
45 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
46 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
47 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
48 1256, 1257, 1258].
49
50 Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
51 are equivalent. Charset names with dashes can also be written without
52 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
53 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
54
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows Codepages.
57 Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
58 only newlib for Cygwin is built with full charset support by default.
59 Under Cygwin, this implementation additionally supports the charsets
60 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
61 <<"Big5">>. Cygwin does not support <<"JIS">>.
62
63 Cygwin additionally supports locales from the file
64 /usr/share/locale/locale.alias.
65
66 (<<"">> is also accepted; if given, the settings are read from the
67 corresponding LC_* environment variables and $LANG according to POSIX rules.)
68
69 This implementation also supports the modifiers <<"cjknarrow">> and
70 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
71 handle characters from the "CJK Ambiguous Width" category of characters
72 described at http://www.unicode.org/reports/tr11/#Ambiguous.
73 These characters have a width of 1 for singlebyte charsets and UTF-8,
74 and a width of 2 for multibyte charsets other than UTF-8. Specifying
75 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
76
77 This implementation also supports the modifier <<"cjksingle">>
78 to enforce single-width character properties.
79
80 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
81 pointer to the string representing the current locale. The acceptable
82 values for <[category]> are defined in `<<locale.h>>' as macros
83 beginning with <<"LC_">>.
84
85 <<localeconv>> returns a pointer to a structure (also defined in
86 `<<locale.h>>') describing the locale-specific conventions currently
87 in effect.
88
89 RETURNS
90 A successful call to <<setlocale>> returns a pointer to a string
91 associated with the specified category for the new locale. The string
92 returned by <<setlocale>> is such that a subsequent call using that
93 string will restore that category (or all categories in case of LC_ALL),
94 to that state. The application shall not modify the string returned
95 which may be overwritten by a subsequent call to <<setlocale>>.
96 On error, <<setlocale>> returns <<NULL>>.
97
98 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
99 which describes the formatting and collating conventions in effect (in
100 this implementation, always those of the C locale).
101
102 PORTABILITY
103 ANSI C requires <<setlocale>>, but the only locale required across all
104 implementations is the C locale.
105
106 NOTES
107 There is no ISO-8859-12 codepage. It's also refused by this implementation.
108
109 No supporting OS subroutines are required.
110 */
111
112 /* Parts of this code are originally taken from FreeBSD. */
113 /*
114 * Copyright (c) 1996 - 2002 FreeBSD Project
115 * Copyright (c) 1991, 1993
116 * The Regents of the University of California. All rights reserved.
117 *
118 * This code is derived from software contributed to Berkeley by
119 * Paul Borman at Krystal Technologies.
120 *
121 * Redistribution and use in source and binary forms, with or without
122 * modification, are permitted provided that the following conditions
123 * are met:
124 * 1. Redistributions of source code must retain the above copyright
125 * notice, this list of conditions and the following disclaimer.
126 * 2. Redistributions in binary form must reproduce the above copyright
127 * notice, this list of conditions and the following disclaimer in the
128 * documentation and/or other materials provided with the distribution.
129 * 4. Neither the name of the University nor the names of its contributors
130 * may be used to endorse or promote products derived from this software
131 * without specific prior written permission.
132 *
133 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
134 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
135 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
136 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
137 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
138 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
139 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
140 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
141 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
142 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
143 * SUCH DAMAGE.
144 */
145
146 #define _GNU_SOURCE
147 #define _PICOLIBC_CTYPE_SMALL 0
148 #include <errno.h>
149 #include <string.h>
150 #include <limits.h>
151 #include <stdlib.h>
152 #include <wchar.h>
153 #include "setlocale.h"
154 #include "../ctype/ctype_.h"
155 #include "../stdlib/local.h"
156
#ifdef __HAVE_LOCALE_INFO__
/* Pointer to the locale object currently in use.  It starts out pointing
   at __global_locale.  NOTE(review): NEWLIB_THREAD_LOCAL presumably makes
   this a per-thread variable -- confirm against the macro's definition. */
NEWLIB_THREAD_LOCAL struct __locale_t *_locale = &__global_locale;
#endif
160
/* Exported pointer, NULL by default.  It is not read or written anywhere in
   the visible part of this file.  NOTE(review): presumably inherited from
   the FreeBSD code as the locale data directory -- confirm before relying
   on it. */
char *_PathLocale = NULL;
162
#ifdef _MB_CAPABLE
/*
 * Category names for getenv(), indexed by category number.
 *
 * __get_locale_env() looks up categories[0] ("LC_ALL") first and then
 * categories[category].  NOTE(review): this assumes the numeric values of
 * the LC_* macros match the positions below -- confirm against locale.h.
 */
static char *categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
177
/*
 * Default locale per POSIX.  Can be overridden on a per-target basis by
 * defining DEFAULT_LOCALE before this point (e.g. on the compiler command
 * line); the fallback below is only used when it is not already defined.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE	"C"
#endif
184
185 #ifdef _MB_CAPABLE
/*
 * Name of the locale used by __get_locale_env() when none of LC_ALL, the
 * category-specific LC_* variable, or LANG is set to a non-empty value.
 *
 * NOTE(review): an earlier version of this comment said the variable could
 * be changed by an outside mechanism (for instance, to load the default
 * locale from a file).  As declared here it is static const, so its value
 * is fixed at build time by DEFAULT_LOCALE.
 */
static const char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;
191
/*
 * Constant description of the "C" locale.
 *
 * The initializers are positional.  The member names mentioned in the
 * comments below are inferred from how this file accesses
 * struct __locale_t; the definition itself is not visible here, so confirm
 * the order against setlocale.h before editing.
 */
const struct __locale_t __C_locale =
{
  /* Locale name of each category (presumably the categories member). */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* Conversion functions (presumably the wctomb and mbtowc members). */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang; __loadlocale() documents 0 as "normal". */
  0,
  /* Presumably ctype_ptr, the character classification table. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv contents: "." followed by empty strings,
     then CHAR_MAX for every numeric entry -- confirm against locale.h. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max (stored as a one-character string, value 1),
     ctype_codeset, and the message codeset -- confirm the last one. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* One pair per category; the first element points at the built-in C
     data for that category, the second is NULL.  NOTE(review): the meaning
     of the second element is not visible in this file. */
  {
    { NULL, NULL },			/* LC_ALL */
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
222 #endif /* _MB_CAPABLE */
223
/*
 * The global locale object: the one setlocale() queries and modifies via
 * __get_global_locale() and __loadlocale().
 *
 * The initializers are positional.  The member names mentioned in the
 * comments below are inferred from how this file accesses
 * struct __locale_t; confirm the order against setlocale.h before editing.
 */
struct __locale_t __global_locale =
{
  /* Locale name of each category (presumably the categories member).
     Every entry is "C" except the third one, which starts out as
     DEFAULT_LOCALE.  NOTE(review): the third slot is presumably LC_CTYPE,
     going by the order of the category-name table -- confirm. */
  { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
  /* Conversion functions (presumably the wctomb and mbtowc members). */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang; __loadlocale() documents 0 as "normal". */
  0,
  /* Presumably ctype_ptr, the character classification table. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv contents: "." followed by empty strings,
     then CHAR_MAX for every numeric entry -- confirm against locale.h. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max (stored as a one-character string, value 1),
     ctype_codeset, and the message codeset -- confirm the last one. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* One pair per category; the first element points at the built-in C
     data for that category, the second is NULL.  NOTE(review): the meaning
     of the second element is not visible in this file. */
  {
    { NULL, NULL },			/* LC_ALL */
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
254
255 #ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   The buffer is rebuilt by currentlocale() and returned directly to
   callers of setlocale().  It reserves, for each of the _LC_LAST slots,
   room for one locale name of up to ENCODING_LEN characters plus a "/"
   separator plus one extra byte. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
	= "C";
/* Rebuilds global_locale_string from the global locale's category names
   and returns it; defined after setlocale(). */
static char *currentlocale (void);
262
263 #endif /* _MB_CAPABLE */
264
/*
 * setlocale -- query or change the global locale.
 *
 * category  One of the LC_* values.
 * locale    NULL to query; "" to take the name(s) from the environment;
 *           otherwise a locale name.  With LC_ALL (multibyte-capable
 *           builds only) the name may also be a '/'-separated list
 *           supplying one name per category.
 *
 * Without _MB_CAPABLE only "C", "POSIX" and "" are accepted and the result
 * is always "C" (or NULL for any other name).
 *
 * With _MB_CAPABLE:
 *   - a query returns the stored name of the category, or the combined
 *     string for LC_ALL;
 *   - setting a single category returns whatever __loadlocale() returns;
 *   - setting LC_ALL returns the combined string, or NULL if any category
 *     fails to load, in which case the categories already switched are put
 *     back and the error code of the failed load is preserved;
 *   - an out-of-range category or an over-long name yields NULL with the
 *     error code set to EINVAL.
 */
char *
setlocale (int category, const char *locale)
{
  (void) category;
#ifndef _MB_CAPABLE
  /* Only the mandatory names are known in this configuration. */
  if (locale != NULL
      && strcmp (locale, "POSIX") != 0
      && strcmp (locale, "C") != 0
      && strcmp (locale, "") != 0)
    return NULL;
  return "C";
#else /* _MB_CAPABLE */
  static char requested[_LC_LAST][ENCODING_LEN + 1];
  static char rollback[_LC_LAST][ENCODING_LEN + 1];
  int idx, undo, seg_len, saved_errno;
  const char *env_name, *sep;
  char *result;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      _REENT_ERRNO(p) = EINVAL;
      return NULL;
    }

  /* Pure query: hand back the stored name. */
  if (locale == NULL)
    {
      if (category == LC_ALL)
	return global_locale_string;
      return __get_global_locale ()->categories[category];
    }

  /* Every category starts out as "keep what is installed now". */
  for (idx = 1; idx < _LC_LAST; ++idx)
    strcpy (requested[idx], __get_global_locale ()->categories[idx]);

  /* Work out the requested name(s) from the locale argument. */
  if (category != LC_ALL)
    {
      /* Single category: an empty name means "ask the environment". */
      const char *name = *locale ? locale : __get_locale_env (category);

      if (strlen (name) > ENCODING_LEN)
	{
	  _REENT_ERRNO(p) = EINVAL;
	  return NULL;
	}
      strcpy (requested[category], name);
    }
  else if (!*locale)
    {
      /* LC_ALL from the environment: resolve each category on its own. */
      for (idx = 1; idx < _LC_LAST; ++idx)
	{
	  env_name = __get_locale_env (idx);
	  if (strlen (env_name) > ENCODING_LEN)
	    {
	      _REENT_ERRNO(p) = EINVAL;
	      return NULL;
	    }
	  strcpy (requested[idx], env_name);
	}
    }
  else if ((sep = strchr (locale, '/')) == NULL)
    {
      /* LC_ALL with one plain name: use it for every category. */
      if (strlen (locale) > ENCODING_LEN)
	{
	  _REENT_ERRNO(p) = EINVAL;
	  return NULL;
	}
      for (idx = 1; idx < _LC_LAST; ++idx)
	strcpy (requested[idx], locale);
    }
  else
    {
      /* LC_ALL with a '/'-separated list, one name per category. */
      idx = 1;
      while (sep[1] == '/')
	++sep;
      if (sep[1] == '\0')
	{
	  /* Nothing but slashes after the first name. */
	  _REENT_ERRNO(p) = EINVAL;
	  return NULL;
	}
      do
	{
	  if (idx == _LC_LAST)
	    break;			/* More names than categories. */
	  seg_len = sep - locale;
	  if (seg_len > ENCODING_LEN)
	    {
	      _REENT_ERRNO(p) = EINVAL;
	      return NULL;
	    }
	  strlcpy (requested[idx], locale, seg_len + 1);
	  idx++;
	  /* Step over the separator(s), then find the end of the next
	     name. */
	  while (*sep == '/')
	    sep++;
	  locale = sep;
	  while (*sep && *sep != '/')
	    sep++;
	}
      while (*locale);
      /* Categories without a name of their own inherit the last one
	 that was given. */
      while (idx < _LC_LAST)
	{
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Wunknown-warning-option"
/*
 * We're copying the last specified category into the rest
 * which appears to confuse the gcc analyzer
 */
#pragma GCC diagnostic ignored "-Wanalyzer-overlapping-buffers"
#endif
	  strcpy (requested[idx], requested[idx-1]);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
	  idx++;
	}
    }

  /* Single category: load it and refresh the combined string. */
  if (category != LC_ALL)
    {
      result = __loadlocale (__get_global_locale (), category,
			     requested[category]);
      currentlocale ();
      return result;
    }

  /* LC_ALL: load category by category, remembering the old names so a
     failure part-way through can be undone. */
  for (idx = 1; idx < _LC_LAST; ++idx)
    {
      strcpy (rollback[idx], __get_global_locale ()->categories[idx]);
      if (__loadlocale (__get_global_locale (), idx, requested[idx]) != NULL)
	continue;

      saved_errno = _REENT_ERRNO(p);
      for (undo = 1; undo < idx; undo++)
	{
	  strcpy (requested[undo], rollback[undo]);
	  if (__loadlocale (__get_global_locale (), undo, requested[undo])
	      == NULL)
	    {
	      /* Even the old name is refused now: fall back to "C". */
	      strcpy (requested[undo], "C");
	      __loadlocale (__get_global_locale (), undo, requested[undo]);
	    }
	}
      _REENT_ERRNO(p) = saved_errno;
      return NULL;
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
431
432 #ifdef _MB_CAPABLE
433 static char *
currentlocale(void)434 currentlocale (void)
435 {
436 int i;
437
438 strcpy (global_locale_string, __get_global_locale ()->categories[1]);
439
440 for (i = 2; i < _LC_LAST; ++i)
441 if (strcmp (__get_global_locale ()->categories[1],
442 __get_global_locale ()->categories[i]))
443 {
444 for (i = 2; i < _LC_LAST; ++i)
445 {
446 (void)strcat(global_locale_string, "/");
447 (void)strcat(global_locale_string,
448 __get_global_locale ()->categories[i]);
449 }
450 break;
451 }
452 return global_locale_string;
453 }
454
455 char *
__loadlocale(struct __locale_t * loc,int category,char * new_locale)456 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
457 {
458 /* At this point a full-featured system would just load the locale
459 specific data from the locale files.
460 What we do here for now is to check the incoming string for correctness.
461 The string must be in one of the allowed locale strings, either
462 one in POSIX-style, or one in the old newlib style to maintain
463 backward compatibility. If the local string is correct, the charset
464 is extracted and stored in ctype_codeset or message_charset
465 dependent on the cateogry. */
466 char *locale = NULL;
467 char charset[ENCODING_LEN + 1] = {};
468 long val = 0;
469 char *end, *c = NULL;
470 int mbc_max;
471 wctomb_p l_wctomb;
472 mbtowc_p l_mbtowc;
473 int cjksingle = 0;
474 int cjknarrow = 0;
475 int cjkwide = 0;
476
477 /* Avoid doing everything twice if nothing has changed.
478
479 duplocale relies on this test to go wrong so the locale is actually
480 duplicated when required. Any change here has to be synced with a
481 matching change in duplocale. */
482 if (!strcmp (new_locale, loc->categories[category]))
483 return loc->categories[category];
484
485 locale = new_locale;
486 # define FAIL return NULL
487
488 /* "POSIX" is translated to "C", as on Linux. */
489 if (!strcmp (locale, "POSIX"))
490 strcpy (locale, "C");
491 if (!strcmp (locale, "C")) /* Default "C" locale */
492 strcpy (charset, "ASCII");
493 else if (locale[0] == 'C'
494 && (locale[1] == '-' /* Old newlib style */
495 || locale[1] == '.')) /* Extension for the C locale to allow
496 specifying different charsets while
497 sticking to the C locale in terms
498 of sort order, etc. Proposed in
499 the Debian project. */
500 {
501 char *chp;
502
503 c = locale + 2;
504 strcpy (charset, c);
505 if ((chp = strchr (charset, '@')))
506 /* Strip off modifier */
507 *chp = '\0';
508 c += strlen (charset);
509 }
510 else /* POSIX style */
511 {
512 c = locale;
513
514 /* Don't use ctype macros here, they might be localized. */
515 /* Language */
516 if (c[0] < 'a' || c[0] > 'z'
517 || c[1] < 'a' || c[1] > 'z')
518 FAIL;
519 c += 2;
520 /* Allow three character Language per ISO 639-3 */
521 if (c[0] >= 'a' && c[0] <= 'z')
522 ++c;
523 if (c[0] == '_')
524 {
525 /* Territory */
526 ++c;
527 if (c[0] < 'A' || c[0] > 'Z'
528 || c[1] < 'A' || c[1] > 'Z')
529 FAIL;
530 c += 2;
531 }
532 if (c[0] == '.')
533 {
534 /* Charset */
535 char *chp;
536
537 ++c;
538 strcpy (charset, c);
539 if ((chp = strchr (charset, '@')))
540 /* Strip off modifier */
541 *chp = '\0';
542 c += strlen (charset);
543 }
544 else if (c[0] == '\0' || c[0] == '@')
545 /* End of string or just a modifier */
546 strcpy (charset, "ISO-8859-1");
547 else
548 /* Invalid string */
549 FAIL;
550 }
551 if (c && c[0] == '@')
552 {
553 /* Modifier "cjksingle" is recognized to enforce single-width mode. */
554 /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
555 behaviour of wcwidth() and wcswidth() for East Asian languages.
556 For details see the comment at the end of this function. */
557 if (!strcmp (c + 1, "cjksingle"))
558 cjksingle = 1;
559 else if (!strcmp (c + 1, "cjknarrow"))
560 cjknarrow = 1;
561 else if (!strcmp (c + 1, "cjkwide"))
562 cjkwide = 1;
563 }
564 /* We only support this subset of charsets. */
565 switch (charset[0])
566 {
567 case 'U':
568 case 'u':
569 if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
570 FAIL;
571 strcpy (charset, "UTF-8");
572 mbc_max = 6;
573 l_wctomb = __utf8_wctomb;
574 l_mbtowc = __utf8_mbtowc;
575 break;
576 /* Cygwin does not support JIS at all. */
577 case 'J':
578 case 'j':
579 if (strcasecmp (charset, "JIS"))
580 FAIL;
581 strcpy (charset, "JIS");
582 mbc_max = 8;
583 l_wctomb = __jis_wctomb;
584 l_mbtowc = __jis_mbtowc;
585 break;
586 case 'E':
587 case 'e':
588 if (strncasecmp (charset, "EUC", 3))
589 FAIL;
590 c = charset + 3;
591 if (*c == '-')
592 ++c;
593 if (!strcasecmp (c, "JP"))
594 {
595 strcpy (charset, "EUCJP");
596 mbc_max = 3;
597 l_wctomb = __eucjp_wctomb;
598 l_mbtowc = __eucjp_mbtowc;
599 }
600 else
601 FAIL;
602 break;
603 case 'S':
604 case 's':
605 if (strcasecmp (charset, "SJIS"))
606 FAIL;
607 strcpy (charset, "SJIS");
608 mbc_max = 2;
609 l_wctomb = __sjis_wctomb;
610 l_mbtowc = __sjis_mbtowc;
611 break;
612 case 'I':
613 case 'i':
614 /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
615 ISO-8859-12. This code also recognizes the aliases without dashes. */
616 if (strncasecmp (charset, "ISO", 3))
617 FAIL;
618 c = charset + 3;
619 if (*c == '-')
620 ++c;
621 if (strncasecmp (c, "8859", 4))
622 FAIL;
623 c += 4;
624 if (*c == '-')
625 ++c;
626 val = strtol (c, &end, 10);
627 if (val < 1 || val > 16 || val == 12 || *end)
628 FAIL;
629 strcpy (charset, "ISO-8859-");
630 c = charset + 9;
631 if (val > 10)
632 *c++ = '1';
633 *c++ = val % 10 + '0';
634 *c = '\0';
635 mbc_max = 1;
636 #ifdef _MB_EXTENDED_CHARSETS_ISO
637 l_wctomb = __iso_wctomb (val);
638 l_mbtowc = __iso_mbtowc (val);
639 #else /* !_MB_EXTENDED_CHARSETS_ISO */
640 l_wctomb = __ascii_wctomb;
641 l_mbtowc = __ascii_mbtowc;
642 #endif /* _MB_EXTENDED_CHARSETS_ISO */
643 break;
644 case 'C':
645 case 'c':
646 if (charset[1] != 'P' && charset[1] != 'p')
647 FAIL;
648 memcpy (charset, "CP", 2);
649 val = strtol (charset + 2, &end, 10);
650 if (*end)
651 FAIL;
652 switch (val)
653 {
654 case 437:
655 case 720:
656 case 737:
657 case 775:
658 case 850:
659 case 852:
660 case 855:
661 case 857:
662 case 858:
663 case 862:
664 case 866:
665 case 874:
666 case 1125:
667 case 1250:
668 case 1251:
669 case 1252:
670 case 1253:
671 case 1254:
672 case 1255:
673 case 1256:
674 case 1257:
675 case 1258:
676 mbc_max = 1;
677 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
678 l_wctomb = __cp_wctomb (val);
679 l_mbtowc = __cp_mbtowc (val);
680 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
681 l_wctomb = __ascii_wctomb;
682 l_mbtowc = __ascii_mbtowc;
683 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
684 break;
685 case 932:
686 mbc_max = 2;
687 l_wctomb = __sjis_wctomb;
688 l_mbtowc = __sjis_mbtowc;
689 break;
690 default:
691 FAIL;
692 }
693 break;
694 case 'K':
695 case 'k':
696 /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
697 if (strncasecmp (charset, "KOI8", 4))
698 FAIL;
699 c = charset + 4;
700 if (*c == '-')
701 ++c;
702 if (*c == 'R' || *c == 'r')
703 {
704 val = 20866;
705 strcpy (charset, "CP20866");
706 }
707 else if (*c == 'U' || *c == 'u')
708 {
709 val = 21866;
710 strcpy (charset, "CP21866");
711 }
712 else if (*c == 'T' || *c == 't')
713 {
714 val = 103;
715 strcpy (charset, "CP103");
716 }
717 else
718 FAIL;
719 mbc_max = 1;
720 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
721 l_wctomb = __cp_wctomb (val);
722 l_mbtowc = __cp_mbtowc (val);
723 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
724 l_wctomb = __ascii_wctomb;
725 l_mbtowc = __ascii_mbtowc;
726 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
727 break;
728 case 'A':
729 case 'a':
730 if (strcasecmp (charset, "ASCII"))
731 FAIL;
732 strcpy (charset, "ASCII");
733 mbc_max = 1;
734 l_wctomb = __ascii_wctomb;
735 l_mbtowc = __ascii_mbtowc;
736 break;
737 case 'G':
738 case 'g':
739 /* GEORGIAN-PS and the alias without dash */
740 if (!strncasecmp (charset, "GEORGIAN", 8))
741 {
742 c = charset + 8;
743 if (*c == '-')
744 ++c;
745 if (strcasecmp (c, "PS"))
746 FAIL;
747 val = 101;
748 strcpy (charset, "CP101");
749 mbc_max = 1;
750 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
751 l_wctomb = __cp_wctomb (val);
752 l_mbtowc = __cp_mbtowc (val);
753 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
754 l_wctomb = __ascii_wctomb;
755 l_mbtowc = __ascii_mbtowc;
756 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
757 }
758 else
759 FAIL;
760 break;
761 case 'P':
762 case 'p':
763 /* PT154 */
764 if (strcasecmp (charset, "PT154"))
765 FAIL;
766 val = 102;
767 strcpy (charset, "CP102");
768 mbc_max = 1;
769 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
770 l_wctomb = __cp_wctomb (val);
771 l_mbtowc = __cp_mbtowc (val);
772 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
773 l_wctomb = __ascii_wctomb;
774 l_mbtowc = __ascii_mbtowc;
775 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
776 break;
777 case 'T':
778 case 't':
779 if (strncasecmp (charset, "TIS", 3))
780 FAIL;
781 c = charset + 3;
782 if (*c == '-')
783 ++c;
784 if (strcmp (c, "620"))
785 FAIL;
786 val = 874;
787 strcpy (charset, "CP874");
788 mbc_max = 1;
789 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
790 l_wctomb = __cp_wctomb (val);
791 l_mbtowc = __cp_mbtowc (val);
792 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
793 l_wctomb = __ascii_wctomb;
794 l_mbtowc = __ascii_mbtowc;
795 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
796 break;
797 default:
798 FAIL;
799 }
800 switch (category)
801 {
802 case LC_CTYPE:
803 #ifndef __HAVE_LOCALE_INFO__
804 strcpy (loc->ctype_codeset, charset);
805 loc->mb_cur_max[0] = mbc_max;
806 #endif
807 loc->wctomb = l_wctomb;
808 loc->mbtowc = l_mbtowc;
809 __set_ctype (loc, charset);
810 /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
811 /* Determine the width for the "CJK Ambiguous Width" category of
812 characters. This is used in wcwidth(). Assume single width for
813 single-byte charsets, and double width for multi-byte charsets
814 other than UTF-8. For UTF-8, use single width.
815 Single width can also be forced with the "@cjknarrow" modifier.
816 Double width can also be forced with the "@cjkwide" modifier.
817 */
818 loc->cjk_lang = cjkwide ||
819 (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
820 if (cjksingle)
821 loc->cjk_lang = -1; /* Disable CJK dual-width */
822 break;
823 #ifdef __HAVE_LOCALE_INFO__
824 #endif /* __HAVE_LOCALE_INFO__ */
825 default:
826 break;
827 }
828 #ifdef __HAVE_LOCALE_INFO__
829 #endif /* __HAVE_LOCALE_INFO__ */
830 return strcpy(loc->categories[category], new_locale);
831 }
832
833 const char *
__get_locale_env(int category)834 __get_locale_env (int category)
835 {
836 const char *env;
837
838 /* 1. check LC_ALL. */
839 env = getenv (categories[0]);
840
841 /* 2. check LC_* */
842 if (env == NULL || !*env)
843 env = getenv (categories[category]);
844
845 /* 3. check LANG */
846 if (env == NULL || !*env)
847 env = getenv ("LANG");
848
849 /* 4. if none is set, fall to default locale */
850 if (env == NULL || !*env)
851 env = __default_locale;
852
853 return env;
854 }
855 #endif /* _MB_CAPABLE */
856
857 size_t
__locale_mb_cur_max(void)858 __locale_mb_cur_max (void)
859 {
860 #ifdef __HAVE_LOCALE_INFO__
861 return __get_current_ctype_locale ()->mb_cur_max[0];
862 #else
863 return __get_current_locale ()->mb_cur_max[0];
864 #endif
865 }
866
867 #ifdef __HAVE_LOCALE_INFO__
868 const char *
__locale_ctype_ptr_l(struct __locale_t * locale)869 __locale_ctype_ptr_l (struct __locale_t *locale)
870 {
871 return locale->ctype_ptr;
872 }
873
874 const char *
__locale_ctype_ptr(void)875 __locale_ctype_ptr (void)
876 {
877 return __get_current_locale ()->ctype_ptr;
878 }
879 #endif /* __HAVE_LOCALE_INFO__ */
880