1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4 
5 INDEX
6 	setlocale
7 INDEX
8 	localeconv
9 
10 SYNOPSIS
11 	#include <locale.h>
12 	char *setlocale(int <[category]>, const char *<[locale]>);
13 	lconv *localeconv(void);
14 
15 DESCRIPTION
16 <<setlocale>> is the facility defined by ANSI C to condition the
17 execution environment for international collating and formatting
18 information; <<localeconv>> reports on the settings of the current
19 locale.
20 
21 This is a minimal implementation, supporting only the required <<"POSIX">>
22 and <<"C">> values for <[locale]>; strings representing other locales are not
23 honored unless _MB_CAPABLE is defined.
24 
25 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
26 the form
27 
28   language[_TERRITORY][.charset][@@modifier]
29 
30 <<"language">> is a two character string per ISO 639, or, if not available
31 for a given language, a three character string per ISO 639-3.
32 <<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
33 <<"modifier">> see below.
34 
In addition to the POSIX specifier, the following extension is supported
for backward compatibility with older implementations using newlib:
<<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales that use charsets other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
42 
43 The following charsets are recognized:
44 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
45 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
46 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
47 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
48 1256, 1257, 1258].
49 
50 Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
51 are equivalent.  Charset names with dashes can also be written without
52 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
53 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
54 
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows codepages.
57 Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
58 only newlib for Cygwin is built with full charset support by default.
59 Under Cygwin, this implementation additionally supports the charsets
60 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
61 <<"Big5">>.  Cygwin does not support <<"JIS">>.
62 
63 Cygwin additionally supports locales from the file
64 /usr/share/locale/locale.alias.
65 
66 (<<"">> is also accepted; if given, the settings are read from the
67 corresponding LC_* environment variables and $LANG according to POSIX rules.)
68 
69 This implementation also supports the modifiers <<"cjknarrow">> and
70 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
71 handle characters from the "CJK Ambiguous Width" category of characters
72 described at http://www.unicode.org/reports/tr11/#Ambiguous.
73 These characters have a width of 1 for singlebyte charsets and UTF-8,
74 and a width of 2 for multibyte charsets other than UTF-8. Specifying
75 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
76 
77 This implementation also supports the modifier <<"cjksingle">>
78 to enforce single-width character properties.
79 
80 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
81 pointer to the string representing the current locale.  The acceptable
82 values for <[category]> are defined in `<<locale.h>>' as macros
83 beginning with <<"LC_">>.
84 
85 <<localeconv>> returns a pointer to a structure (also defined in
86 `<<locale.h>>') describing the locale-specific conventions currently
87 in effect.
88 
89 RETURNS
90 A successful call to <<setlocale>> returns a pointer to a string
91 associated with the specified category for the new locale.  The string
92 returned by <<setlocale>> is such that a subsequent call using that
93 string will restore that category (or all categories in case of LC_ALL),
94 to that state.  The application shall not modify the string returned
95 which may be overwritten by a subsequent call to <<setlocale>>.
96 On error, <<setlocale>> returns <<NULL>>.
97 
98 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
99 which describes the formatting and collating conventions in effect (in
100 this implementation, always those of the C locale).
101 
102 PORTABILITY
103 ANSI C requires <<setlocale>>, but the only locale required across all
104 implementations is the C locale.
105 
106 NOTES
107 There is no ISO-8859-12 codepage.  It's also refused by this implementation.
108 
109 No supporting OS subroutines are required.
110 */
111 
112 /* Parts of this code are originally taken from FreeBSD. */
113 /*
114  * Copyright (c) 1996 - 2002 FreeBSD Project
115  * Copyright (c) 1991, 1993
116  *      The Regents of the University of California.  All rights reserved.
117  *
118  * This code is derived from software contributed to Berkeley by
119  * Paul Borman at Krystal Technologies.
120  *
121  * Redistribution and use in source and binary forms, with or without
122  * modification, are permitted provided that the following conditions
123  * are met:
124  * 1. Redistributions of source code must retain the above copyright
125  *    notice, this list of conditions and the following disclaimer.
126  * 2. Redistributions in binary form must reproduce the above copyright
127  *    notice, this list of conditions and the following disclaimer in the
128  *    documentation and/or other materials provided with the distribution.
129  * 4. Neither the name of the University nor the names of its contributors
130  *    may be used to endorse or promote products derived from this software
131  *    without specific prior written permission.
132  *
133  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
134  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
135  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
136  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
137  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
138  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
139  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
140  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
141  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
142  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
143  * SUCH DAMAGE.
144  */
145 
146 #define _GNU_SOURCE
147 #define _PICOLIBC_CTYPE_SMALL 0
148 #include <errno.h>
149 #include <string.h>
150 #include <limits.h>
151 #include <stdlib.h>
152 #include <wchar.h>
153 #include "setlocale.h"
154 #include "../ctype/ctype_.h"
155 #include "../stdlib/local.h"
156 
#ifdef __HAVE_LOCALE_INFO__
/* Locale object pointer, initialized to the address of the global locale.
   NOTE(review): NEWLIB_THREAD_LOCAL presumably makes this a per-thread
   variable -- confirm against the macro's definition. */
NEWLIB_THREAD_LOCAL struct __locale_t *_locale = &__global_locale;
#endif

/* Not referenced anywhere else in this file.
   NOTE(review): presumably kept for external code that expects this
   symbol -- confirm before removing. */
char *_PathLocale = NULL;
162 
#ifdef _MB_CAPABLE
/*
 * Names of the environment variables consulted for each locale category,
 * indexed by the LC_* category value (entry 0 is LC_ALL); used with
 * getenv() in __get_locale_env.  The entries are string literals and
 * are never modified, so both the pointers and the characters they
 * point to are const.
 */
static const char *const categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
177 
/*
 * Default locale per POSIX.  Can be overridden on a per-target basis by
 * defining DEFAULT_LOCALE before this point; the definition below only
 * takes effect when the macro is not already defined.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE	"C"
#endif
184 
185 #ifdef _MB_CAPABLE
/*
 * Locale name returned by __get_locale_env when none of LC_ALL, the
 * category-specific LC_* variable or LANG is set to a non-empty value.
 * The object is static const, so its value is fixed at build time via
 * DEFAULT_LOCALE and cannot be changed at run time.
 */
static const char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;
191 
/*
 * Read-only locale object for the plain "C" locale: all seven category
 * names are "C" and the ASCII multibyte conversion functions are
 * installed.  It is not referenced elsewhere in this file.
 *
 * NOTE(review): the initializer is positional and struct __locale_t is
 * defined in another header, so the meaning of each position (category
 * names, wctomb/mbtowc hooks, CJK flag, ctype table pointer, lconv
 * values, then either mb_cur_max/codeset strings or the per-category
 * locale-info table) is inferred from how __loadlocale uses the struct
 * -- confirm against the struct definition before reordering anything.
 */
const struct __locale_t __C_locale =
{
  { "C", "C", "C", "C", "C", "C", "C", },
  __ascii_wctomb,
  __ascii_mbtowc,
  0,
  DEFAULT_CTYPE_PTR,
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  {
    { NULL, NULL },			/* LC_ALL */
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
222 #endif /* _MB_CAPABLE */
223 
/*
 * The modifiable global locale object.  Its initial contents match the
 * "C" locale, except that the third category name (the LC_CTYPE position
 * in the LC_* ordering used throughout this file) is DEFAULT_LOCALE.
 * The conversion functions start out as the ASCII ones regardless of
 * what DEFAULT_LOCALE names.
 *
 * NOTE(review): presumably this is the object __get_global_locale()
 * returns and that setlocale updates -- confirm in setlocale.h.  The
 * initializer is positional; check the struct definition before
 * reordering anything.
 */
struct __locale_t __global_locale =
{
  { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
  __ascii_wctomb,
  __ascii_mbtowc,
  0,
  DEFAULT_CTYPE_PTR,
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  {
    { NULL, NULL },			/* LC_ALL */
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
254 
255 #ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   The buffer reserves ENCODING_LEN + 2 bytes for each of the _LC_LAST
   categories, which is enough for the "name/name/..." form that
   currentlocale builds when the categories differ. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
	    = "C";
/* Rebuilds global_locale_string from the global locale's category names
   and returns it; defined further down. */
static char *currentlocale (void);
262 
263 #endif /* _MB_CAPABLE */
264 
#ifdef _MB_CAPABLE
/*
 * Copy the locale name NAME into the category slot SLOT.
 * Returns 0 on success.  If NAME is longer than ENCODING_LEN nothing is
 * copied, errno is set to EINVAL and -1 is returned.
 */
static int
stash_locale_name (char *slot, const char *name)
{
  if (strlen (name) > ENCODING_LEN)
    {
      _REENT_ERRNO(p) = EINVAL;
      return -1;
    }
  strcpy (slot, name);
  return 0;
}
#endif /* _MB_CAPABLE */

/*
 * Select or query the global locale.
 *
 * category  one of the LC_* values.
 * locale    NULL to query, "" to take the name(s) from the environment,
 *           otherwise a locale name.  For LC_ALL the name may also be a
 *           '/'-separated list with one name per category.
 *
 * Returns the resulting locale string, or NULL on failure.  Without
 * _MB_CAPABLE only "C", "POSIX" and "" are accepted and the result is
 * always "C".  With _MB_CAPABLE, an out-of-range category or an over-long
 * name fails with errno set to EINVAL, and a failed LC_ALL change rolls
 * the already-changed categories back.
 */
char *
setlocale (
       int category,
       const char *locale)
{
  (void) category;
#ifndef _MB_CAPABLE
  /* Only the C locale exists; reject every other name. */
  if (locale != NULL
      && strcmp (locale, "POSIX") != 0
      && strcmp (locale, "C") != 0
      && strcmp (locale, "") != 0)
    return NULL;
  return "C";
#else /* _MB_CAPABLE */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int cat, undo, len, saverr;
  const char *sep;
  char *ret;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      _REENT_ERRNO(p) = EINVAL;
      return NULL;
    }

  /* Pure query: hand back the current name without changing anything. */
  if (locale == NULL)
    {
      if (category == LC_ALL)
	return global_locale_string;
      return __get_global_locale ()->categories[category];
    }

  /* Seed the scratch table with the names currently in effect. */
  for (cat = 1; cat < _LC_LAST; ++cat)
    strcpy (new_categories[cat], __get_global_locale ()->categories[cat]);

  /* Overwrite the requested slot(s) from the locale argument. */
  if (*locale == '\0')
    {
      /* Empty string: consult the environment. */
      if (category == LC_ALL)
	{
	  for (cat = 1; cat < _LC_LAST; ++cat)
	    if (stash_locale_name (new_categories[cat],
				   __get_locale_env (cat)) < 0)
	      return NULL;
	}
      else if (stash_locale_name (new_categories[category],
				  __get_locale_env (category)) < 0)
	return NULL;
    }
  else if (category != LC_ALL)
    {
      if (stash_locale_name (new_categories[category], locale) < 0)
	return NULL;
    }
  else if ((sep = strchr (locale, '/')) == NULL)
    {
      /* One name for every category. */
      if (strlen (locale) > ENCODING_LEN)
	{
	  _REENT_ERRNO(p) = EINVAL;
	  return NULL;
	}
      for (cat = 1; cat < _LC_LAST; ++cat)
	strcpy (new_categories[cat], locale);
    }
  else
    {
      /* Composite "name/name/..." string. */
      cat = 1;
      while (sep[1] == '/')
	++sep;
      if (sep[1] == '\0')
	{
	  _REENT_ERRNO(p) = EINVAL;
	  return NULL;  /* Nothing but slashes after the first name. */
	}
      do
	{
	  if (cat == _LC_LAST)
	    break;  /* More components than categories; ignore the rest. */
	  len = sep - locale;
	  if (len > ENCODING_LEN)
	    {
	      _REENT_ERRNO(p) = EINVAL;
	      return NULL;
	    }
	  strlcpy (new_categories[cat], locale, len + 1);
	  cat++;
	  /* Step over the separator(s), then find the end of the next name. */
	  while (*sep == '/')
	    sep++;
	  locale = sep;
	  while (*sep && *sep != '/')
	    sep++;
	}
      while (*locale);
      /* Fewer components than categories: repeat the last one given. */
      for (; cat < _LC_LAST; cat++)
	{
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Wunknown-warning-option"
/*
 * Copying the previous slot into the next one appears to confuse
 * the gcc analyzer.
 */
#pragma GCC diagnostic ignored "-Wanalyzer-overlapping-buffers"
#endif
	  strcpy (new_categories[cat], new_categories[cat - 1]);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
	}
    }

  /* Single category: load it and refresh the combined string. */
  if (category != LC_ALL)
    {
      ret = __loadlocale (__get_global_locale (), category,
			  new_categories[category]);
      currentlocale ();
      return ret;
    }

  /* LC_ALL: load every category, undoing the earlier ones on failure. */
  for (cat = 1; cat < _LC_LAST; ++cat)
    {
      strcpy (saved_categories[cat], __get_global_locale ()->categories[cat]);
      if (__loadlocale (__get_global_locale (), cat, new_categories[cat])
	  != NULL)
	continue;

      saverr = _REENT_ERRNO(p);
      for (undo = 1; undo < cat; undo++)
	{
	  strcpy (new_categories[undo], saved_categories[undo]);
	  if (__loadlocale (__get_global_locale (), undo,
			    new_categories[undo]) == NULL)
	    {
	      /* Even the old name is refused now; fall back to "C". */
	      strcpy (new_categories[undo], "C");
	      __loadlocale (__get_global_locale (), undo,
			    new_categories[undo]);
	    }
	}
      _REENT_ERRNO(p) = saverr;
      return NULL;
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
431 
432 #ifdef _MB_CAPABLE
433 static char *
currentlocale(void)434 currentlocale (void)
435 {
436   int i;
437 
438   strcpy (global_locale_string, __get_global_locale ()->categories[1]);
439 
440   for (i = 2; i < _LC_LAST; ++i)
441     if (strcmp (__get_global_locale ()->categories[1],
442 		__get_global_locale ()->categories[i]))
443       {
444 	for (i = 2; i < _LC_LAST; ++i)
445 	  {
446 	    (void)strcat(global_locale_string, "/");
447 	    (void)strcat(global_locale_string,
448 			 __get_global_locale ()->categories[i]);
449 	  }
450 	break;
451       }
452   return global_locale_string;
453 }
454 
455 char *
__loadlocale(struct __locale_t * loc,int category,char * new_locale)456 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
457 {
458   /* At this point a full-featured system would just load the locale
459      specific data from the locale files.
460      What we do here for now is to check the incoming string for correctness.
461      The string must be in one of the allowed locale strings, either
462      one in POSIX-style, or one in the old newlib style to maintain
463      backward compatibility.  If the local string is correct, the charset
464      is extracted and stored in ctype_codeset or message_charset
465      dependent on the cateogry. */
466   char *locale = NULL;
467   char charset[ENCODING_LEN + 1] = {};
468   long val = 0;
469   char *end, *c = NULL;
470   int mbc_max;
471   wctomb_p l_wctomb;
472   mbtowc_p l_mbtowc;
473   int cjksingle = 0;
474   int cjknarrow = 0;
475   int cjkwide = 0;
476 
477   /* Avoid doing everything twice if nothing has changed.
478 
479      duplocale relies on this test to go wrong so the locale is actually
480      duplicated when required.  Any change here has to be synced with a
481      matching change in duplocale. */
482   if (!strcmp (new_locale, loc->categories[category]))
483     return loc->categories[category];
484 
485   locale = new_locale;
486 # define FAIL	return NULL
487 
488   /* "POSIX" is translated to "C", as on Linux. */
489   if (!strcmp (locale, "POSIX"))
490     strcpy (locale, "C");
491   if (!strcmp (locale, "C"))				/* Default "C" locale */
492     strcpy (charset, "ASCII");
493   else if (locale[0] == 'C'
494 	   && (locale[1] == '-'		/* Old newlib style */
495 	       || locale[1] == '.'))	/* Extension for the C locale to allow
496 					   specifying different charsets while
497 					   sticking to the C locale in terms
498 					   of sort order, etc.  Proposed in
499 					   the Debian project. */
500     {
501       char *chp;
502 
503       c = locale + 2;
504       strcpy (charset, c);
505       if ((chp = strchr (charset, '@')))
506         /* Strip off modifier */
507         *chp = '\0';
508       c += strlen (charset);
509     }
510   else							/* POSIX style */
511     {
512       c = locale;
513 
514       /* Don't use ctype macros here, they might be localized. */
515       /* Language */
516       if (c[0] < 'a' || c[0] > 'z'
517 	  || c[1] < 'a' || c[1] > 'z')
518 	FAIL;
519       c += 2;
520       /* Allow three character Language per ISO 639-3 */
521       if (c[0] >= 'a' && c[0] <= 'z')
522       	++c;
523       if (c[0] == '_')
524         {
525 	  /* Territory */
526 	  ++c;
527 	  if (c[0] < 'A' || c[0] > 'Z'
528 	      || c[1] < 'A' || c[1] > 'Z')
529 	    FAIL;
530 	  c += 2;
531 	}
532       if (c[0] == '.')
533 	{
534 	  /* Charset */
535 	  char *chp;
536 
537 	  ++c;
538 	  strcpy (charset, c);
539 	  if ((chp = strchr (charset, '@')))
540 	    /* Strip off modifier */
541 	    *chp = '\0';
542 	  c += strlen (charset);
543 	}
544       else if (c[0] == '\0' || c[0] == '@')
545 	/* End of string or just a modifier */
546 	strcpy (charset, "ISO-8859-1");
547       else
548 	/* Invalid string */
549       	FAIL;
550     }
551   if (c && c[0] == '@')
552     {
553       /* Modifier "cjksingle" is recognized to enforce single-width mode. */
554       /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
555          behaviour of wcwidth() and wcswidth() for East Asian languages.
556          For details see the comment at the end of this function. */
557       if (!strcmp (c + 1, "cjksingle"))
558 	cjksingle = 1;
559       else if (!strcmp (c + 1, "cjknarrow"))
560 	cjknarrow = 1;
561       else if (!strcmp (c + 1, "cjkwide"))
562 	cjkwide = 1;
563     }
564   /* We only support this subset of charsets. */
565   switch (charset[0])
566     {
567     case 'U':
568     case 'u':
569       if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
570 	FAIL;
571       strcpy (charset, "UTF-8");
572       mbc_max = 6;
573       l_wctomb = __utf8_wctomb;
574       l_mbtowc = __utf8_mbtowc;
575     break;
576     /* Cygwin does not support JIS at all. */
577     case 'J':
578     case 'j':
579       if (strcasecmp (charset, "JIS"))
580 	FAIL;
581       strcpy (charset, "JIS");
582       mbc_max = 8;
583       l_wctomb = __jis_wctomb;
584       l_mbtowc = __jis_mbtowc;
585     break;
586     case 'E':
587     case 'e':
588       if (strncasecmp (charset, "EUC", 3))
589 	FAIL;
590       c = charset + 3;
591       if (*c == '-')
592 	++c;
593       if (!strcasecmp (c, "JP"))
594 	{
595 	  strcpy (charset, "EUCJP");
596 	  mbc_max = 3;
597 	  l_wctomb = __eucjp_wctomb;
598 	  l_mbtowc = __eucjp_mbtowc;
599 	}
600       else
601 	FAIL;
602     break;
603     case 'S':
604     case 's':
605       if (strcasecmp (charset, "SJIS"))
606 	FAIL;
607       strcpy (charset, "SJIS");
608       mbc_max = 2;
609       l_wctomb = __sjis_wctomb;
610       l_mbtowc = __sjis_mbtowc;
611     break;
612     case 'I':
613     case 'i':
614       /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
615          ISO-8859-12.  This code also recognizes the aliases without dashes. */
616       if (strncasecmp (charset, "ISO", 3))
617 	FAIL;
618       c = charset + 3;
619       if (*c == '-')
620 	++c;
621       if (strncasecmp (c, "8859", 4))
622 	FAIL;
623       c += 4;
624       if (*c == '-')
625 	++c;
626       val = strtol (c, &end, 10);
627       if (val < 1 || val > 16 || val == 12 || *end)
628 	FAIL;
629       strcpy (charset, "ISO-8859-");
630       c = charset + 9;
631       if (val > 10)
632       	*c++ = '1';
633       *c++ = val % 10 + '0';
634       *c = '\0';
635       mbc_max = 1;
636 #ifdef _MB_EXTENDED_CHARSETS_ISO
637       l_wctomb = __iso_wctomb (val);
638       l_mbtowc = __iso_mbtowc (val);
639 #else /* !_MB_EXTENDED_CHARSETS_ISO */
640       l_wctomb = __ascii_wctomb;
641       l_mbtowc = __ascii_mbtowc;
642 #endif /* _MB_EXTENDED_CHARSETS_ISO */
643     break;
644     case 'C':
645     case 'c':
646       if (charset[1] != 'P' && charset[1] != 'p')
647 	FAIL;
648       memcpy (charset, "CP", 2);
649       val = strtol (charset + 2, &end, 10);
650       if (*end)
651 	FAIL;
652       switch (val)
653 	{
654 	case 437:
655 	case 720:
656 	case 737:
657 	case 775:
658 	case 850:
659 	case 852:
660 	case 855:
661 	case 857:
662 	case 858:
663 	case 862:
664 	case 866:
665 	case 874:
666 	case 1125:
667 	case 1250:
668 	case 1251:
669 	case 1252:
670 	case 1253:
671 	case 1254:
672 	case 1255:
673 	case 1256:
674 	case 1257:
675 	case 1258:
676 	  mbc_max = 1;
677 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
678 	  l_wctomb = __cp_wctomb (val);
679 	  l_mbtowc = __cp_mbtowc (val);
680 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
681 	  l_wctomb = __ascii_wctomb;
682 	  l_mbtowc = __ascii_mbtowc;
683 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
684 	  break;
685 	case 932:
686 	  mbc_max = 2;
687 	  l_wctomb = __sjis_wctomb;
688 	  l_mbtowc = __sjis_mbtowc;
689 	  break;
690 	default:
691 	  FAIL;
692 	}
693     break;
694     case 'K':
695     case 'k':
696       /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
697       if (strncasecmp (charset, "KOI8", 4))
698 	FAIL;
699       c = charset + 4;
700       if (*c == '-')
701 	++c;
702       if (*c == 'R' || *c == 'r')
703 	{
704 	  val = 20866;
705 	  strcpy (charset, "CP20866");
706 	}
707       else if (*c == 'U' || *c == 'u')
708 	{
709 	  val = 21866;
710 	  strcpy (charset, "CP21866");
711 	}
712       else if (*c == 'T' || *c == 't')
713 	{
714 	  val = 103;
715 	  strcpy (charset, "CP103");
716 	}
717       else
718 	FAIL;
719       mbc_max = 1;
720 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
721       l_wctomb = __cp_wctomb (val);
722       l_mbtowc = __cp_mbtowc (val);
723 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
724       l_wctomb = __ascii_wctomb;
725       l_mbtowc = __ascii_mbtowc;
726 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
727       break;
728     case 'A':
729     case 'a':
730       if (strcasecmp (charset, "ASCII"))
731 	FAIL;
732       strcpy (charset, "ASCII");
733       mbc_max = 1;
734       l_wctomb = __ascii_wctomb;
735       l_mbtowc = __ascii_mbtowc;
736       break;
737     case 'G':
738     case 'g':
739       /* GEORGIAN-PS and the alias without dash */
740       if (!strncasecmp (charset, "GEORGIAN", 8))
741 	{
742 	  c = charset + 8;
743 	  if (*c == '-')
744 	    ++c;
745 	  if (strcasecmp (c, "PS"))
746 	    FAIL;
747 	  val = 101;
748 	  strcpy (charset, "CP101");
749 	  mbc_max = 1;
750 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
751 	  l_wctomb = __cp_wctomb (val);
752 	  l_mbtowc = __cp_mbtowc (val);
753 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
754 	  l_wctomb = __ascii_wctomb;
755 	  l_mbtowc = __ascii_mbtowc;
756 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
757 	}
758       else
759 	FAIL;
760       break;
761     case 'P':
762     case 'p':
763       /* PT154 */
764       if (strcasecmp (charset, "PT154"))
765 	FAIL;
766       val = 102;
767       strcpy (charset, "CP102");
768       mbc_max = 1;
769 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
770       l_wctomb = __cp_wctomb (val);
771       l_mbtowc = __cp_mbtowc (val);
772 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
773       l_wctomb = __ascii_wctomb;
774       l_mbtowc = __ascii_mbtowc;
775 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
776       break;
777     case 'T':
778     case 't':
779       if (strncasecmp (charset, "TIS", 3))
780       	FAIL;
781       c = charset + 3;
782       if (*c == '-')
783 	++c;
784       if (strcmp (c, "620"))
785       	FAIL;
786       val = 874;
787       strcpy (charset, "CP874");
788       mbc_max = 1;
789 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
790       l_wctomb = __cp_wctomb (val);
791       l_mbtowc = __cp_mbtowc (val);
792 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
793       l_wctomb = __ascii_wctomb;
794       l_mbtowc = __ascii_mbtowc;
795 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
796       break;
797     default:
798       FAIL;
799     }
800   switch (category)
801     {
802     case LC_CTYPE:
803 #ifndef __HAVE_LOCALE_INFO__
804       strcpy (loc->ctype_codeset, charset);
805       loc->mb_cur_max[0] = mbc_max;
806 #endif
807       loc->wctomb = l_wctomb;
808       loc->mbtowc = l_mbtowc;
809       __set_ctype (loc, charset);
810       /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
811       /* Determine the width for the "CJK Ambiguous Width" category of
812          characters. This is used in wcwidth(). Assume single width for
813          single-byte charsets, and double width for multi-byte charsets
814          other than UTF-8. For UTF-8, use single width.
815          Single width can also be forced with the "@cjknarrow" modifier.
816          Double width can also be forced with the "@cjkwide" modifier.
817        */
818       loc->cjk_lang = cjkwide ||
819 		      (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
820       if (cjksingle)
821 	loc->cjk_lang = -1;	/* Disable CJK dual-width */
822       break;
823 #ifdef __HAVE_LOCALE_INFO__
824 #endif /* __HAVE_LOCALE_INFO__ */
825     default:
826       break;
827     }
828 #ifdef __HAVE_LOCALE_INFO__
829 #endif /* __HAVE_LOCALE_INFO__ */
830   return strcpy(loc->categories[category], new_locale);
831 }
832 
833 const char *
__get_locale_env(int category)834 __get_locale_env (int category)
835 {
836   const char *env;
837 
838   /* 1. check LC_ALL. */
839   env = getenv (categories[0]);
840 
841   /* 2. check LC_* */
842   if (env == NULL || !*env)
843     env = getenv (categories[category]);
844 
845   /* 3. check LANG */
846   if (env == NULL || !*env)
847     env = getenv ("LANG");
848 
849   /* 4. if none is set, fall to default locale */
850   if (env == NULL || !*env)
851     env = __default_locale;
852 
853   return env;
854 }
855 #endif /* _MB_CAPABLE */
856 
857 size_t
__locale_mb_cur_max(void)858 __locale_mb_cur_max (void)
859 {
860 #ifdef __HAVE_LOCALE_INFO__
861   return __get_current_ctype_locale ()->mb_cur_max[0];
862 #else
863   return __get_current_locale ()->mb_cur_max[0];
864 #endif
865 }
866 
867 #ifdef __HAVE_LOCALE_INFO__
868 const char *
__locale_ctype_ptr_l(struct __locale_t * locale)869 __locale_ctype_ptr_l (struct __locale_t *locale)
870 {
871   return locale->ctype_ptr;
872 }
873 
874 const char *
__locale_ctype_ptr(void)875 __locale_ctype_ptr (void)
876 {
877   return __get_current_locale ()->ctype_ptr;
878 }
879 #endif /* __HAVE_LOCALE_INFO__ */
880