1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4 
5 INDEX
6 	setlocale
7 INDEX
8 	localeconv
9 
10 SYNOPSIS
11 	#include <locale.h>
12 	char *setlocale(int <[category]>, const char *<[locale]>);
	struct lconv *localeconv(void);
14 
15 DESCRIPTION
16 <<setlocale>> is the facility defined by ANSI C to condition the
17 execution environment for international collating and formatting
18 information; <<localeconv>> reports on the settings of the current
19 locale.
20 
21 This is a minimal implementation, supporting only the required <<"POSIX">>
22 and <<"C">> values for <[locale]>; strings representing other locales are not
23 honored unless _MB_CAPABLE is defined.
24 
25 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
26 the form
27 
28   language[_TERRITORY][.charset][@@modifier]
29 
30 <<"language">> is a two character string per ISO 639, or, if not available
31 for a given language, a three character string per ISO 639-3.
32 <<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
33 <<"modifier">> see below.
34 
In addition to the POSIX specifier, the following extension is supported
for backward compatibility with older implementations using newlib:
<<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales that use a charset other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
42 
43 The following charsets are recognized:
44 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
45 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
46 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
47 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
48 1256, 1257, 1258].
49 
50 Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
51 are equivalent.  Charset names with dashes can also be written without
52 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
53 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
54 
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows codepages.
57 Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
58 only newlib for Cygwin is built with full charset support by default.
59 Under Cygwin, this implementation additionally supports the charsets
60 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
61 <<"Big5">>.  Cygwin does not support <<"JIS">>.
62 
63 Cygwin additionally supports locales from the file
64 /usr/share/locale/locale.alias.
65 
66 (<<"">> is also accepted; if given, the settings are read from the
67 corresponding LC_* environment variables and $LANG according to POSIX rules.)
68 
69 This implementation also supports the modifiers <<"cjknarrow">> and
70 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
71 handle characters from the "CJK Ambiguous Width" category of characters
72 described at http://www.unicode.org/reports/tr11/#Ambiguous.
73 These characters have a width of 1 for singlebyte charsets and UTF-8,
74 and a width of 2 for multibyte charsets other than UTF-8. Specifying
75 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
76 
77 This implementation also supports the modifier <<"cjksingle">>
78 to enforce single-width character properties.
79 
80 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
81 pointer to the string representing the current locale.  The acceptable
82 values for <[category]> are defined in `<<locale.h>>' as macros
83 beginning with <<"LC_">>.
84 
85 <<localeconv>> returns a pointer to a structure (also defined in
86 `<<locale.h>>') describing the locale-specific conventions currently
87 in effect.
88 
89 RETURNS
90 A successful call to <<setlocale>> returns a pointer to a string
91 associated with the specified category for the new locale.  The string
92 returned by <<setlocale>> is such that a subsequent call using that
93 string will restore that category (or all categories in case of LC_ALL),
94 to that state.  The application shall not modify the string returned
95 which may be overwritten by a subsequent call to <<setlocale>>.
96 On error, <<setlocale>> returns <<NULL>>.
97 
98 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
99 which describes the formatting and collating conventions in effect (in
100 this implementation, always those of the C locale).
101 
102 PORTABILITY
103 ANSI C requires <<setlocale>>, but the only locale required across all
104 implementations is the C locale.
105 
106 NOTES
107 There is no ISO-8859-12 codepage.  It's also refused by this implementation.
108 
109 No supporting OS subroutines are required.
110 */
111 
112 /* Parts of this code are originally taken from FreeBSD. */
113 /*
114  * Copyright (c) 1996 - 2002 FreeBSD Project
115  * Copyright (c) 1991, 1993
116  *      The Regents of the University of California.  All rights reserved.
117  *
118  * This code is derived from software contributed to Berkeley by
119  * Paul Borman at Krystal Technologies.
120  *
121  * Redistribution and use in source and binary forms, with or without
122  * modification, are permitted provided that the following conditions
123  * are met:
124  * 1. Redistributions of source code must retain the above copyright
125  *    notice, this list of conditions and the following disclaimer.
126  * 2. Redistributions in binary form must reproduce the above copyright
127  *    notice, this list of conditions and the following disclaimer in the
128  *    documentation and/or other materials provided with the distribution.
129  * 4. Neither the name of the University nor the names of its contributors
130  *    may be used to endorse or promote products derived from this software
131  *    without specific prior written permission.
132  *
133  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
134  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
135  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
136  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
137  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
138  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
139  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
140  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
141  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
142  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
143  * SUCH DAMAGE.
144  */
145 
146 #define _DEFAULT_SOURCE
147 #include <newlib.h>
148 #include <errno.h>
149 #include <string.h>
150 #include <limits.h>
151 #include <stdlib.h>
152 #include <wchar.h>
153 #include "setlocale.h"
154 #include "../ctype/ctype_.h"
155 #include "../stdlib/local.h"
156 
#ifdef __CYGWIN__ /* Has to be kept available as exported symbol for
		     backward compatibility.  Set it in setlocale, but
		     otherwise ignore it.  Applications compiled after
		     2010 don't use it anymore. */
int __EXPORT __mb_cur_max = 6;
#endif

#ifdef __HAVE_LOCALE_INFO__
/* Locale pointer declared with NEWLIB_THREAD_LOCAL; it initially refers to
   the global locale object defined further down in this file. */
NEWLIB_THREAD_LOCAL struct __locale_t *_locale = &__global_locale;
#endif

/* NOTE(review): never read or written in this file; presumably a locale
   data path consumed by other translation units -- confirm before
   changing or removing. */
char *_PathLocale = NULL;
169 
#ifdef _MB_CAPABLE
/*
 * Category names for getenv().
 *
 * Indexed by category number: __get_locale_env() looks up entry 0
 * ("LC_ALL") first and then the entry of the category it was asked for.
 */
static char *categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */

/*
 * Default locale per POSIX.  Can be overridden on a per-target base.
 * It seeds both __default_locale and the LC_CTYPE slot of __global_locale.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE	"C"
#endif
191 
#ifdef _MB_CAPABLE
/*
 * This variable can be changed by any outside mechanism.  This allows,
 * for instance, to load the default locale from a file.
 * __get_locale_env() falls back to it when neither LC_ALL, the
 * category variable nor LANG is set to a non-empty value.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Read-only locale object describing the plain "C" locale.
 *
 * The initializer is positional; the member names given in the comments
 * below are inferred from how struct __locale_t is used elsewhere in this
 * file -- TODO confirm against the struct definition in setlocale.h.
 */
const struct __locale_t __C_locale =
{
  /* Locale name of each category (presumably the `categories' member). */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* Wide <-> multibyte conversion functions: plain ASCII. */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang: 0 selects normal width handling. */
  0,
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv member: "." as decimal point, all other
     strings empty, all numeric fields CHAR_MAX. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Without locale info the object directly stores mb_cur_max (1) and
     the ctype and message codeset names. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* With locale info there is one pair of pointers per category; the
     first points at the built-in C data, the second is NULL here. */
  {
    { NULL, NULL },			/* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },	/* LC_COLLATE */
#else
    { NULL, NULL },			/* LC_COLLATE */
#endif
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
#endif /* _MB_CAPABLE */
234 
/*
 * The mutable global locale object; setlocale() operates on it through
 * __get_global_locale().
 *
 * It starts out as the "C" locale except that the LC_CTYPE name is
 * DEFAULT_LOCALE and, on Cygwin, the conversion functions and ctype data
 * are the UTF-8 variants.  The initializer is positional; see the struct
 * definition for the member names.
 */
struct __locale_t __global_locale =
{
  /* Per-category locale names; slot 2 (LC_CTYPE) uses DEFAULT_LOCALE. */
  { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
#ifdef __CYGWIN__
  __utf8_wctomb,
  __utf8_mbtowc,
#else
  __ascii_wctomb,
  __ascii_mbtowc,
#endif
  0,
  DEFAULT_CTYPE_PTR,
  /* Same numeric/monetary defaults as in __C_locale. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  {
    { NULL, NULL },			/* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },	/* LC_COLLATE */
    { &_C_utf8_ctype_locale, NULL },	/* LC_CTYPE */
#else
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
#endif
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
275 
#ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   The buffer is sized for one locale name per category, each followed by
   a "/" separator and a terminator; currentlocale() rebuilds it. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
	    = "C";
/* Rebuilds global_locale_string from the global locale's per-category
   names and returns it. */
static char *currentlocale (void);

#endif /* _MB_CAPABLE */
285 
/*
 * Select or query the global locale.
 *
 * CATEGORY is one of the LC_* values, LOCALE the requested locale name.
 * A NULL LOCALE only queries the current setting.  An empty LOCALE takes
 * the names from the environment (see __get_locale_env).  For LC_ALL,
 * LOCALE may be a single name or a "/"-separated list with one name per
 * category.
 *
 * Returns the resulting locale string, or NULL on failure.  In the
 * _MB_CAPABLE build, errno is set to EINVAL for an out-of-range category
 * or an over-long / malformed name, and a failed LC_ALL switch restores
 * the categories that had already been changed.
 */
char *
setlocale (int category, const char *locale)
{
  (void) category;
#ifndef _MB_CAPABLE
  /* Minimal build: only "POSIX", "C" and "" are accepted, and the
     answer is always "C". */
  if (locale != NULL
      && strcmp (locale, "POSIX") != 0
      && strcmp (locale, "C") != 0
      && strcmp (locale, "") != 0)
    return NULL;
  return "C";
#else /* _MB_CAPABLE */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  const char *value, *cursor;
  char *result;
  int cat, undo, len, saved_errno;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      __errno_r(p) = EINVAL;
      return NULL;
    }

  /* Pure query: hand back the current name(s) without changing anything. */
  if (locale == NULL)
    {
      if (category == LC_ALL)
	return global_locale_string;
      return __get_global_locale ()->categories[category];
    }

  /* Start out from the currently active name of every category. */
  for (cat = 1; cat < _LC_LAST; ++cat)
    strcpy (new_categories[cat], __get_global_locale ()->categories[cat]);

  /* Overwrite the requested slot(s) according to the locale argument. */
  if (locale[0] == '\0')
    {
      /* Empty string: consult the environment, either for every
	 category (LC_ALL) or just for the requested one. */
      int first = (category == LC_ALL) ? 1 : category;
      int last = (category == LC_ALL) ? _LC_LAST - 1 : category;

      for (cat = first; cat <= last; ++cat)
	{
	  value = __get_locale_env (cat);
	  if (strlen (value) > ENCODING_LEN)
	    {
	      __errno_r(p) = EINVAL;
	      return NULL;
	    }
	  strcpy (new_categories[cat], value);
	}
    }
  else if (category != LC_ALL)
    {
      /* One explicit name for one category. */
      if (strlen (locale) > ENCODING_LEN)
	{
	  __errno_r(p) = EINVAL;
	  return NULL;
	}
      strcpy (new_categories[category], locale);
    }
  else if ((cursor = strchr (locale, '/')) == NULL)
    {
      /* LC_ALL with a single name: apply it to every category. */
      if (strlen (locale) > ENCODING_LEN)
	{
	  __errno_r(p) = EINVAL;
	  return NULL;
	}
      for (cat = 1; cat < _LC_LAST; ++cat)
	strcpy (new_categories[cat], locale);
    }
  else
    {
      /* LC_ALL with a "/"-separated list of per-category names. */
      cat = 1;
      while (cursor[1] == '/')
	++cursor;
      if (cursor[1] == '\0')
	{
	  /* Nothing but slashes after the first name. */
	  __errno_r(p) = EINVAL;
	  return NULL;
	}
      do
	{
	  if (cat == _LC_LAST)
	    break;		/* More names than categories. */
	  len = cursor - locale;
	  if (len > ENCODING_LEN)
	    {
	      __errno_r(p) = EINVAL;
	      return NULL;
	    }
	  strlcpy (new_categories[cat], locale, len + 1);
	  cat++;
	  /* Step over the separator(s), then find the end of the next
	     name. */
	  while (*cursor == '/')
	    cursor++;
	  locale = cursor;
	  while (*cursor != '\0' && *cursor != '/')
	    cursor++;
	}
      while (*locale != '\0');
      /* Categories without an explicit name inherit the previous one. */
      for (; cat < _LC_LAST; cat++)
	strcpy (new_categories[cat], new_categories[cat - 1]);
    }

  /* Single category: load it and refresh the combined string. */
  if (category != LC_ALL)
    {
      result = __loadlocale (__get_global_locale (), category,
			     new_categories[category]);
      currentlocale ();
      return result;
    }

  /* LC_ALL: load category by category, remembering the old names so a
     failure can be rolled back. */
  for (cat = 1; cat < _LC_LAST; ++cat)
    {
      strcpy (saved_categories[cat], __get_global_locale ()->categories[cat]);
      if (__loadlocale (__get_global_locale (), cat, new_categories[cat])
	  != NULL)
	continue;

      /* Restore everything changed so far, falling back to "C" if even
	 the old name is refused, and keep the original errno. */
      saved_errno = __errno_r(p);
      for (undo = 1; undo < cat; undo++)
	{
	  strcpy (new_categories[undo], saved_categories[undo]);
	  if (__loadlocale (__get_global_locale (), undo,
			    new_categories[undo]) == NULL)
	    {
	      strcpy (new_categories[undo], "C");
	      __loadlocale (__get_global_locale (), undo,
			    new_categories[undo]);
	    }
	}
      __errno_r(p) = saved_errno;
      return NULL;
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
439 
440 #ifdef _MB_CAPABLE
441 static char *
currentlocale(void)442 currentlocale (void)
443 {
444   int i;
445 
446   strcpy (global_locale_string, __get_global_locale ()->categories[1]);
447 
448   for (i = 2; i < _LC_LAST; ++i)
449     if (strcmp (__get_global_locale ()->categories[1],
450 		__get_global_locale ()->categories[i]))
451       {
452 	for (i = 2; i < _LC_LAST; ++i)
453 	  {
454 	    (void)strcat(global_locale_string, "/");
455 	    (void)strcat(global_locale_string,
456 			 __get_global_locale ()->categories[i]);
457 	  }
458 	break;
459       }
460   return global_locale_string;
461 }
462 
463 extern void __set_ctype (struct __locale_t *, const char *charset);
464 
465 char *
__loadlocale(struct __locale_t * loc,int category,char * new_locale)466 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
467 {
468   /* At this point a full-featured system would just load the locale
469      specific data from the locale files.
470      What we do here for now is to check the incoming string for correctness.
471      The string must be in one of the allowed locale strings, either
472      one in POSIX-style, or one in the old newlib style to maintain
473      backward compatibility.  If the local string is correct, the charset
474      is extracted and stored in ctype_codeset or message_charset
475      dependent on the cateogry. */
476   char *locale = NULL;
477   char charset[ENCODING_LEN + 1];
478   long val = 0;
479   char *end, *c = NULL;
480   int mbc_max;
481   wctomb_p l_wctomb;
482   mbtowc_p l_mbtowc;
483   int cjksingle = 0;
484   int cjknarrow = 0;
485   int cjkwide = 0;
486 
487   /* Avoid doing everything twice if nothing has changed.
488 
489      duplocale relies on this test to go wrong so the locale is actually
490      duplicated when required.  Any change here has to be synced with a
491      matching change in duplocale. */
492   if (!strcmp (new_locale, loc->categories[category]))
493     return loc->categories[category];
494 
495   int ret = 0;
496 
497 #ifdef __CYGWIN__
498   /* This additional code handles the case that the incoming locale string
499      is not valid.  If so, it calls the function __set_locale_from_locale_alias,
500      which is only available on Cygwin right now.  The function reads the
501      file /usr/share/locale/locale.alias.  The file contains locale aliases
502      and their replacement locale.  For instance, the alias "french" is
503      translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
504      "th_TH.TIS-620".  If successful, the function returns with a pointer
505      to the second argument, which is a buffer in which the replacement locale
506      gets stored.  Otherwise the function returns NULL. */
507   char tmp_locale[ENCODING_LEN + 1];
508 
509 restart:
510   if (!locale)
511     locale = new_locale;
512   else if (locale != tmp_locale)
513     {
514       locale = __set_locale_from_locale_alias (locale, tmp_locale);
515       if (!locale)
516 	return NULL;
517     }
518 # define FAIL	goto restart
519 #else
520   locale = new_locale;
521 # define FAIL	return NULL
522 #endif
523 
524   /* "POSIX" is translated to "C", as on Linux. */
525   if (!strcmp (locale, "POSIX"))
526     strcpy (locale, "C");
527   if (!strcmp (locale, "C"))				/* Default "C" locale */
528     strcpy (charset, "ASCII");
529   else if (locale[0] == 'C'
530 	   && (locale[1] == '-'		/* Old newlib style */
531 	       || locale[1] == '.'))	/* Extension for the C locale to allow
532 					   specifying different charsets while
533 					   sticking to the C locale in terms
534 					   of sort order, etc.  Proposed in
535 					   the Debian project. */
536     {
537       char *chp;
538 
539       c = locale + 2;
540       strcpy (charset, c);
541       if ((chp = strchr (charset, '@')))
542         /* Strip off modifier */
543         *chp = '\0';
544       c += strlen (charset);
545     }
546   else							/* POSIX style */
547     {
548       c = locale;
549 
550       /* Don't use ctype macros here, they might be localized. */
551       /* Language */
552       if (c[0] < 'a' || c[0] > 'z'
553 	  || c[1] < 'a' || c[1] > 'z')
554 	FAIL;
555       c += 2;
556       /* Allow three character Language per ISO 639-3 */
557       if (c[0] >= 'a' && c[0] <= 'z')
558       	++c;
559       if (c[0] == '_')
560         {
561 	  /* Territory */
562 	  ++c;
563 	  if (c[0] < 'A' || c[0] > 'Z'
564 	      || c[1] < 'A' || c[1] > 'Z')
565 	    FAIL;
566 	  c += 2;
567 	}
568       if (c[0] == '.')
569 	{
570 	  /* Charset */
571 	  char *chp;
572 
573 	  ++c;
574 	  strcpy (charset, c);
575 	  if ((chp = strchr (charset, '@')))
576 	    /* Strip off modifier */
577 	    *chp = '\0';
578 	  c += strlen (charset);
579 	}
580       else if (c[0] == '\0' || c[0] == '@')
581 	/* End of string or just a modifier */
582 #ifdef __CYGWIN__
583 	/* The Cygwin-only function __set_charset_from_locale checks
584 	   for the default charset which is connected to the given locale.
585 	   The function uses Windows functions in turn so it can't be easily
586 	   adapted to other targets.  However, if any other target provides
587 	   equivalent functionality, preferrably using the same function name
588 	   it would be sufficient to change the guarding #ifdef. */
589 	__set_charset_from_locale (locale, charset);
590 #else
591 	strcpy (charset, "ISO-8859-1");
592 #endif
593       else
594 	/* Invalid string */
595       	FAIL;
596     }
597   if (c && c[0] == '@')
598     {
599       /* Modifier "cjksingle" is recognized to enforce single-width mode. */
600       /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
601          behaviour of wcwidth() and wcswidth() for East Asian languages.
602          For details see the comment at the end of this function. */
603       if (!strcmp (c + 1, "cjksingle"))
604 	cjksingle = 1;
605       else if (!strcmp (c + 1, "cjknarrow"))
606 	cjknarrow = 1;
607       else if (!strcmp (c + 1, "cjkwide"))
608 	cjkwide = 1;
609     }
610   /* We only support this subset of charsets. */
611   switch (charset[0])
612     {
613     case 'U':
614     case 'u':
615       if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
616 	FAIL;
617       strcpy (charset, "UTF-8");
618       mbc_max = 6;
619       l_wctomb = __utf8_wctomb;
620       l_mbtowc = __utf8_mbtowc;
621     break;
622 #ifndef __CYGWIN__
623     /* Cygwin does not support JIS at all. */
624     case 'J':
625     case 'j':
626       if (strcasecmp (charset, "JIS"))
627 	FAIL;
628       strcpy (charset, "JIS");
629       mbc_max = 8;
630       l_wctomb = __jis_wctomb;
631       l_mbtowc = __jis_mbtowc;
632     break;
633 #endif /* !__CYGWIN__ */
634     case 'E':
635     case 'e':
636       if (strncasecmp (charset, "EUC", 3))
637 	FAIL;
638       c = charset + 3;
639       if (*c == '-')
640 	++c;
641       if (!strcasecmp (c, "JP"))
642 	{
643 	  strcpy (charset, "EUCJP");
644 	  mbc_max = 3;
645 	  l_wctomb = __eucjp_wctomb;
646 	  l_mbtowc = __eucjp_mbtowc;
647 	}
648 #ifdef __CYGWIN__
649       /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
650 	 implementation requires Windows support. */
651       else if (!strcasecmp (c, "KR"))
652 	{
653 	  strcpy (charset, "EUCKR");
654 	  mbc_max = 2;
655 	  l_wctomb = __kr_wctomb;
656 	  l_mbtowc = __kr_mbtowc;
657 	}
658       else if (!strcasecmp (c, "CN"))
659 	{
660 	  strcpy (charset, "EUCCN");
661 	  mbc_max = 2;
662 	  l_wctomb = __gbk_wctomb;
663 	  l_mbtowc = __gbk_mbtowc;
664 	}
665 #endif /* __CYGWIN__ */
666       else
667 	FAIL;
668     break;
669     case 'S':
670     case 's':
671       if (strcasecmp (charset, "SJIS"))
672 	FAIL;
673       strcpy (charset, "SJIS");
674       mbc_max = 2;
675       l_wctomb = __sjis_wctomb;
676       l_mbtowc = __sjis_mbtowc;
677     break;
678     case 'I':
679     case 'i':
680       /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
681          ISO-8859-12.  This code also recognizes the aliases without dashes. */
682       if (strncasecmp (charset, "ISO", 3))
683 	FAIL;
684       c = charset + 3;
685       if (*c == '-')
686 	++c;
687       if (strncasecmp (c, "8859", 4))
688 	FAIL;
689       c += 4;
690       if (*c == '-')
691 	++c;
692       val = strtol (c, &end, 10);
693       if (val < 1 || val > 16 || val == 12 || *end)
694 	FAIL;
695       strcpy (charset, "ISO-8859-");
696       c = charset + 9;
697       if (val > 10)
698       	*c++ = '1';
699       *c++ = val % 10 + '0';
700       *c = '\0';
701       mbc_max = 1;
702 #ifdef _MB_EXTENDED_CHARSETS_ISO
703       l_wctomb = __iso_wctomb (val);
704       l_mbtowc = __iso_mbtowc (val);
705 #else /* !_MB_EXTENDED_CHARSETS_ISO */
706       l_wctomb = __ascii_wctomb;
707       l_mbtowc = __ascii_mbtowc;
708 #endif /* _MB_EXTENDED_CHARSETS_ISO */
709     break;
710     case 'C':
711     case 'c':
712       if (charset[1] != 'P' && charset[1] != 'p')
713 	FAIL;
714       memcpy (charset, "CP", 2);
715       val = strtol (charset + 2, &end, 10);
716       if (*end)
717 	FAIL;
718       switch (val)
719 	{
720 	case 437:
721 	case 720:
722 	case 737:
723 	case 775:
724 	case 850:
725 	case 852:
726 	case 855:
727 	case 857:
728 	case 858:
729 	case 862:
730 	case 866:
731 	case 874:
732 	case 1125:
733 	case 1250:
734 	case 1251:
735 	case 1252:
736 	case 1253:
737 	case 1254:
738 	case 1255:
739 	case 1256:
740 	case 1257:
741 	case 1258:
742 	  mbc_max = 1;
743 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
744 	  l_wctomb = __cp_wctomb (val);
745 	  l_mbtowc = __cp_mbtowc (val);
746 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
747 	  l_wctomb = __ascii_wctomb;
748 	  l_mbtowc = __ascii_mbtowc;
749 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
750 	  break;
751 	case 932:
752 	  mbc_max = 2;
753 	  l_wctomb = __sjis_wctomb;
754 	  l_mbtowc = __sjis_mbtowc;
755 	  break;
756 	default:
757 	  FAIL;
758 	}
759     break;
760     case 'K':
761     case 'k':
762       /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
763       if (strncasecmp (charset, "KOI8", 4))
764 	FAIL;
765       c = charset + 4;
766       if (*c == '-')
767 	++c;
768       if (*c == 'R' || *c == 'r')
769 	{
770 	  val = 20866;
771 	  strcpy (charset, "CP20866");
772 	}
773       else if (*c == 'U' || *c == 'u')
774 	{
775 	  val = 21866;
776 	  strcpy (charset, "CP21866");
777 	}
778       else if (*c == 'T' || *c == 't')
779 	{
780 	  val = 103;
781 	  strcpy (charset, "CP103");
782 	}
783       else
784 	FAIL;
785       mbc_max = 1;
786 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
787       l_wctomb = __cp_wctomb (val);
788       l_mbtowc = __cp_mbtowc (val);
789 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
790       l_wctomb = __ascii_wctomb;
791       l_mbtowc = __ascii_mbtowc;
792 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
793       break;
794     case 'A':
795     case 'a':
796       if (strcasecmp (charset, "ASCII"))
797 	FAIL;
798       strcpy (charset, "ASCII");
799       mbc_max = 1;
800       l_wctomb = __ascii_wctomb;
801       l_mbtowc = __ascii_mbtowc;
802       break;
803     case 'G':
804     case 'g':
805 #ifdef __CYGWIN__
806       /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
807 	 requires Windows support. */
808       if (!strcasecmp (charset, "GBK")
809 	  || !strcasecmp (charset, "GB2312"))
810 	{
811 	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
812 	  mbc_max = 2;
813 	  l_wctomb = __gbk_wctomb;
814 	  l_mbtowc = __gbk_mbtowc;
815 	}
816       else if (!strcasecmp (charset, "GB18030"))
817 	{
818 	  strcpy (charset, "GB18030");
819 	  mbc_max = 4;
820 	  l_wctomb = __gb18030_wctomb;
821 	  l_mbtowc = __gb18030_mbtowc;
822 	}
823       else
824 #endif /* __CYGWIN__ */
825       /* GEORGIAN-PS and the alias without dash */
826       if (!strncasecmp (charset, "GEORGIAN", 8))
827 	{
828 	  c = charset + 8;
829 	  if (*c == '-')
830 	    ++c;
831 	  if (strcasecmp (c, "PS"))
832 	    FAIL;
833 	  val = 101;
834 	  strcpy (charset, "CP101");
835 	  mbc_max = 1;
836 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
837 	  l_wctomb = __cp_wctomb (val);
838 	  l_mbtowc = __cp_mbtowc (val);
839 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
840 	  l_wctomb = __ascii_wctomb;
841 	  l_mbtowc = __ascii_mbtowc;
842 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
843 	}
844       else
845 	FAIL;
846       break;
847     case 'P':
848     case 'p':
849       /* PT154 */
850       if (strcasecmp (charset, "PT154"))
851 	FAIL;
852       val = 102;
853       strcpy (charset, "CP102");
854       mbc_max = 1;
855 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
856       l_wctomb = __cp_wctomb (val);
857       l_mbtowc = __cp_mbtowc (val);
858 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
859       l_wctomb = __ascii_wctomb;
860       l_mbtowc = __ascii_mbtowc;
861 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
862       break;
863     case 'T':
864     case 't':
865       if (strncasecmp (charset, "TIS", 3))
866       	FAIL;
867       c = charset + 3;
868       if (*c == '-')
869 	++c;
870       if (strcmp (c, "620"))
871       	FAIL;
872       val = 874;
873       strcpy (charset, "CP874");
874       mbc_max = 1;
875 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
876       l_wctomb = __cp_wctomb (val);
877       l_mbtowc = __cp_mbtowc (val);
878 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
879       l_wctomb = __ascii_wctomb;
880       l_mbtowc = __ascii_mbtowc;
881 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
882       break;
883 #ifdef __CYGWIN__
884     /* Newlib does not provide Big5 and Cygwin's implementation
885        requires Windows support. */
886     case 'B':
887     case 'b':
888       if (strcasecmp (charset, "BIG5"))
889       	FAIL;
890       strcpy (charset, "BIG5");
891       mbc_max = 2;
892       l_wctomb = __big5_wctomb;
893       l_mbtowc = __big5_mbtowc;
894       break;
895 #endif /* __CYGWIN__ */
896     default:
897       FAIL;
898     }
899   switch (category)
900     {
901     case LC_CTYPE:
902 #ifndef __HAVE_LOCALE_INFO__
903       strcpy (loc->ctype_codeset, charset);
904       loc->mb_cur_max[0] = mbc_max;
905 #endif
906 #ifdef __CYGWIN__
907       __mb_cur_max = mbc_max;	/* Only for backward compat */
908 #endif
909       loc->wctomb = l_wctomb;
910       loc->mbtowc = l_mbtowc;
911       __set_ctype (loc, charset);
912       /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
913       /* Determine the width for the "CJK Ambiguous Width" category of
914          characters. This is used in wcwidth(). Assume single width for
915          single-byte charsets, and double width for multi-byte charsets
916          other than UTF-8. For UTF-8, use single width.
917          Single width can also be forced with the "@cjknarrow" modifier.
918          Double width can also be forced with the "@cjkwide" modifier.
919        */
920       loc->cjk_lang = cjkwide ||
921 		      (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
922       if (cjksingle)
923 	loc->cjk_lang = -1;	/* Disable CJK dual-width */
924 #ifdef __CYGWIN__
925       ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
926 				 mbc_max);
927 #endif /* __CYGWIN__ */
928       break;
929 #ifdef __CYGWIN__
930   /* Right now only Cygwin supports a __messages_load_locale function at all. */
931     case LC_MESSAGES:
932 #ifdef __HAVE_LOCALE_INFO__
933       ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
934       if (!ret)
935 #else
936       strcpy (loc->message_codeset, charset);
937 #endif /* __HAVE_LOCALE_INFO__ */
938       break;
939 #endif
940 #ifdef __HAVE_LOCALE_INFO__
941 #ifdef __CYGWIN__
942   /* Right now only Cygwin supports a __collate_load_locale function at all. */
943     case LC_COLLATE:
944       ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
945       break;
946   /* Right now only Cygwin supports a __monetary_load_locale function at all. */
947     case LC_MONETARY:
948       ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
949       break;
950   /* Right now only Cygwin supports a __numeric_load_locale function at all. */
951     case LC_NUMERIC:
952       ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
953       break;
954   /* Right now only Cygwin supports a __time_load_locale function at all. */
955     case LC_TIME:
956       ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
957       break;
958 #endif
959 #endif /* __HAVE_LOCALE_INFO__ */
960     default:
961       break;
962     }
963 #ifdef __HAVE_LOCALE_INFO__
964   if (ret)
965     FAIL;
966 #endif /* __HAVE_LOCALE_INFO__ */
967   return strcpy(loc->categories[category], new_locale);
968 }
969 
970 const char *
__get_locale_env(int category)971 __get_locale_env (int category)
972 {
973   const char *env;
974 
975   /* 1. check LC_ALL. */
976   env = getenv (categories[0]);
977 
978   /* 2. check LC_* */
979   if (env == NULL || !*env)
980     env = getenv (categories[category]);
981 
982   /* 3. check LANG */
983   if (env == NULL || !*env)
984     env = getenv ("LANG");
985 
986   /* 4. if none is set, fall to default locale */
987   if (env == NULL || !*env)
988     env = __default_locale;
989 
990   return env;
991 }
992 #endif /* _MB_CAPABLE */
993 
994 size_t
__locale_mb_cur_max(void)995 __locale_mb_cur_max (void)
996 {
997 #ifdef __HAVE_LOCALE_INFO__
998   return __get_current_ctype_locale ()->mb_cur_max[0];
999 #else
1000   return __get_current_locale ()->mb_cur_max[0];
1001 #endif
1002 }
1003 
1004 #ifdef __HAVE_LOCALE_INFO__
1005 const char *
__locale_ctype_ptr_l(struct __locale_t * locale)1006 __locale_ctype_ptr_l (struct __locale_t *locale)
1007 {
1008   return locale->ctype_ptr;
1009 }
1010 
1011 const char *
__locale_ctype_ptr(void)1012 __locale_ctype_ptr (void)
1013 {
1014   return __get_current_locale ()->ctype_ptr;
1015 }
1016 #endif /* __HAVE_LOCALE_INFO__ */
1017