1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4 
5 INDEX
6 	setlocale
7 INDEX
8 	localeconv
9 
10 SYNOPSIS
11 	#include <locale.h>
12 	char *setlocale(int <[category]>, const char *<[locale]>);
13 	lconv *localeconv(void);
14 
15 DESCRIPTION
16 <<setlocale>> is the facility defined by ANSI C to condition the
17 execution environment for international collating and formatting
18 information; <<localeconv>> reports on the settings of the current
19 locale.
20 
21 This is a minimal implementation, supporting only the required <<"POSIX">>
22 and <<"C">> values for <[locale]>; strings representing other locales are not
23 honored unless _MB_CAPABLE is defined.
24 
25 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
26 the form
27 
28   language[_TERRITORY][.charset][@@modifier]
29 
30 <<"language">> is a two character string per ISO 639, or, if not available
31 for a given language, a three character string per ISO 639-3.
32 <<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
33 <<"modifier">> see below.
34 
In addition to the POSIX specifier, the following extension is supported
for backward compatibility with older implementations using newlib:
<<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales that use a charset other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
42 
43 The following charsets are recognized:
44 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
45 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
46 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
47 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
48 1256, 1257, 1258].
49 
50 Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
51 are equivalent.  Charset names with dashes can also be written without
52 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
53 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
54 
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows codepages.
Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
only newlib for Cygwin is built with full charset support by default.
59 Under Cygwin, this implementation additionally supports the charsets
60 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
61 <<"Big5">>.  Cygwin does not support <<"JIS">>.
62 
63 Cygwin additionally supports locales from the file
64 /usr/share/locale/locale.alias.
65 
66 (<<"">> is also accepted; if given, the settings are read from the
67 corresponding LC_* environment variables and $LANG according to POSIX rules.)
68 
69 This implementation also supports the modifiers <<"cjknarrow">> and
70 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
71 handle characters from the "CJK Ambiguous Width" category of characters
72 described at http://www.unicode.org/reports/tr11/#Ambiguous.
73 These characters have a width of 1 for singlebyte charsets and UTF-8,
74 and a width of 2 for multibyte charsets other than UTF-8. Specifying
75 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
76 
77 This implementation also supports the modifier <<"cjksingle">>
78 to enforce single-width character properties.
79 
80 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
81 pointer to the string representing the current locale.  The acceptable
82 values for <[category]> are defined in `<<locale.h>>' as macros
83 beginning with <<"LC_">>.
84 
85 <<localeconv>> returns a pointer to a structure (also defined in
86 `<<locale.h>>') describing the locale-specific conventions currently
87 in effect.
88 
89 RETURNS
90 A successful call to <<setlocale>> returns a pointer to a string
91 associated with the specified category for the new locale.  The string
92 returned by <<setlocale>> is such that a subsequent call using that
93 string will restore that category (or all categories in case of LC_ALL),
94 to that state.  The application shall not modify the string returned
95 which may be overwritten by a subsequent call to <<setlocale>>.
96 On error, <<setlocale>> returns <<NULL>>.
97 
98 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
99 which describes the formatting and collating conventions in effect (in
100 this implementation, always those of the C locale).
101 
102 PORTABILITY
103 ANSI C requires <<setlocale>>, but the only locale required across all
104 implementations is the C locale.
105 
106 NOTES
107 There is no ISO-8859-12 codepage.  It's also refused by this implementation.
108 
109 No supporting OS subroutines are required.
110 */
111 
112 /* Parts of this code are originally taken from FreeBSD. */
113 /*
114  * Copyright (c) 1996 - 2002 FreeBSD Project
115  * Copyright (c) 1991, 1993
116  *      The Regents of the University of California.  All rights reserved.
117  *
118  * This code is derived from software contributed to Berkeley by
119  * Paul Borman at Krystal Technologies.
120  *
121  * Redistribution and use in source and binary forms, with or without
122  * modification, are permitted provided that the following conditions
123  * are met:
124  * 1. Redistributions of source code must retain the above copyright
125  *    notice, this list of conditions and the following disclaimer.
126  * 2. Redistributions in binary form must reproduce the above copyright
127  *    notice, this list of conditions and the following disclaimer in the
128  *    documentation and/or other materials provided with the distribution.
129  * 4. Neither the name of the University nor the names of its contributors
130  *    may be used to endorse or promote products derived from this software
131  *    without specific prior written permission.
132  *
133  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
134  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
135  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
136  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
137  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
138  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
139  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
140  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
141  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
142  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
143  * SUCH DAMAGE.
144  */
145 
146 #define _DEFAULT_SOURCE
147 #define _PICOLIBC_CTYPE_SMALL 0
148 #include <newlib.h>
149 #include <errno.h>
150 #include <string.h>
151 #include <limits.h>
152 #include <stdlib.h>
153 #include <wchar.h>
154 #include "setlocale.h"
155 #include "../ctype/ctype_.h"
156 #include "../stdlib/local.h"
157 
158 #ifdef __CYGWIN__ /* Has to be kept available as exported symbol for
159 		     backward compatibility.  Set it in setlocale, but
160 		     otherwise ignore it.  Applications compiled after
161 		     2010 don't use it anymore. */
162 int __EXPORT __mb_cur_max = 6;
163 #endif
164 
#ifdef __HAVE_LOCALE_INFO__
/* Pointer to a locale object, initially the global locale.  Declared with
   NEWLIB_THREAD_LOCAL -- presumably one instance per thread; confirm
   against the macro's definition. */
NEWLIB_THREAD_LOCAL struct __locale_t *_locale = &__global_locale;
#endif

/* Not referenced anywhere else in this file.
   NOTE(review): presumably the locale data search path kept from the
   FreeBSD origin of this code -- confirm before relying on it. */
char *_PathLocale = NULL;
170 
171 #ifdef _MB_CAPABLE
172 /*
173  * Category names for getenv()
174  */
175 static char *categories[_LC_LAST] = {
176   "LC_ALL",
177   "LC_COLLATE",
178   "LC_CTYPE",
179   "LC_MONETARY",
180   "LC_NUMERIC",
181   "LC_TIME",
182   "LC_MESSAGES",
183 };
184 #endif /* _MB_CAPABLE */
185 
186 /*
187  * Default locale per POSIX.  Can be overridden on a per-target base.
188  */
189 #ifndef DEFAULT_LOCALE
190 #define DEFAULT_LOCALE	"C"
191 #endif
192 
193 #ifdef _MB_CAPABLE
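/*
 * Fallback locale name, used when no locale environment variable is set.
 * This variable was meant to be changeable by an outside mechanism, for
 * instance to load the default locale from a file.  NOTE: as declared
 * below it is static const, so it can currently only be changed at build
 * time through DEFAULT_LOCALE.
 */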
198 static const char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;
199 
/*
 * Constant locale object describing the plain "C" locale: every category
 * is named "C" and the ASCII conversion functions are installed.
 *
 * NOTE(review): the initializer is positional.  The field names mentioned
 * in the comments below are inferred from how this file accesses
 * struct __locale_t (categories, wctomb, mbtowc, cjk_lang, ctype_ptr,
 * mb_cur_max, ctype_codeset, message_codeset) -- confirm the order against
 * the structure declaration in setlocale.h.
 */
const struct __locale_t __C_locale =
{
  /* Locale name of each category, LC_ALL first. */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* Wide char <-> multibyte conversion functions. */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang: 0, no CJK ambiguous-width handling. */
  0,
  /* Presumably ctype_ptr: the default character classification table. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv returned by localeconv(): "." followed by
     nine empty strings and fourteen CHAR_MAX values. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max (one byte per character), ctype_codeset and
     message_codeset. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* One pair of pointers per category, pointing at the built-in "C" data
     where such data exists. */
  {
    { NULL, NULL },			/* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },	/* LC_COLLATE */
#else
    { NULL, NULL },			/* LC_COLLATE */
#endif
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
234 #endif /* _MB_CAPABLE */
235 
/*
 * The global locale object.  setlocale() updates it through
 * __get_global_locale() and __loadlocale().
 *
 * It starts out like the "C" locale, except that the third category name
 * (the LC_CTYPE slot, going by the order of the category name table in
 * this file) is DEFAULT_LOCALE, and that Cygwin installs the UTF-8
 * conversion functions and ctype data instead of the ASCII ones.
 *
 * NOTE(review): the initializer is positional; the field meanings noted
 * below are inferred from how this file accesses struct __locale_t --
 * confirm against the structure declaration in setlocale.h.
 */
struct __locale_t __global_locale =
{
  /* Locale name of each category, LC_ALL first. */
  { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
  /* Wide char <-> multibyte conversion functions. */
#ifdef __CYGWIN__
  __utf8_wctomb,
  __utf8_mbtowc,
#else
  __ascii_wctomb,
  __ascii_mbtowc,
#endif
  /* Presumably cjk_lang: 0, no CJK ambiguous-width handling. */
  0,
  /* Presumably ctype_ptr: the default character classification table. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the struct lconv returned by localeconv(): "." followed by
     nine empty strings and fourteen CHAR_MAX values. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max (one byte per character), ctype_codeset and
     message_codeset. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* One pair of pointers per category, pointing at the built-in data
     where such data exists. */
  {
    { NULL, NULL },			/* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },	/* LC_COLLATE */
    { &_C_utf8_ctype_locale, NULL },	/* LC_CTYPE */
#else
    { NULL, NULL },			/* LC_COLLATE */
    { &_C_ctype_locale, NULL },		/* LC_CTYPE */
#endif
    { &_C_monetary_locale, NULL },	/* LC_MONETARY */
    { &_C_numeric_locale, NULL },	/* LC_NUMERIC */
    { &_C_time_locale, NULL },		/* LC_TIME */
    { &_C_messages_locale, NULL },	/* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
276 
277 #ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   currentlocale() rebuilds this buffer from the global locale: it holds
   either a single locale name (all categories identical) or the names of
   categories 1 .. _LC_LAST - 1 joined with "/".  The size reserves
   ENCODING_LEN + 2 bytes per category, i.e. room for a full-length name
   plus a separator or terminator each. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
	    = "C";
/* Forward declaration; defined after setlocale(), which calls it. */
static char *currentlocale (void);
284 
285 #endif /* _MB_CAPABLE */
286 
/*
 * Select or query the global locale.
 *
 * category  one of the LC_* category numbers.
 * locale    NULL to query, "" to take the name(s) from the environment,
 *           otherwise a locale name.  For LC_ALL the name may be a
 *           "/"-separated list with one entry per category.
 *
 * Returns the resulting locale string, or NULL on failure.  When built
 * with _MB_CAPABLE, an invalid category or an over-long name sets errno
 * to EINVAL.  Without _MB_CAPABLE only "POSIX", "C" and "" are accepted
 * and the answer is always "C".
 */
char *
setlocale (int category, const char *locale)
{
  (void) category;
#ifndef _MB_CAPABLE
  /* A NULL argument is a pure query; anything but the three supported
     names is rejected. */
  if (locale != NULL
      && strcmp (locale, "POSIX") != 0
      && strcmp (locale, "C") != 0
      && strcmp (locale, "") != 0)
    return NULL;
  return "C";
#else /* _MB_CAPABLE */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int cat, undo, len, saverr;
  const char *env, *sep;
  char *ret;

  if (!(category >= LC_ALL && category < _LC_LAST))
    {
      _REENT_ERRNO(p) = EINVAL;
      return NULL;
    }

  /* Query only: report the current setting without changing anything. */
  if (locale == NULL)
    {
      if (category == LC_ALL)
        return global_locale_string;
      return __get_global_locale ()->categories[category];
    }

  /* Start out with the current name of every category. */
  for (cat = 1; cat < _LC_LAST; cat++)
    strcpy (new_categories[cat], __get_global_locale ()->categories[cat]);

  if (category != LC_ALL)
    {
      /* Single category: the name comes from the environment for "",
         from the argument otherwise. */
      env = *locale ? locale : __get_locale_env (category);
      if (strlen (env) > ENCODING_LEN)
        {
          _REENT_ERRNO(p) = EINVAL;
          return NULL;
        }
      strcpy (new_categories[category], env);

      ret = __loadlocale (__get_global_locale (), category,
                          new_categories[category]);
      /* Refresh the combined LC_ALL string in any case. */
      currentlocale ();
      return ret;
    }

  /* LC_ALL from here on: work out one name per category. */
  if (!*locale)
    {
      /* "": ask the environment for each category. */
      for (cat = 1; cat < _LC_LAST; cat++)
        {
          env = __get_locale_env (cat);
          if (strlen (env) > ENCODING_LEN)
            {
              _REENT_ERRNO(p) = EINVAL;
              return NULL;
            }
          strcpy (new_categories[cat], env);
        }
    }
  else if ((sep = strchr (locale, '/')) == NULL)
    {
      /* One plain name, applied to every category. */
      if (strlen (locale) > ENCODING_LEN)
        {
          _REENT_ERRNO(p) = EINVAL;
          return NULL;
        }
      for (cat = 1; cat < _LC_LAST; cat++)
        strcpy (new_categories[cat], locale);
    }
  else
    {
      /* "/"-separated list of names. */
      cat = 1;
      while (sep[1] == '/')
        ++sep;
      if (sep[1] == '\0')
        {
          _REENT_ERRNO(p) = EINVAL;
          return NULL;  /* Nothing but slashes after the first name. */
        }
      do
        {
          if (cat == _LC_LAST)
            break;  /* More names than categories. */
          len = sep - locale;
          if (len > ENCODING_LEN)
            {
              _REENT_ERRNO(p) = EINVAL;
              return NULL;
            }
          strlcpy (new_categories[cat], locale, len + 1);
          ++cat;
          /* Skip the separator(s) and find the end of the next name. */
          while (*sep == '/')
            ++sep;
          locale = sep;
          while (*sep != '\0' && *sep != '/')
            ++sep;
        }
      while (*locale);
      /* Fewer names than categories: repeat the last one. */
      for (; cat < _LC_LAST; cat++)
        strcpy (new_categories[cat], new_categories[cat - 1]);
    }

  /* Load the categories one by one; roll back on the first failure. */
  for (cat = 1; cat < _LC_LAST; cat++)
    {
      strcpy (saved_categories[cat], __get_global_locale ()->categories[cat]);
      if (__loadlocale (__get_global_locale (), cat, new_categories[cat])
          != NULL)
        continue;

      saverr = _REENT_ERRNO(p);
      for (undo = 1; undo < cat; undo++)
        {
          strcpy (new_categories[undo], saved_categories[undo]);
          if (__loadlocale (__get_global_locale (), undo,
                            new_categories[undo]) != NULL)
            continue;
          /* Even the old name no longer loads: fall back to "C". */
          strcpy (new_categories[undo], "C");
          __loadlocale (__get_global_locale (), undo, new_categories[undo]);
        }
      _REENT_ERRNO(p) = saverr;
      return NULL;
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
440 
441 #ifdef _MB_CAPABLE
442 static char *
currentlocale(void)443 currentlocale (void)
444 {
445   int i;
446 
447   strcpy (global_locale_string, __get_global_locale ()->categories[1]);
448 
449   for (i = 2; i < _LC_LAST; ++i)
450     if (strcmp (__get_global_locale ()->categories[1],
451 		__get_global_locale ()->categories[i]))
452       {
453 	for (i = 2; i < _LC_LAST; ++i)
454 	  {
455 	    (void)strcat(global_locale_string, "/");
456 	    (void)strcat(global_locale_string,
457 			 __get_global_locale ()->categories[i]);
458 	  }
459 	break;
460       }
461   return global_locale_string;
462 }
463 
464 extern void __set_ctype (struct __locale_t *, const char *charset);
465 
466 char *
__loadlocale(struct __locale_t * loc,int category,char * new_locale)467 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
468 {
469   /* At this point a full-featured system would just load the locale
470      specific data from the locale files.
471      What we do here for now is to check the incoming string for correctness.
472      The string must be in one of the allowed locale strings, either
473      one in POSIX-style, or one in the old newlib style to maintain
474      backward compatibility.  If the local string is correct, the charset
475      is extracted and stored in ctype_codeset or message_charset
476      dependent on the cateogry. */
477   char *locale = NULL;
478   char charset[ENCODING_LEN + 1] = {};
479   long val = 0;
480   char *end, *c = NULL;
481   int mbc_max;
482   wctomb_p l_wctomb;
483   mbtowc_p l_mbtowc;
484   int cjksingle = 0;
485   int cjknarrow = 0;
486   int cjkwide = 0;
487 
488   /* Avoid doing everything twice if nothing has changed.
489 
490      duplocale relies on this test to go wrong so the locale is actually
491      duplicated when required.  Any change here has to be synced with a
492      matching change in duplocale. */
493   if (!strcmp (new_locale, loc->categories[category]))
494     return loc->categories[category];
495 
496   int ret = 0;
497 
498 #ifdef __CYGWIN__
499   /* This additional code handles the case that the incoming locale string
500      is not valid.  If so, it calls the function __set_locale_from_locale_alias,
501      which is only available on Cygwin right now.  The function reads the
502      file /usr/share/locale/locale.alias.  The file contains locale aliases
503      and their replacement locale.  For instance, the alias "french" is
504      translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
505      "th_TH.TIS-620".  If successful, the function returns with a pointer
506      to the second argument, which is a buffer in which the replacement locale
507      gets stored.  Otherwise the function returns NULL. */
508   char tmp_locale[ENCODING_LEN + 1];
509 
510 restart:
511   if (!locale)
512     locale = new_locale;
513   else if (locale != tmp_locale)
514     {
515       locale = __set_locale_from_locale_alias (locale, tmp_locale);
516       if (!locale)
517 	return NULL;
518     }
519 # define FAIL	goto restart
520 #else
521   locale = new_locale;
522 # define FAIL	return NULL
523 #endif
524 
525   /* "POSIX" is translated to "C", as on Linux. */
526   if (!strcmp (locale, "POSIX"))
527     strcpy (locale, "C");
528   if (!strcmp (locale, "C"))				/* Default "C" locale */
529     strcpy (charset, "ASCII");
530   else if (locale[0] == 'C'
531 	   && (locale[1] == '-'		/* Old newlib style */
532 	       || locale[1] == '.'))	/* Extension for the C locale to allow
533 					   specifying different charsets while
534 					   sticking to the C locale in terms
535 					   of sort order, etc.  Proposed in
536 					   the Debian project. */
537     {
538       char *chp;
539 
540       c = locale + 2;
541       strcpy (charset, c);
542       if ((chp = strchr (charset, '@')))
543         /* Strip off modifier */
544         *chp = '\0';
545       c += strlen (charset);
546     }
547   else							/* POSIX style */
548     {
549       c = locale;
550 
551       /* Don't use ctype macros here, they might be localized. */
552       /* Language */
553       if (c[0] < 'a' || c[0] > 'z'
554 	  || c[1] < 'a' || c[1] > 'z')
555 	FAIL;
556       c += 2;
557       /* Allow three character Language per ISO 639-3 */
558       if (c[0] >= 'a' && c[0] <= 'z')
559       	++c;
560       if (c[0] == '_')
561         {
562 	  /* Territory */
563 	  ++c;
564 	  if (c[0] < 'A' || c[0] > 'Z'
565 	      || c[1] < 'A' || c[1] > 'Z')
566 	    FAIL;
567 	  c += 2;
568 	}
569       if (c[0] == '.')
570 	{
571 	  /* Charset */
572 	  char *chp;
573 
574 	  ++c;
575 	  strcpy (charset, c);
576 	  if ((chp = strchr (charset, '@')))
577 	    /* Strip off modifier */
578 	    *chp = '\0';
579 	  c += strlen (charset);
580 	}
581       else if (c[0] == '\0' || c[0] == '@')
582 	/* End of string or just a modifier */
583 #ifdef __CYGWIN__
584 	/* The Cygwin-only function __set_charset_from_locale checks
585 	   for the default charset which is connected to the given locale.
586 	   The function uses Windows functions in turn so it can't be easily
587 	   adapted to other targets.  However, if any other target provides
588 	   equivalent functionality, preferrably using the same function name
589 	   it would be sufficient to change the guarding #ifdef. */
590 	__set_charset_from_locale (locale, charset);
591 #else
592 	strcpy (charset, "ISO-8859-1");
593 #endif
594       else
595 	/* Invalid string */
596       	FAIL;
597     }
598   if (c && c[0] == '@')
599     {
600       /* Modifier "cjksingle" is recognized to enforce single-width mode. */
601       /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
602          behaviour of wcwidth() and wcswidth() for East Asian languages.
603          For details see the comment at the end of this function. */
604       if (!strcmp (c + 1, "cjksingle"))
605 	cjksingle = 1;
606       else if (!strcmp (c + 1, "cjknarrow"))
607 	cjknarrow = 1;
608       else if (!strcmp (c + 1, "cjkwide"))
609 	cjkwide = 1;
610     }
611   /* We only support this subset of charsets. */
612   switch (charset[0])
613     {
614     case 'U':
615     case 'u':
616       if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
617 	FAIL;
618       strcpy (charset, "UTF-8");
619       mbc_max = 6;
620       l_wctomb = __utf8_wctomb;
621       l_mbtowc = __utf8_mbtowc;
622     break;
623 #ifndef __CYGWIN__
624     /* Cygwin does not support JIS at all. */
625     case 'J':
626     case 'j':
627       if (strcasecmp (charset, "JIS"))
628 	FAIL;
629       strcpy (charset, "JIS");
630       mbc_max = 8;
631       l_wctomb = __jis_wctomb;
632       l_mbtowc = __jis_mbtowc;
633     break;
634 #endif /* !__CYGWIN__ */
635     case 'E':
636     case 'e':
637       if (strncasecmp (charset, "EUC", 3))
638 	FAIL;
639       c = charset + 3;
640       if (*c == '-')
641 	++c;
642       if (!strcasecmp (c, "JP"))
643 	{
644 	  strcpy (charset, "EUCJP");
645 	  mbc_max = 3;
646 	  l_wctomb = __eucjp_wctomb;
647 	  l_mbtowc = __eucjp_mbtowc;
648 	}
649 #ifdef __CYGWIN__
650       /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
651 	 implementation requires Windows support. */
652       else if (!strcasecmp (c, "KR"))
653 	{
654 	  strcpy (charset, "EUCKR");
655 	  mbc_max = 2;
656 	  l_wctomb = __kr_wctomb;
657 	  l_mbtowc = __kr_mbtowc;
658 	}
659       else if (!strcasecmp (c, "CN"))
660 	{
661 	  strcpy (charset, "EUCCN");
662 	  mbc_max = 2;
663 	  l_wctomb = __gbk_wctomb;
664 	  l_mbtowc = __gbk_mbtowc;
665 	}
666 #endif /* __CYGWIN__ */
667       else
668 	FAIL;
669     break;
670     case 'S':
671     case 's':
672       if (strcasecmp (charset, "SJIS"))
673 	FAIL;
674       strcpy (charset, "SJIS");
675       mbc_max = 2;
676       l_wctomb = __sjis_wctomb;
677       l_mbtowc = __sjis_mbtowc;
678     break;
679     case 'I':
680     case 'i':
681       /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
682          ISO-8859-12.  This code also recognizes the aliases without dashes. */
683       if (strncasecmp (charset, "ISO", 3))
684 	FAIL;
685       c = charset + 3;
686       if (*c == '-')
687 	++c;
688       if (strncasecmp (c, "8859", 4))
689 	FAIL;
690       c += 4;
691       if (*c == '-')
692 	++c;
693       val = strtol (c, &end, 10);
694       if (val < 1 || val > 16 || val == 12 || *end)
695 	FAIL;
696       strcpy (charset, "ISO-8859-");
697       c = charset + 9;
698       if (val > 10)
699       	*c++ = '1';
700       *c++ = val % 10 + '0';
701       *c = '\0';
702       mbc_max = 1;
703 #ifdef _MB_EXTENDED_CHARSETS_ISO
704       l_wctomb = __iso_wctomb (val);
705       l_mbtowc = __iso_mbtowc (val);
706 #else /* !_MB_EXTENDED_CHARSETS_ISO */
707       l_wctomb = __ascii_wctomb;
708       l_mbtowc = __ascii_mbtowc;
709 #endif /* _MB_EXTENDED_CHARSETS_ISO */
710     break;
711     case 'C':
712     case 'c':
713       if (charset[1] != 'P' && charset[1] != 'p')
714 	FAIL;
715       memcpy (charset, "CP", 2);
716       val = strtol (charset + 2, &end, 10);
717       if (*end)
718 	FAIL;
719       switch (val)
720 	{
721 	case 437:
722 	case 720:
723 	case 737:
724 	case 775:
725 	case 850:
726 	case 852:
727 	case 855:
728 	case 857:
729 	case 858:
730 	case 862:
731 	case 866:
732 	case 874:
733 	case 1125:
734 	case 1250:
735 	case 1251:
736 	case 1252:
737 	case 1253:
738 	case 1254:
739 	case 1255:
740 	case 1256:
741 	case 1257:
742 	case 1258:
743 	  mbc_max = 1;
744 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
745 	  l_wctomb = __cp_wctomb (val);
746 	  l_mbtowc = __cp_mbtowc (val);
747 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
748 	  l_wctomb = __ascii_wctomb;
749 	  l_mbtowc = __ascii_mbtowc;
750 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
751 	  break;
752 	case 932:
753 	  mbc_max = 2;
754 	  l_wctomb = __sjis_wctomb;
755 	  l_mbtowc = __sjis_mbtowc;
756 	  break;
757 	default:
758 	  FAIL;
759 	}
760     break;
761     case 'K':
762     case 'k':
763       /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
764       if (strncasecmp (charset, "KOI8", 4))
765 	FAIL;
766       c = charset + 4;
767       if (*c == '-')
768 	++c;
769       if (*c == 'R' || *c == 'r')
770 	{
771 	  val = 20866;
772 	  strcpy (charset, "CP20866");
773 	}
774       else if (*c == 'U' || *c == 'u')
775 	{
776 	  val = 21866;
777 	  strcpy (charset, "CP21866");
778 	}
779       else if (*c == 'T' || *c == 't')
780 	{
781 	  val = 103;
782 	  strcpy (charset, "CP103");
783 	}
784       else
785 	FAIL;
786       mbc_max = 1;
787 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
788       l_wctomb = __cp_wctomb (val);
789       l_mbtowc = __cp_mbtowc (val);
790 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
791       l_wctomb = __ascii_wctomb;
792       l_mbtowc = __ascii_mbtowc;
793 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
794       break;
795     case 'A':
796     case 'a':
797       if (strcasecmp (charset, "ASCII"))
798 	FAIL;
799       strcpy (charset, "ASCII");
800       mbc_max = 1;
801       l_wctomb = __ascii_wctomb;
802       l_mbtowc = __ascii_mbtowc;
803       break;
804     case 'G':
805     case 'g':
806 #ifdef __CYGWIN__
807       /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
808 	 requires Windows support. */
809       if (!strcasecmp (charset, "GBK")
810 	  || !strcasecmp (charset, "GB2312"))
811 	{
812 	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
813 	  mbc_max = 2;
814 	  l_wctomb = __gbk_wctomb;
815 	  l_mbtowc = __gbk_mbtowc;
816 	}
817       else if (!strcasecmp (charset, "GB18030"))
818 	{
819 	  strcpy (charset, "GB18030");
820 	  mbc_max = 4;
821 	  l_wctomb = __gb18030_wctomb;
822 	  l_mbtowc = __gb18030_mbtowc;
823 	}
824       else
825 #endif /* __CYGWIN__ */
826       /* GEORGIAN-PS and the alias without dash */
827       if (!strncasecmp (charset, "GEORGIAN", 8))
828 	{
829 	  c = charset + 8;
830 	  if (*c == '-')
831 	    ++c;
832 	  if (strcasecmp (c, "PS"))
833 	    FAIL;
834 	  val = 101;
835 	  strcpy (charset, "CP101");
836 	  mbc_max = 1;
837 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
838 	  l_wctomb = __cp_wctomb (val);
839 	  l_mbtowc = __cp_mbtowc (val);
840 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
841 	  l_wctomb = __ascii_wctomb;
842 	  l_mbtowc = __ascii_mbtowc;
843 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
844 	}
845       else
846 	FAIL;
847       break;
848     case 'P':
849     case 'p':
850       /* PT154 */
851       if (strcasecmp (charset, "PT154"))
852 	FAIL;
853       val = 102;
854       strcpy (charset, "CP102");
855       mbc_max = 1;
856 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
857       l_wctomb = __cp_wctomb (val);
858       l_mbtowc = __cp_mbtowc (val);
859 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
860       l_wctomb = __ascii_wctomb;
861       l_mbtowc = __ascii_mbtowc;
862 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
863       break;
864     case 'T':
865     case 't':
866       if (strncasecmp (charset, "TIS", 3))
867       	FAIL;
868       c = charset + 3;
869       if (*c == '-')
870 	++c;
871       if (strcmp (c, "620"))
872       	FAIL;
873       val = 874;
874       strcpy (charset, "CP874");
875       mbc_max = 1;
876 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
877       l_wctomb = __cp_wctomb (val);
878       l_mbtowc = __cp_mbtowc (val);
879 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
880       l_wctomb = __ascii_wctomb;
881       l_mbtowc = __ascii_mbtowc;
882 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
883       break;
884 #ifdef __CYGWIN__
885     /* Newlib does not provide Big5 and Cygwin's implementation
886        requires Windows support. */
887     case 'B':
888     case 'b':
889       if (strcasecmp (charset, "BIG5"))
890       	FAIL;
891       strcpy (charset, "BIG5");
892       mbc_max = 2;
893       l_wctomb = __big5_wctomb;
894       l_mbtowc = __big5_mbtowc;
895       break;
896 #endif /* __CYGWIN__ */
897     default:
898       FAIL;
899     }
900   switch (category)
901     {
902     case LC_CTYPE:
903 #ifndef __HAVE_LOCALE_INFO__
904       strcpy (loc->ctype_codeset, charset);
905       loc->mb_cur_max[0] = mbc_max;
906 #endif
907 #ifdef __CYGWIN__
908       __mb_cur_max = mbc_max;	/* Only for backward compat */
909 #endif
910       loc->wctomb = l_wctomb;
911       loc->mbtowc = l_mbtowc;
912       __set_ctype (loc, charset);
913       /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
914       /* Determine the width for the "CJK Ambiguous Width" category of
915          characters. This is used in wcwidth(). Assume single width for
916          single-byte charsets, and double width for multi-byte charsets
917          other than UTF-8. For UTF-8, use single width.
918          Single width can also be forced with the "@cjknarrow" modifier.
919          Double width can also be forced with the "@cjkwide" modifier.
920        */
921       loc->cjk_lang = cjkwide ||
922 		      (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
923       if (cjksingle)
924 	loc->cjk_lang = -1;	/* Disable CJK dual-width */
925 #ifdef __CYGWIN__
926       ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
927 				 mbc_max);
928 #endif /* __CYGWIN__ */
929       break;
930 #ifdef __CYGWIN__
931   /* Right now only Cygwin supports a __messages_load_locale function at all. */
932     case LC_MESSAGES:
933 #ifdef __HAVE_LOCALE_INFO__
934       ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
935       if (!ret)
936 #else
937       strcpy (loc->message_codeset, charset);
938 #endif /* __HAVE_LOCALE_INFO__ */
939       break;
940 #endif
941 #ifdef __HAVE_LOCALE_INFO__
942 #ifdef __CYGWIN__
943   /* Right now only Cygwin supports a __collate_load_locale function at all. */
944     case LC_COLLATE:
945       ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
946       break;
947   /* Right now only Cygwin supports a __monetary_load_locale function at all. */
948     case LC_MONETARY:
949       ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
950       break;
951   /* Right now only Cygwin supports a __numeric_load_locale function at all. */
952     case LC_NUMERIC:
953       ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
954       break;
955   /* Right now only Cygwin supports a __time_load_locale function at all. */
956     case LC_TIME:
957       ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
958       break;
959 #endif
960 #endif /* __HAVE_LOCALE_INFO__ */
961     default:
962       break;
963     }
964 #ifdef __HAVE_LOCALE_INFO__
965   if (ret)
966     FAIL;
967 #endif /* __HAVE_LOCALE_INFO__ */
968   return strcpy(loc->categories[category], new_locale);
969 }
970 
971 const char *
__get_locale_env(int category)972 __get_locale_env (int category)
973 {
974   const char *env;
975 
976   /* 1. check LC_ALL. */
977   env = getenv (categories[0]);
978 
979   /* 2. check LC_* */
980   if (env == NULL || !*env)
981     env = getenv (categories[category]);
982 
983   /* 3. check LANG */
984   if (env == NULL || !*env)
985     env = getenv ("LANG");
986 
987   /* 4. if none is set, fall to default locale */
988   if (env == NULL || !*env)
989     env = __default_locale;
990 
991   return env;
992 }
993 #endif /* _MB_CAPABLE */
994 
995 size_t
__locale_mb_cur_max(void)996 __locale_mb_cur_max (void)
997 {
998 #ifdef __HAVE_LOCALE_INFO__
999   return __get_current_ctype_locale ()->mb_cur_max[0];
1000 #else
1001   return __get_current_locale ()->mb_cur_max[0];
1002 #endif
1003 }
1004 
1005 #ifdef __HAVE_LOCALE_INFO__
1006 const char *
__locale_ctype_ptr_l(struct __locale_t * locale)1007 __locale_ctype_ptr_l (struct __locale_t *locale)
1008 {
1009   return locale->ctype_ptr;
1010 }
1011 
1012 const char *
__locale_ctype_ptr(void)1013 __locale_ctype_ptr (void)
1014 {
1015   return __get_current_locale ()->ctype_ptr;
1016 }
1017 #endif /* __HAVE_LOCALE_INFO__ */
1018