1 /* Copyright (c) 2002,2004,2005 Joerg Wunsch
2 Copyright (c) 2008 Dmitry Xmelkov
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in
13 the documentation and/or other materials provided with the
14 distribution.
15
16 * Neither the name of the copyright holders nor the names of
17 contributors may be used to endorse or promote products derived
18 from this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* $Id: vfscanf.c 2191 2010-11-05 13:45:57Z arcanum $ */
34
35 #include "stdio_private.h"
36 #include "scanf_private.h"
37
38 /*
39 * Compute which features are required
40 */
41
/*
 * Integer types the integer conversions accumulate into and store from:
 * long long when the build enables long long support, long otherwise.
 */
#ifndef _NEED_IO_LONG_LONG
typedef unsigned long uint_scanf_t;
typedef long int_scanf_t;
#else
typedef unsigned long long uint_scanf_t;
typedef long long int_scanf_t;
#endif

/*
 * Character-type abstraction.  With WIDE_CHARS defined this file builds
 * the wide scanner (vfscanf is renamed to vfwscanf); otherwise it builds
 * the narrow one.
 *
 *   INT          type returned by GETC()/UNGETC()
 *   CHAR, UCHAR  format/destination character type and its unsigned form
 *   IS_EOF(c)    true when c is the end-of-input indication
 */
#ifndef WIDE_CHARS
# define INT            int
# define CHAR           char
# define UCHAR          unsigned char
# define GETC(s)        getc(s)
# define UNGETC(c,s)    ungetc(c,s)
# define IS_EOF(c)      ((c) < 0)
#else
# define INT            wint_t
# define CHAR           wchar_t
# define UCHAR          wchar_t
# define GETC(s)        getwc(s)
# define UNGETC(c,s)    ungetwc(c,s)
# undef vfscanf
# define vfscanf        vfwscanf
# define IS_EOF(c)      ((c) == WEOF)
#endif
67
68 static INT
scanf_getc(FILE * stream,int * lenp)69 scanf_getc(FILE *stream, int *lenp)
70 {
71 INT c = GETC(stream);
72 if (!IS_EOF(c))
73 ++(*lenp);
74 return c;
75 }
76
77 static INT
scanf_ungetc(INT c,FILE * stream,int * lenp)78 scanf_ungetc(INT c, FILE *stream, int *lenp)
79 {
80 c = UNGETC(c, stream);
81 if (!IS_EOF(c))
82 --(*lenp);
83 return c;
84 }
85
86 static void
putval(void * addr,int_scanf_t val,uint16_t flags)87 putval (void *addr, int_scanf_t val, uint16_t flags)
88 {
89 if (addr) {
90 if (flags & FL_CHAR)
91 *(char *)addr = val;
92 #ifdef _NEED_IO_LONG_LONG
93 else if (flags & FL_LONGLONG)
94 *(long long *)addr = val;
95 #endif
96 else if (flags & FL_LONG)
97 *(long *)addr = val;
98 else if (flags & FL_SHORT)
99 *(short *)addr = val;
100 else
101 *(int *)addr = val;
102 }
103 }
104
105 static unsigned char
conv_int(FILE * stream,int * lenp,width_t width,void * addr,uint16_t flags,unsigned int base)106 conv_int (FILE *stream, int *lenp, width_t width, void *addr, uint16_t flags, unsigned int base)
107 {
108 uint_scanf_t val;
109 INT i;
110
111 i = scanf_getc (stream, lenp); /* after scanf_ungetc() */
112
113 switch (i) {
114 case '-':
115 flags |= FL_MINUS;
116 __PICOLIBC_FALLTHROUGH;
117 case '+':
118 if (!--width || IS_EOF(i = scanf_getc(stream, lenp)))
119 goto err;
120 }
121
122 val = 0;
123
124 /* Leading '0' digit -- check for base indication */
125 if (i == '0') {
126 if (!--width || IS_EOF(i = scanf_getc (stream, lenp)))
127 goto putval;
128
129 flags |= FL_ANY;
130
131 if (TOLOWER(i) == 'x' && (base == 0 || base == 16)) {
132 base = 16;
133 if (!--width || IS_EOF(i = scanf_getc (stream, lenp)))
134 goto putval;
135 #ifdef _NEED_IO_PERCENT_B
136 } else if (i == 'b' && base <= 2) {
137 base = 2;
138 if (!--width || IS_EOF(i = scanf_getc (stream, lenp)))
139 goto putval;
140 #endif
141 } else if (base == 0 || base == 8) {
142 base = 8;
143 }
144 } else if (base == 0)
145 base = 10;
146
147 do {
148 unsigned int c = digit_to_val(i);
149 if (c >= base) {
150 scanf_ungetc (i, stream, lenp);
151 break;
152 }
153 flags |= FL_ANY;
154 val = val * base + c;
155 if (!--width) goto putval;
156 } while (!IS_EOF(i = scanf_getc(stream, lenp)));
157 if (!(flags & FL_ANY))
158 goto err;
159
160 putval:
161 if (flags & FL_MINUS) val = -val;
162 putval (addr, val, flags);
163 return 1;
164
165 err:
166 return 0;
167 }
168
#ifdef _NEED_IO_BRACKET
/*
 * Handle a %[ conversion.
 *
 * stream  input stream
 * lenp    running count of consumed characters
 * width   maximum number of characters to accept
 * addr    destination string, or NULL to discard the result
 * fmt     points just past the '[' in the format string
 *
 * The scan set is first compiled into a 256-bit membership map, then
 * input characters are accepted while they belong to the set.
 *
 * Returns the format position just past the closing ']' when at least
 * one character was accepted (the destination is then NUL-terminated);
 * returns NULL when the format ends before ']' or nothing matched.
 */
static const CHAR *
conv_brk (FILE *stream, int *lenp, width_t width, CHAR *addr, const CHAR *fmt)
{
    unsigned char set[32];
    unsigned char invert = 0;
    unsigned char in_range = 0;
    unsigned char range_end = 0;    /* initialised to avoid a compiler warning */
    unsigned char matched = 0;
    INT i;

    memset (set, 0, sizeof(set));

    /* Pass 1: compile the scan set. */
    for (i = 0; ; i++) {
        unsigned char c = *fmt++;

        if (c == 0)
            return 0;               /* format ended inside the brackets */

        if (c == '^' && i == 0) {
            invert = 1;
            continue;
        }

        /* ']' and '-' are ordinary in the very first set position. */
        if (i > invert) {
            if (c == ']')
                break;
            if (c == '-' && !in_range) {
                in_range = 1;
                continue;
            }
        }

        /* Outside a range the run below covers just this character. */
        if (!in_range)
            range_end = c;

        /* Mark c, then walk towards range_end marking every step. */
        while (c != range_end) {
            set[c >> 3] |= 1 << (c & 7);
            if (c < range_end)
                c++;
            else
                c--;
        }
        set[c >> 3] |= 1 << (c & 7);

        in_range = 0;
    }

    /* A '-' directly before ']' stands for itself. */
    if (in_range)
        set['-'/8] |= 1 << ('-' & 7);

    if (invert) {
        unsigned char *p;

        for (p = set; p != set + sizeof(set); p++)
            *p = ~*p;
    }

    /* Pass 2: consume matching input.
       NUL ('\0') is considered a normal character, which matches Glibc.
       Note that there is no way to include NUL in the symbol list. */
    do {
        i = scanf_getc (stream, lenp);
        if (IS_EOF(i))
            break;
        if (((set[(unsigned char)i >> 3] >> (i & 7)) & 1) == 0) {
            scanf_ungetc (i, stream, lenp);
            break;
        }
        if (addr)
            *addr++ = i;
        matched = 1;
    } while (--width);

    if (!matched)
        return 0;

    if (addr)
        *addr = 0;
    return fmt;
}
#endif /* _NEED_IO_BRACKET */
248
249 #if defined(_NEED_IO_FLOAT) || defined(_NEED_IO_DOUBLE)
250 #define FLT_STREAM FILE
251 #include "conv_flt.c"
252 #endif
253
skip_spaces(FILE * stream,int * lenp)254 static INT skip_spaces (FILE *stream, int *lenp)
255 {
256 INT i;
257 do {
258 if (IS_EOF(i = scanf_getc (stream, lenp)))
259 return i;
260 } while (isspace (i));
261 scanf_ungetc (i, stream, lenp);
262 return i;
263 }
264
#ifdef _NEED_IO_POS_ARGS

/* Holds a va_list inside a struct so it can be handed around by address. */
typedef struct {
    va_list ap;
} my_va_list;

/*
 * Advance ap so that the next va_arg() yields argument number
 * target_argno (1-based).  ap must be positioned at the first argument.
 */
static void
skip_to_arg(my_va_list *ap, int target_argno)
{
    int argno;

    /*
     * Fortunately, all scanf args are pointers,
     * and so are the same size as void *
     */
    for (argno = 1; argno < target_argno; argno++)
        (void) va_arg(ap->ap, void *);
}

#endif
287
288 /**
289 Formatted input. This function is the heart of the \b scanf family of
290 functions.
291
292 Characters are read from \a stream and processed in a way described by
293 \a fmt. Conversion results will be assigned to the parameters passed
294 via \a ap.
295
296 The format string \a fmt is scanned for conversion specifications.
297 Anything that doesn't comprise a conversion specification is taken as
text that is matched literally against the input. White space in the
format string will match any white space in the data (including none);
every other character matches only itself. Processing is aborted as soon
as the data and format string no longer match, or there is an error or
end-of-file condition on \a stream.
303
304 Most conversions skip leading white space before starting the actual
305 conversion.
306
307 Conversions are introduced with the character \b %. Possible options
308 can follow the \b %:
309
310 - a \c * indicating that the conversion should be performed but
311 the conversion result is to be discarded; no parameters will
312 be processed from \c ap,
313 - the character \c h indicating that the argument is a pointer
314 to <tt>short int</tt> (rather than <tt>int</tt>),
- the 2 characters \c hh indicating that the argument is a pointer
to <tt>char</tt> (rather than <tt>int</tt>),
- the character \c l indicating that the argument is a pointer
to <tt>long int</tt> (rather than <tt>int</tt>, for integer
type conversions), or a pointer to \c double (for floating
point conversions).
321
322 In addition, a maximal field width may be specified as a nonzero
323 positive decimal integer, which will restrict the conversion to at
324 most this many characters from the input stream. This field width is
325 limited to at most 255 characters which is also the default value
326 (except for the <tt>%c</tt> conversion that defaults to 1).
327
328 The following conversion flags are supported:
329
330 - \c % Matches a literal \c % character. This is not a conversion.
331 - \c d Matches an optionally signed decimal integer; the next
332 pointer must be a pointer to \c int.
333 - \c i Matches an optionally signed integer; the next pointer must
334 be a pointer to \c int. The integer is read in base 16 if it
335 begins with \b 0x or \b 0X, in base 8 if it begins with \b 0, and
336 in base 10 otherwise. Only characters that correspond to the
337 base are used.
338 - \c o Matches an octal integer; the next pointer must be a pointer to
339 <tt>unsigned int</tt>.
340 - \c u Matches an optionally signed decimal integer; the next
341 pointer must be a pointer to <tt>unsigned int</tt>.
342 - \c x Matches an optionally signed hexadecimal integer; the next
343 pointer must be a pointer to <tt>unsigned int</tt>.
344 - \c f Matches an optionally signed floating-point number; the next
345 pointer must be a pointer to \c float.
346 - <tt>e, g, F, E, G</tt> Equivalent to \c f.
347 - \c s
348 Matches a sequence of non-white-space characters; the next pointer
349 must be a pointer to \c char, and the array must be large enough to
350 accept all the sequence and the terminating \c NUL character. The
351 input string stops at white space or at the maximum field width,
352 whichever occurs first.
353 - \c c
354 Matches a sequence of width count characters (default 1); the next
355 pointer must be a pointer to \c char, and there must be enough room
356 for all the characters (no terminating \c NUL is added). The usual
357 skip of leading white space is suppressed. To skip white space
358 first, use an explicit space in the format.
359 - \c [
360 Matches a nonempty sequence of characters from the specified set
361 of accepted characters; the next pointer must be a pointer to \c
362 char, and there must be enough room for all the characters in the
363 string, plus a terminating \c NUL character. The usual skip of
364 leading white space is suppressed. The string is to be made up
365 of characters in (or not in) a particular set; the set is defined
366 by the characters between the open bracket \c [ character and a
367 close bracket \c ] character. The set excludes those characters
368 if the first character after the open bracket is a circumflex
369 \c ^. To include a close bracket in the set, make it the first
370 character after the open bracket or the circumflex; any other
371 position will end the set. The hyphen character \c - is also
372 special; when placed between two other characters, it adds all
373 intervening characters to the set. To include a hyphen, make it
374 the last character before the final close bracket. For instance,
375 <tt>[^]0-9-]</tt> means the set of <em>everything except close
376 bracket, zero through nine, and hyphen</em>. The string ends
377 with the appearance of a character not in the (or, with a
378 circumflex, in) set or when the field width runs out. Note that
379 usage of this conversion enlarges the stack expense.
380 - \c p
381 Matches a pointer value (as printed by <tt>%p</tt> in printf()); the
382 next pointer must be a pointer to \c void.
383 - \c n
384 Nothing is expected; instead, the number of characters consumed
385 thus far from the input is stored through the next pointer, which
386 must be a pointer to \c int. This is not a conversion, although it
387 can be suppressed with the \c * flag.
388
389 These functions return the number of input items assigned, which can
390 be fewer than provided for, or even zero, in the event of a matching
391 failure. Zero indicates that, while there was input available, no
392 conversions were assigned; typically this is due to an invalid input
393 character, such as an alphabetic character for a <tt>%d</tt>
conversion. The value \c EOF is returned if an input failure, such as
an end-of-file, occurs before any conversion. If an error or
end-of-file occurs after conversion has begun, the number of
conversions which were successfully completed is returned.
398
By default, all the conversions described above are available except
the floating-point conversions and the width is limited to 255
characters. The floating-point conversions will be available in the
extended version provided by the library \c libscanf_flt.a. Also in
this case the width is not limited (more precisely, it is limited to
65535 characters). To link a program against the extended version, use
the following compiler flags in the link stage:
406
407 \code
408 -Wl,-u,vfscanf -lscanf_flt -lm
409 \endcode
410
411 A third version is available for environments that are tight on
412 space. In addition to the restrictions of the standard one, this
413 version implements no <tt>%[</tt> specification. This version is
414 provided in the library \c libscanf_min.a, and can be requested using
415 the following options in the link stage:
416
417 \code
418 -Wl,-u,vfscanf -lscanf_min -lm
419 \endcode
420 */
vfscanf(FILE * stream,const CHAR * fmt,va_list ap_orig)421 int vfscanf (FILE * stream, const CHAR *fmt, va_list ap_orig)
422 {
423 unsigned char nconvs;
424 UCHAR c;
425 width_t width;
426 void *addr;
427 #ifdef _NEED_IO_POS_ARGS
428 my_va_list my_ap;
429 #define ap my_ap.ap
430 va_copy(ap, ap_orig);
431 #else
432 #define ap ap_orig
433 #endif
434 uint16_t flags;
435 INT i;
436 int scanf_len = 0;
437 #define lenp (&scanf_len)
438
439 nconvs = 0;
440
441 /* Initialization of stream_flags at each pass simplifies the register
442 allocation with GCC 3.3 - 4.2. Only the GCC 4.3 is good to move it
443 to the begin. */
444 while ((c = *fmt++) != 0) {
445
446 if (isspace (c)) {
447 skip_spaces (stream, lenp);
448
449 } else if (c != '%'
450 || (c = *fmt++) == '%')
451 {
452 /* Ordinary character. */
453 if (IS_EOF(i = scanf_getc (stream, lenp)))
454 goto eof;
455 if ((UCHAR)i != c) {
456 scanf_ungetc (i, stream, lenp);
457 break;
458 }
459
460 } else {
461 flags = 0;
462
463 if (c == '*') {
464 flags = FL_STAR;
465 c = *fmt++;
466 }
467
468 for (;;) {
469 width = 0;
470 while ((c -= '0') < 10) {
471 flags |= FL_WIDTH;
472 width = width * 10 + c;
473 c = *fmt++;
474 }
475 c += '0';
476 if (flags & FL_WIDTH) {
477 #ifdef _NEED_IO_POS_ARGS
478 if (c == '$') {
479 flags &= ~FL_WIDTH;
480 va_end(ap);
481 va_copy(ap, ap_orig);
482 skip_to_arg(&my_ap, width);
483 c = *fmt++;
484 continue;
485 }
486 #endif
487 /* C99 says that width must be greater than zero.
488 To simplify program do treat 0 as error in format. */
489 if (!width) break;
490 } else {
491 width = ~0;
492 }
493 break;
494 }
495
496 switch (c) {
497 case 'h':
498 flags |= FL_SHORT;
499 c = *fmt++;
500 if (c == 'h') {
501 flags |= FL_CHAR;
502 c = *fmt++;
503 }
504 break;
505 case 'l':
506 flags |= FL_LONG;
507 c = *fmt++;
508 if (c == 'l') {
509 flags |= FL_LONGLONG;
510 c = *fmt++;
511 }
512 break;
513 case 'L':
514 flags |= FL_LONG|FL_LONGLONG;
515 c = *fmt++;
516 break;
517 #ifdef _NEED_IO_C99_FORMATS
518 #ifdef _NEED_IO_LONG_LONG
519 #define CHECK_LONGLONG(type) \
520 else if (sizeof(type) == sizeof(long long)) \
521 flags |= FL_LONGLONG
522 #else
523 #define CHECK_LONGLONG(type)
524 #endif
525
526 #define CHECK_INT_SIZE(letter, type) \
527 case letter: \
528 if (sizeof(type) != sizeof(int)) { \
529 if (sizeof(type) == sizeof(long)) \
530 flags |= FL_LONG; \
531 else if (sizeof(type) == sizeof(short)) \
532 flags |= FL_SHORT; \
533 CHECK_LONGLONG(type); \
534 } \
535 c = *fmt++; \
536 break;
537
538 CHECK_INT_SIZE('j', intmax_t);
539 CHECK_INT_SIZE('z', size_t);
540 CHECK_INT_SIZE('t', ptrdiff_t);
541 #endif
542 }
543
544 #ifdef _NEED_IO_PERCENT_B
545 #define CNV_BASE "cdinopsuxXb"
546 #else
547 #define CNV_BASE "cdinopsuxX"
548 #endif
549
550 #ifdef _NEED_IO_BRACKET
551 # define CNV_BRACKET "["
552 #else
553 # define CNV_BRACKET ""
554 #endif
555 #if defined(_NEED_IO_FLOAT) || defined(_NEED_IO_DOUBLE)
556 # define CNV_FLOAT "efgEFG"
557 #else
558 # define CNV_FLOAT ""
559 #endif
560 #define CNV_LIST CNV_BASE CNV_BRACKET CNV_FLOAT
561 if (!c || !strchr (CNV_LIST, c))
562 break;
563
564 addr = (flags & FL_STAR) ? 0 : va_arg (ap, void *);
565
566 if (c == 'n') {
567 putval (addr, (unsigned)(scanf_len), flags);
568 continue;
569 }
570
571 if (c == 'c') {
572 if (!(flags & FL_WIDTH)) width = 1;
573 do {
574 if (IS_EOF(i = scanf_getc (stream, lenp)))
575 goto eof;
576 if (addr) {
577 #ifdef WIDE_CHARS
578 if (flags & FL_LONG) {
579 *(wchar_t *)addr = i;
580 addr = (wchar_t *)addr + 1;
581 } else
582 #endif
583 {
584 *(char *)addr = i;
585 addr = (char*)addr + 1;
586 }
587 }
588 } while (--width);
589 c = 1; /* no matter with smart GCC */
590
591 #ifdef _NEED_IO_BRACKET
592 } else if (c == '[') {
593 fmt = conv_brk (stream, lenp, width, addr, fmt);
594 c = (fmt != 0);
595 #endif
596
597 } else {
598
599 unsigned int base = 0;
600
601 if (IS_EOF(skip_spaces (stream, lenp)))
602 goto eof;
603
604 switch (c) {
605
606 case 's':
607 /* Now we have 1 nospace symbol. */
608 do {
609 if (IS_EOF(i = scanf_getc (stream, lenp)))
610 break;
611 if (isspace (i)) {
612 scanf_ungetc (i, stream, lenp);
613 break;
614 }
615 if (addr) {
616 #ifdef WIDE_CHARS
617 if (flags & FL_LONG) {
618 *(wchar_t *)addr = i;
619 addr = (wchar_t *)addr + 1;
620 } else
621 #endif
622 {
623 *(char *)addr = i;
624 addr = (char*)addr + 1;
625 }
626 }
627 } while (--width);
628 if (addr){
629 #ifdef WIDE_CHARS
630 if (flags & FL_LONG)
631 *(wchar_t *)addr = 0;
632 else
633 #endif
634 *(char *)addr = 0;
635 }
636 c = 1; /* no matter with smart GCC */
637 break;
638
639 #if defined(_NEED_IO_FLOAT) || defined(_NEED_IO_DOUBLE)
640 case 'p':
641 if (sizeof(void *) > sizeof(int))
642 flags |= FL_LONG;
643 __PICOLIBC_FALLTHROUGH;
644 case 'x':
645 case 'X':
646 base = 16;
647 goto conv_int;
648
649 #ifdef _NEED_IO_PERCENT_B
650 case 'b':
651 base = 2;
652 goto conv_int;
653 #endif
654
655 case 'd':
656 case 'u':
657 base = 10;
658 goto conv_int;
659
660 case 'o':
661 base = 8;
662 __PICOLIBC_FALLTHROUGH;
663 case 'i':
664 conv_int:
665 c = conv_int (stream, lenp, width, addr, flags, base);
666 break;
667
668 default: /* e,E,f,F,g,G */
669 c = conv_flt (stream, lenp, width, addr, flags);
670 #else
671 case 'd':
672 case 'u':
673 base = 10;
674 goto conv_int;
675
676 #ifdef _NEED_IO_PERCENT_B
677 case 'b':
678 base = 2;
679 goto conv_int;
680 #endif
681
682 case 'o':
683 base = 8;
684 __PICOLIBC_FALLTHROUGH;
685 case 'i':
686 goto conv_int;
687
688 case 'p':
689 if (sizeof(void *) > sizeof(int))
690 flags |= FL_LONG;
691 __PICOLIBC_FALLTHROUGH;
692 default: /* p,x,X */
693 base = 16;
694 conv_int:
695 c = conv_int (stream, lenp, width, addr, flags, base);
696 #endif
697 }
698 } /* else */
699
700 if (!c) {
701 if (stream->flags & (__SERR | __SEOF))
702 goto eof;
703 break;
704 }
705 if (!(flags & FL_STAR)) nconvs += 1;
706 } /* else */
707 } /* while */
708 #ifdef _NEED_IO_POS_ARGS
709 va_end(ap);
710 #endif
711 return nconvs;
712
713 eof:
714 #ifdef _NEED_IO_POS_ARGS
715 va_end(ap);
716 #endif
717 #undef ap
718 return nconvs ? nconvs : EOF;
719 }
720
/*
 * When _FORMAT_DEFAULT_DOUBLE is defined and vfscanf has not been renamed
 * by a macro, also provide this scanner under the name __d_vfscanf:
 * as a strong alias where the toolchain supports it, otherwise as a
 * forwarding function.
 */
#if defined(_FORMAT_DEFAULT_DOUBLE) && !defined(vfscanf)
# ifdef _HAVE_ALIAS_ATTRIBUTE
__strong_reference(vfscanf, __d_vfscanf);
# else
int
__d_vfscanf (FILE * stream, const char *fmt, va_list ap)
{
    return vfscanf(stream, fmt, ap);
}
# endif
#endif
728