1 /* Copyright (c) 2002,2004,2005 Joerg Wunsch
2 Copyright (c) 2008 Dmitry Xmelkov
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in
13 the documentation and/or other materials provided with the
14 distribution.
15
16 * Neither the name of the copyright holders nor the names of
17 contributors may be used to endorse or promote products derived
18 from this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* $Id: vfscanf.c 2191 2010-11-05 13:45:57Z arcanum $ */
34
35 #include <ctype.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdarg.h>
39 #include <stddef.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include "stdio_private.h"
44 #include "scanf_private.h"
45
/*
 * Build-time configuration for the scanf engine.
 *
 * NOTE(review): once the guard below has run, SCANF_LONGLONG is defined in
 * every configuration, so each later "is it defined" test on it succeeds
 * and the plain `long` typedefs are unreachable. If the intent was to test
 * the macro's *value*, this block and every other user of the macro in the
 * file have to be changed together -- confirm before touching.
 */
#if !defined(SCANF_LONGLONG)
# define SCANF_LONGLONG (SCANF_FLOAT || defined(_WANT_IO_LONG_LONG))
#endif

/* Positional (%n$) arguments are enabled by either of these switches. */
#if defined(SCANF_FLOAT) || defined(_WANT_IO_POS_ARGS)
#define SCANF_POSITIONAL
#endif

/* Widest integer types that conv_int() accumulates into and putval() stores. */
#if defined(SCANF_LONGLONG)
typedef unsigned long long uint_scanf_t;
typedef long long int_scanf_t;
#else
typedef unsigned long uint_scanf_t;
typedef long int_scanf_t;
#endif

/*
 * Character abstraction: the same source builds vfscanf (narrow) or, with
 * WIDE_CHARS defined, vfwscanf (wide).
 */
#if !defined(WIDE_CHARS)
#define INT int
#define CHAR char
#define UCHAR unsigned char
#define GETC(s) getc(s)
#define UNGETC(c,s) ungetc(c,s)
#define IS_EOF(c) ((c) < 0)
#else
#include <wchar.h>
#define INT wint_t
#define CHAR wchar_t
#define UCHAR wchar_t
#define GETC(s) getwc(s)
#define UNGETC(c,s) ungetwc(c,s)
#undef vfscanf
#define vfscanf vfwscanf
#define IS_EOF(c) ((c) == WEOF)
#endif
80
81 static INT
scanf_getc(FILE * stream,int * lenp)82 scanf_getc(FILE *stream, int *lenp)
83 {
84 INT c = GETC(stream);
85 if (!IS_EOF(c))
86 ++(*lenp);
87 return c;
88 }
89
90 static INT
scanf_ungetc(INT c,FILE * stream,int * lenp)91 scanf_ungetc(INT c, FILE *stream, int *lenp)
92 {
93 c = UNGETC(c, stream);
94 if (!IS_EOF(c))
95 --(*lenp);
96 return c;
97 }
98
99 static void
putval(void * addr,int_scanf_t val,uint16_t flags)100 putval (void *addr, int_scanf_t val, uint16_t flags)
101 {
102 if (!(flags & FL_STAR)) {
103 if (flags & FL_CHAR)
104 *(char *)addr = val;
105 #ifdef SCANF_LONGLONG
106 else if (flags & FL_LONGLONG)
107 *(long long *)addr = val;
108 #endif
109 else if (flags & FL_LONG)
110 *(long *)addr = val;
111 else if (flags & FL_SHORT)
112 *(short *)addr = val;
113 else
114 *(int *)addr = val;
115 }
116 }
117
118 static unsigned char
conv_int(FILE * stream,int * lenp,width_t width,void * addr,uint16_t flags,unsigned int base)119 conv_int (FILE *stream, int *lenp, width_t width, void *addr, uint16_t flags, unsigned int base)
120 {
121 uint_scanf_t val;
122 INT i;
123
124 i = scanf_getc (stream, lenp); /* after scanf_ungetc() */
125
126 switch (i) {
127 case '-':
128 flags |= FL_MINUS;
129 FALLTHROUGH;
130 case '+':
131 if (!--width || IS_EOF(i = scanf_getc(stream, lenp)))
132 goto err;
133 }
134
135 val = 0;
136
137 /* Leading '0' digit -- check for base indication */
138 if (i == '0') {
139 if (!--width || IS_EOF(i = scanf_getc (stream, lenp)))
140 goto putval;
141
142 flags |= FL_ANY;
143
144 if (TOLOW(i) == 'x' && (base == 0 || base == 16)) {
145 base = 16;
146 if (!--width || IS_EOF(i = scanf_getc (stream, lenp)))
147 goto putval;
148 #ifdef _WANT_IO_PERCENT_B
149 } else if (i == 'b' && base <= 2) {
150 base = 2;
151 if (!--width || IS_EOF(i = scanf_getc (stream, lenp)))
152 goto putval;
153 #endif
154 } else if (base == 0 || base == 8) {
155 base = 8;
156 }
157 } else if (base == 0)
158 base = 10;
159
160 do {
161 unsigned int c = digit_to_val(i);
162 if (c >= base) {
163 scanf_ungetc (i, stream, lenp);
164 break;
165 }
166 flags |= FL_ANY;
167 val = val * base + c;
168 if (!--width) goto putval;
169 } while (!IS_EOF(i = scanf_getc(stream, lenp)));
170 if (!(flags & FL_ANY))
171 goto err;
172
173 putval:
174 if (flags & FL_MINUS) val = -val;
175 putval (addr, val, flags);
176 return 1;
177
178 err:
179 return 0;
180 }
181
#if SCANF_BRACKET
/*
 * Handle a "%[...]" conversion.
 *
 * `fmt` points just past the '['. The scan set is compiled into a 256-bit
 * membership mask, then characters are read from `stream` while they belong
 * to the set and `width` is not exhausted. Matching characters are stored
 * through `addr` (when non-null) and the result is NUL terminated.
 *
 * Returns the format pointer just past the closing ']' on success, or 0
 * if the format ended before ']' or no input character matched.
 */
static const CHAR *
conv_brk(FILE *stream, int *lenp, width_t width, CHAR *addr, const CHAR *fmt)
{
    unsigned char set[32];
    unsigned char negate = 0;
    unsigned char in_range = 0;
    unsigned char range_end = 0;    /* init to avoid compiler warning */
    unsigned char matched = 0;
    unsigned char *p;
    INT pos;
    INT ch;

    memset(set, 0, sizeof(set));

    for (pos = 0; ; pos++) {
        unsigned char c = *fmt++;

        if (c == 0)
            return 0;               /* format ended inside the set */

        if (c == '^' && !pos) {
            negate = 1;
            continue;
        }

        /* ']' and '-' are only special after the first set position. */
        if (pos > negate) {
            if (c == ']')
                break;
            if (c == '-' && !in_range) {
                in_range = 1;
                continue;
            }
        }

        if (!in_range)
            range_end = c;

        /* Mark c itself, or every character between c and range_end. */
        for (;;) {
            set[c >> 3] |= 1 << (c & 7);
            if (c == range_end)
                break;
            if (c < range_end)
                c++;
            else
                c--;
        }

        in_range = 0;
    }

    /* A '-' directly before the closing ']' is a literal hyphen. */
    if (in_range)
        set['-'/8] |= 1 << ('-' & 7);

    if (negate) {
        for (p = set; p != set + sizeof(set); p++)
            *p = ~*p;
    }

    /* NUL ('\0') is considered a normal character, which matches Glibc.
       Note that there is no way to include NUL in the set itself. */
    do {
        ch = scanf_getc(stream, lenp);
        if (IS_EOF(ch))
            break;
        if (!((set[(unsigned char)ch >> 3] >> (ch & 7)) & 1)) {
            scanf_ungetc(ch, stream, lenp);
            break;
        }
        if (addr)
            *addr++ = ch;
        matched = 1;
    } while (--width);

    if (!matched)
        return 0;

    if (addr)
        *addr = 0;
    return fmt;
}
#endif /* SCANF_BRACKET */
261
262 #if SCANF_FLOAT
263 #define FLT_STREAM FILE
264 #include "conv_flt.c"
265 #endif /* SCANF_FLOAT */
266
skip_spaces(FILE * stream,int * lenp)267 static INT skip_spaces (FILE *stream, int *lenp)
268 {
269 INT i;
270 do {
271 if (IS_EOF(i = scanf_getc (stream, lenp)))
272 return i;
273 } while (ISSPACE (i));
274 scanf_ungetc (i, stream, lenp);
275 return i;
276 }
277
278 #ifdef SCANF_POSITIONAL
279
/* Wrapper that lets an argument cursor be handed around by pointer. */
typedef struct {
    va_list ap;
} my_va_list;

/*
 * Advance `ap` so that the next va_arg() yields argument number
 * `target_argno` (1-based). The cursor is assumed to be positioned at the
 * first argument on entry; a target of 1 or less consumes nothing.
 */
static void
skip_to_arg(my_va_list *ap, int target_argno)
{
    int argno;

    /*
     * Fortunately, all scanf args are pointers,
     * and so are the same size as void *
     */
    for (argno = 1; argno < target_argno; argno++)
        (void) va_arg(ap->ap, void *);
}
298
299 #endif
300
301 /**
302 Formatted input. This function is the heart of the \b scanf family of
303 functions.
304
305 Characters are read from \a stream and processed in a way described by
306 \a fmt. Conversion results will be assigned to the parameters passed
307 via \a ap.
308
309 The format string \a fmt is scanned for conversion specifications.
310 Anything that doesn't comprise a conversion specification is taken as
311 text that is matched literally against the input. White space in the
312 format string will match any white space in the data (including none),
313 all other characters match only itself. Processing is aborted as soon
314 as the data and format string no longer match, or there is an error or
315 end-of-file condition on \a stream.
316
317 Most conversions skip leading white space before starting the actual
318 conversion.
319
320 Conversions are introduced with the character \b %. Possible options
321 can follow the \b %:
322
323 - a \c * indicating that the conversion should be performed but
324 the conversion result is to be discarded; no parameters will
325 be processed from \c ap,
   - the character \c h indicating that the argument is a pointer
     to <tt>short int</tt> (rather than <tt>int</tt>),
   - the 2 characters \c hh indicating that the argument is a pointer
     to <tt>char</tt> (rather than <tt>int</tt>),
   - the character \c l indicating that the argument is a pointer
     to <tt>long int</tt> (rather than <tt>int</tt>, for integer
     type conversions), or a pointer to \c double (for floating
     point conversions).
334
335 In addition, a maximal field width may be specified as a nonzero
336 positive decimal integer, which will restrict the conversion to at
337 most this many characters from the input stream. This field width is
338 limited to at most 255 characters which is also the default value
339 (except for the <tt>%c</tt> conversion that defaults to 1).
340
341 The following conversion flags are supported:
342
343 - \c % Matches a literal \c % character. This is not a conversion.
344 - \c d Matches an optionally signed decimal integer; the next
345 pointer must be a pointer to \c int.
346 - \c i Matches an optionally signed integer; the next pointer must
347 be a pointer to \c int. The integer is read in base 16 if it
348 begins with \b 0x or \b 0X, in base 8 if it begins with \b 0, and
349 in base 10 otherwise. Only characters that correspond to the
350 base are used.
351 - \c o Matches an octal integer; the next pointer must be a pointer to
352 <tt>unsigned int</tt>.
353 - \c u Matches an optionally signed decimal integer; the next
354 pointer must be a pointer to <tt>unsigned int</tt>.
355 - \c x Matches an optionally signed hexadecimal integer; the next
356 pointer must be a pointer to <tt>unsigned int</tt>.
357 - \c f Matches an optionally signed floating-point number; the next
358 pointer must be a pointer to \c float.
359 - <tt>e, g, F, E, G</tt> Equivalent to \c f.
360 - \c s
361 Matches a sequence of non-white-space characters; the next pointer
362 must be a pointer to \c char, and the array must be large enough to
363 accept all the sequence and the terminating \c NUL character. The
364 input string stops at white space or at the maximum field width,
365 whichever occurs first.
366 - \c c
367 Matches a sequence of width count characters (default 1); the next
368 pointer must be a pointer to \c char, and there must be enough room
369 for all the characters (no terminating \c NUL is added). The usual
370 skip of leading white space is suppressed. To skip white space
371 first, use an explicit space in the format.
372 - \c [
373 Matches a nonempty sequence of characters from the specified set
374 of accepted characters; the next pointer must be a pointer to \c
375 char, and there must be enough room for all the characters in the
376 string, plus a terminating \c NUL character. The usual skip of
377 leading white space is suppressed. The string is to be made up
378 of characters in (or not in) a particular set; the set is defined
379 by the characters between the open bracket \c [ character and a
380 close bracket \c ] character. The set excludes those characters
381 if the first character after the open bracket is a circumflex
382 \c ^. To include a close bracket in the set, make it the first
383 character after the open bracket or the circumflex; any other
384 position will end the set. The hyphen character \c - is also
385 special; when placed between two other characters, it adds all
386 intervening characters to the set. To include a hyphen, make it
387 the last character before the final close bracket. For instance,
388 <tt>[^]0-9-]</tt> means the set of <em>everything except close
389 bracket, zero through nine, and hyphen</em>. The string ends
390 with the appearance of a character not in the (or, with a
391 circumflex, in) set or when the field width runs out. Note that
392 usage of this conversion enlarges the stack expense.
393 - \c p
394 Matches a pointer value (as printed by <tt>%p</tt> in printf()); the
395 next pointer must be a pointer to \c void.
396 - \c n
397 Nothing is expected; instead, the number of characters consumed
398 thus far from the input is stored through the next pointer, which
399 must be a pointer to \c int. This is not a conversion, although it
400 can be suppressed with the \c * flag.
401
402 These functions return the number of input items assigned, which can
403 be fewer than provided for, or even zero, in the event of a matching
404 failure. Zero indicates that, while there was input available, no
405 conversions were assigned; typically this is due to an invalid input
406 character, such as an alphabetic character for a <tt>%d</tt>
   conversion.  The value \c EOF is returned if an input failure, such
   as an end-of-file, occurs before any conversion.  If an error or
   end-of-file occurs after conversion has begun, the number of
   conversions which were successfully completed is returned.
411
   By default, all the conversions described above are available except
   the floating-point conversions, and the width is limited to 255
   characters.  The floating-point conversions are available in the
   extended version provided by the library \c libscanf_flt.a.  Also in
   this case the width is not limited (more precisely, it is limited to
   65535 characters).  To link a program against the extended version,
   use the following compiler flags in the link stage:
419
420 \code
421 -Wl,-u,vfscanf -lscanf_flt -lm
422 \endcode
423
424 A third version is available for environments that are tight on
425 space. In addition to the restrictions of the standard one, this
426 version implements no <tt>%[</tt> specification. This version is
427 provided in the library \c libscanf_min.a, and can be requested using
428 the following options in the link stage:
429
430 \code
431 -Wl,-u,vfscanf -lscanf_min -lm
432 \endcode
433 */
vfscanf(FILE * stream,const CHAR * fmt,va_list ap_orig)434 int vfscanf (FILE * stream, const CHAR *fmt, va_list ap_orig)
435 {
436 unsigned char nconvs;
437 UCHAR c;
438 width_t width;
439 void *addr;
440 #ifdef SCANF_POSITIONAL
441 my_va_list my_ap;
442 #define ap my_ap.ap
443 va_copy(ap, ap_orig);
444 #else
445 #define ap ap_orig
446 #endif
447 uint16_t flags;
448 INT i;
449 int scanf_len = 0;
450 #define lenp (&scanf_len)
451
452 nconvs = 0;
453
454 /* Initialization of stream_flags at each pass simplifies the register
455 allocation with GCC 3.3 - 4.2. Only the GCC 4.3 is good to move it
456 to the begin. */
457 while ((c = *fmt++) != 0) {
458
459 if (ISSPACE (c)) {
460 skip_spaces (stream, lenp);
461
462 } else if (c != '%'
463 || (c = *fmt++) == '%')
464 {
465 /* Ordinary character. */
466 if (IS_EOF(i = scanf_getc (stream, lenp)))
467 goto eof;
468 if ((UCHAR)i != c) {
469 scanf_ungetc (i, stream, lenp);
470 break;
471 }
472
473 } else {
474 flags = 0;
475
476 if (c == '*') {
477 flags = FL_STAR;
478 c = *fmt++;
479 }
480
481 for (;;) {
482 width = 0;
483 while ((c -= '0') < 10) {
484 flags |= FL_WIDTH;
485 width = width * 10 + c;
486 c = *fmt++;
487 }
488 c += '0';
489 if (flags & FL_WIDTH) {
490 #ifdef SCANF_POSITIONAL
491 if (c == '$') {
492 flags &= ~FL_WIDTH;
493 va_end(ap);
494 va_copy(ap, ap_orig);
495 skip_to_arg(&my_ap, width);
496 c = *fmt++;
497 continue;
498 }
499 #endif
500 /* C99 says that width must be greater than zero.
501 To simplify program do treat 0 as error in format. */
502 if (!width) break;
503 } else {
504 width = ~0;
505 }
506 break;
507 }
508
509 switch (c) {
510 case 'h':
511 flags |= FL_SHORT;
512 c = *fmt++;
513 if (c == 'h') {
514 flags |= FL_CHAR;
515 c = *fmt++;
516 }
517 break;
518 case 'l':
519 flags |= FL_LONG;
520 c = *fmt++;
521 #ifdef SCANF_LONGLONG
522 if (c == 'l') {
523 flags |= FL_LONGLONG;
524 c = *fmt++;
525 }
526 #endif
527 break;
528 #ifdef SCANF_LONGLONG
529 case 'L':
530 flags |= FL_LONGLONG;
531 c = *fmt++;
532 break;
533 #endif
534 #ifdef _WANT_IO_C99_FORMATS
535 #ifdef SCANF_LONGLONG
536 #define CHECK_LONGLONG(type) \
537 else if (sizeof(type) == sizeof(long long)) \
538 flags |= FL_LONGLONG
539 #else
540 #define CHECK_LONGLONG(type
541 #endif
542
543 #define CHECK_INT_SIZE(letter, type) \
544 case letter: \
545 if (sizeof(type) != sizeof(int)) { \
546 if (sizeof(type) == sizeof(long)) \
547 flags |= FL_LONG; \
548 else if (sizeof(type) == sizeof(short)) \
549 flags |= FL_SHORT; \
550 CHECK_LONGLONG(type); \
551 } \
552 c = *fmt++; \
553 break;
554
555 CHECK_INT_SIZE('j', intmax_t);
556 CHECK_INT_SIZE('z', size_t);
557 CHECK_INT_SIZE('t', ptrdiff_t);
558 #endif
559 }
560
561 #ifdef _WANT_IO_PERCENT_B
562 #define CNV_BASE "cdinopsuxXb"
563 #else
564 #define CNV_BASE "cdinopsuxX"
565 #endif
566
567 #if SCANF_BRACKET
568 # define CNV_BRACKET "["
569 #else
570 # define CNV_BRACKET ""
571 #endif
572 #if SCANF_FLOAT
573 # define CNV_FLOAT "efgEFG"
574 #else
575 # define CNV_FLOAT ""
576 #endif
577 #define CNV_LIST CNV_BASE CNV_BRACKET CNV_FLOAT
578 if (!c || !strchr (CNV_LIST, c))
579 break;
580
581 addr = (flags & FL_STAR) ? 0 : va_arg (ap, void *);
582
583 if (c == 'n') {
584 putval (addr, (unsigned)(scanf_len), flags);
585 continue;
586 }
587
588 if (c == 'c') {
589 if (!(flags & FL_WIDTH)) width = 1;
590 do {
591 if (IS_EOF(i = scanf_getc (stream, lenp)))
592 goto eof;
593 if (addr) {
594 #ifdef WIDE_CHARS
595 if (flags & FL_LONG) {
596 *(wchar_t *)addr = i;
597 addr = (wchar_t *)addr + 1;
598 } else
599 #endif
600 {
601 *(char *)addr = i;
602 addr = (char*)addr + 1;
603 }
604 }
605 } while (--width);
606 c = 1; /* no matter with smart GCC */
607
608 #if SCANF_BRACKET
609 } else if (c == '[') {
610 fmt = conv_brk (stream, lenp, width, addr, fmt);
611 c = (fmt != 0);
612 #endif
613
614 } else {
615
616 unsigned int base = 0;
617
618 if (IS_EOF(skip_spaces (stream, lenp)))
619 goto eof;
620
621 switch (c) {
622
623 case 's':
624 /* Now we have 1 nospace symbol. */
625 do {
626 if (IS_EOF(i = scanf_getc (stream, lenp)))
627 break;
628 if (ISSPACE (i)) {
629 scanf_ungetc (i, stream, lenp);
630 break;
631 }
632 if (addr) {
633 #ifdef WIDE_CHARS
634 if (flags & FL_LONG) {
635 *(wchar_t *)addr = i;
636 addr = (wchar_t *)addr + 1;
637 } else
638 #endif
639 {
640 *(char *)addr = i;
641 addr = (char*)addr + 1;
642 }
643 }
644 } while (--width);
645 if (addr){
646 #ifdef WIDE_CHARS
647 if (flags & FL_LONG)
648 *(wchar_t *)addr = 0;
649 else
650 #endif
651 *(char *)addr = 0;
652 }
653 c = 1; /* no matter with smart GCC */
654 break;
655
656 #if SCANF_FLOAT
657 case 'p':
658 if (sizeof(void *) > sizeof(int))
659 flags |= FL_LONG;
660 FALLTHROUGH;
661 case 'x':
662 case 'X':
663 base = 16;
664 goto conv_int;
665
666 #ifdef _WANT_IO_PERCENT_B
667 case 'b':
668 base = 2;
669 goto conv_int;
670 #endif
671
672 case 'd':
673 case 'u':
674 base = 10;
675 goto conv_int;
676
677 case 'o':
678 base = 8;
679 FALLTHROUGH;
680 case 'i':
681 conv_int:
682 c = conv_int (stream, lenp, width, addr, flags, base);
683 break;
684
685 default: /* e,E,f,F,g,G */
686 c = conv_flt (stream, lenp, width, addr, flags);
687 #else
688 case 'd':
689 case 'u':
690 base = 10;
691 goto conv_int;
692
693 #ifdef _WANT_IO_PERCENT_B
694 case 'b':
695 base = 2;
696 goto conv_int;
697 #endif
698
699 case 'o':
700 base = 8;
701 FALLTHROUGH;
702 case 'i':
703 goto conv_int;
704
705 case 'p':
706 if (sizeof(void *) > sizeof(int))
707 flags |= FL_LONG;
708 FALLTHROUGH;
709 default: /* p,x,X */
710 base = 16;
711 conv_int:
712 c = conv_int (stream, lenp, width, addr, flags, base);
713 #endif
714 }
715 } /* else */
716
717 if (!c) {
718 if (stream->flags & (__SERR | __SEOF))
719 goto eof;
720 break;
721 }
722 if (!(flags & FL_STAR)) nconvs += 1;
723 } /* else */
724 } /* while */
725 #ifdef PRINTF_POSITIONAL
726 va_end(ap);
727 #endif
728 return nconvs;
729
730 eof:
731 #ifdef PRINTF_POSITIONAL
732 va_end(ap);
733 #endif
734 #undef ap
735 return nconvs ? nconvs : EOF;
736 }
737
/*
 * When double is the default floating-point format and vfscanf has not
 * been renamed by a macro, also export this implementation under the
 * name __d_vfscanf: as a strong alias where the toolchain supports it,
 * otherwise as a thin forwarding function.
 */
#if defined(FORMAT_DEFAULT_DOUBLE) && !defined(vfscanf)
#if defined(_HAVE_ALIAS_ATTRIBUTE)
__strong_reference(vfscanf, __d_vfscanf);
#else
int
__d_vfscanf(FILE *stream, const char *fmt, va_list ap)
{
    return vfscanf(stream, fmt, ap);
}
#endif
#endif
745