1 /*
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms are permitted
6 * provided that the above copyright notice and this paragraph are
7 * duplicated in all such forms and that any documentation,
8 * and/or other materials related to such
9 * distribution and use acknowledge that the software was developed
10 * by the University of California, Berkeley. The name of the
11 * University may not be used to endorse or promote products derived
12 * from this software without specific prior written permission.
13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 */
17
18 /*
19 FUNCTION
20 <<swscanf>>, <<fwscanf>>, <<wscanf>>---scan and format wide character input
21
22 INDEX
23 wscanf
24 INDEX
25 _wscanf_r
26 INDEX
27 fwscanf
28 INDEX
29 _fwscanf_r
30 INDEX
31 swscanf
32 INDEX
33 _swscanf_r
34
35 SYNOPSIS
36 #include <stdio.h>
37
38 int wscanf(const wchar_t *__restrict <[format]>, ...);
39 int fwscanf(FILE *__restrict <[fd]>,
40 const wchar_t *__restrict <[format]>, ...);
41 int swscanf(const wchar_t *__restrict <[str]>,
42 const wchar_t *__restrict <[format]>, ...);
43
44 int wscanf( const wchar_t *<[format]>, ...);
45 int fwscanf( FILE *<[fd]>,
46 const wchar_t *<[format]>, ...);
47 int swscanf( const wchar_t *<[str]>,
48 const wchar_t *<[format]>, ...);
49
50 DESCRIPTION
51 <<wscanf>> scans a series of input fields from standard input,
52 one wide character at a time. Each field is interpreted according to
53 a format specifier passed to <<wscanf>> in the format string at
54 <<*<[format]>>>. <<wscanf>> stores the interpreted input from
55 each field at the address passed to it as the corresponding argument
56 following <[format]>. You must supply the same number of
57 format specifiers and address arguments as there are input fields.
58
59 There must be sufficient address arguments for the given format
60 specifiers; if not the results are unpredictable and likely
disastrous. Excess address arguments are merely ignored.
62
63 <<wscanf>> often produces unexpected results if the input diverges from
an expected pattern. Since the combination of <<fgetws>>
65 followed by <<swscanf>> is safe and easy, that is the preferred way
66 to be certain that a program is synchronized with input at the end
67 of a line.
68
69 <<fwscanf>> and <<swscanf>> are identical to <<wscanf>>, other than the
70 source of input: <<fwscanf>> reads from a file, and <<swscanf>>
71 from a string.
72
73 The routines <<_wscanf_r>>, <<_fwscanf_r>>, and <<_swscanf_r>> are reentrant
74 versions of <<wscanf>>, <<fwscanf>>, and <<swscanf>> that take an additional
75 first argument pointing to a reentrancy structure.
76
77 The string at <<*<[format]>>> is a wide character sequence composed
78 of zero or more directives. Directives are composed of
79 one or more whitespace characters, non-whitespace characters,
80 and format specifications.
81
82 Whitespace characters are blank (<< >>), tab (<<\t>>), or
83 newline (<<\n>>).
84 When <<wscanf>> encounters a whitespace character in the format string
85 it will read (but not store) all consecutive whitespace characters
86 up to the next non-whitespace character in the input.
87
88 Non-whitespace characters are all other ASCII characters except the
89 percent sign (<<%>>). When <<wscanf>> encounters a non-whitespace
90 character in the format string it will read, but not store
91 a matching non-whitespace character.
92
93 Format specifications tell <<wscanf>> to read and convert characters
from the input field into specific types of values, and store them
95 in the locations specified by the address arguments.
96
97 Trailing whitespace is left unread unless explicitly
98 matched in the format string.
99
100 The format specifiers must begin with a percent sign (<<%>>)
101 and have the following form:
102
103 . %[*][<[width]>][<[size]>]<[type]>
104
105 Each format specification begins with the percent character (<<%>>).
106 The other fields are:
107 O+
108 o *
109
110 an optional marker; if present, it suppresses interpretation and
111 assignment of this input field.
112
113 o <[width]>
114
115 an optional maximum field width: a decimal integer,
116 which controls the maximum number of characters that
117 will be read before converting the current input field. If the
118 input field has fewer than <[width]> characters, <<wscanf>>
119 reads all the characters in the field, and then
120 proceeds with the next field and its format specification.
121
If a whitespace or a non-convertible wide character occurs
before <[width]> characters are read, the characters up
124 to that character are read, converted, and stored.
125 Then <<wscanf>> proceeds to the next format specification.
126
127 o <[size]>
128
129 <<h>>, <<j>>, <<l>>, <<L>>, <<t>>, and <<z>> are optional size
130 characters which override the default way that <<wscanf>>
131 interprets the data type of the corresponding argument.
132
133 @multitable @columnfractions 0.18 0.30 0.52
134 @headitem
135 Modifier
136 @tab
137 Type(s)
138 @tab
139 @item
140 hh
141 @tab
142 d, i, o, u, x, n
143 @tab
144 convert input to char, store in char object
145 @item
146 h
147 @tab
148 d, i, o, u, x, n
149 @tab
150 convert input to short, store in short object
151 @item
152 h
153 @tab
154 e, f, c, s, p
155 @tab
156 no effect
157 @item
158 j
159 @tab
160 d, i, o, u, x, n
161 @tab
162 convert input to intmax_t, store in intmax_t object
163 @item
164 j
165 @tab
166 all others
167 @tab
168 no effect
169 @item
170 l
171 @tab
172 d, i, o, u, x, n
173 @tab
174 convert input to long, store in long object
175 @item
176 l
177 @tab
178 e, f, g
179 @tab
180 convert input to double, store in a double object
181 @item
182 l
183 @tab
184 c, s, [
185 @tab
186 the input is stored in a wchar_t object
187 @item
188 l
189 @tab
190 p
191 @tab
192 no effect
193 @item
194 ll
195 @tab
196 d, i, o, u, x, n
197 @tab
198 convert to long long, store in long long object
199 @item
200 L
201 @tab
202 d, i, o, u, x, n
203 @tab
204 convert to long long, store in long long object
205 @item
206 L
207 @tab
208 e, f, g, E, G
209 @tab
210 convert to long double, store in long double object
211 @item
212 L
213 @tab
214 all others
215 @tab
216 no effect
217 @item
218 t
219 @tab
220 d, i, o, u, x, n
221 @tab
222 convert input to ptrdiff_t, store in ptrdiff_t object
223 @item
224 t
225 @tab
226 all others
227 @tab
228 no effect
229 @item
230 z
231 @tab
232 d, i, o, u, x, n
233 @tab
234 convert input to size_t, store in size_t object
235 @item
236 z
237 @tab
238 all others
239 @tab
240 no effect
241 @end multitable
242
243 o <[type]>
244
245 A character to specify what kind of conversion
246 <<wscanf>> performs. Here is a table of the conversion
247 characters:
248
249 o+
250 o %
No conversion is done; a literal percent character (<<%>>) is matched in the input and nothing is stored.
252
253 o c
254 Scans one wide character. Corresponding <[arg]>: <<(char *arg)>>.
If an <<l>> specifier is present, the corresponding
256 <[arg]> is a <<(wchar_t *arg)>>.
257
258 o s
259 Reads a character string into the array supplied.
260 Corresponding <[arg]>: <<(char arg[])>>.
261 If an <<l>> specifier is present, the corresponding <[arg]> is a <<(wchar_t *arg)>>.
262
263 o [<[pattern]>]
264 Reads a non-empty character string into memory
265 starting at <[arg]>. This area must be large
266 enough to accept the sequence and a
267 terminating null character which will be added
268 automatically. (<[pattern]> is discussed in the paragraph following
269 this table). Corresponding <[arg]>: <<(char *arg)>>.
270 If an <<l>> specifier is present, the corresponding <[arg]> is
271 a <<(wchar_t *arg)>>.
272
273 o d
274 Reads a decimal integer into the corresponding <[arg]>: <<(int *arg)>>.
275
276 o o
277 Reads an octal integer into the corresponding <[arg]>: <<(int *arg)>>.
278
279 o u
280 Reads an unsigned decimal integer into the corresponding
281 <[arg]>: <<(unsigned int *arg)>>.
282
283 o x,X
Reads a hexadecimal integer into the corresponding <[arg]>:
285 <<(int *arg)>>.
286
287 o e, f, g
Reads a floating-point number into the corresponding <[arg]>:
289 <<(float *arg)>>.
290
291 o E, F, G
Reads a floating-point number into the corresponding <[arg]>:
293 <<(double *arg)>>.
294
295 o i
296 Reads a decimal, octal or hexadecimal integer into the
297 corresponding <[arg]>: <<(int *arg)>>.
298
299 o n
300 Stores the number of characters read in the corresponding
301 <[arg]>: <<(int *arg)>>.
302
303 o p
304 Stores a scanned pointer. ANSI C leaves the details
305 to each implementation; this implementation treats
306 <<%p>> exactly the same as <<%U>>. Corresponding
307 <[arg]>: <<(void **arg)>>.
308 o-
309
310 A <[pattern]> of characters surrounded by square brackets can be used
311 instead of the <<s>> type character. <[pattern]> is a set of
312 characters which define a search set of possible characters making up
313 the <<wscanf>> input field. If the first character in the brackets is a
314 caret (<<^>>), the search set is inverted to include all ASCII characters
315 except those between the brackets. There is no range facility as is
316 defined in the corresponding non-wide character scanf functions.
317 Ranges are not part of the POSIX standard.
318
319 Here are some <[pattern]> examples:
320 o+
321 o %[abcd]
322 matches wide character strings containing only
323 <<a>>, <<b>>, <<c>>, and <<d>>.
324
325 o %[^abcd]
326 matches wide character strings containing any characters except
327 <<a>>, <<b>>, <<c>>, or <<d>>.
328
329 o %[A-DW-Z]
330 Note: No wide character ranges, so this expression matches wide
331 character strings containing <<A>>, <<->>, <<D>>, <<W>>, <<Z>>.
332 o-
333
334 Floating point numbers (for field types <<e>>, <<f>>, <<g>>, <<E>>,
335 <<F>>, <<G>>) must correspond to the following general form:
336
337 . [+/-] ddddd[.]ddd [E|e[+|-]ddd]
338
where objects enclosed in square brackets are optional, and <<ddd>>
340 represents decimal, octal, or hexadecimal digits.
341 O-
342
343 RETURNS
344 <<wscanf>> returns the number of input fields successfully
345 scanned, converted and stored; the return value does
346 not include scanned fields which were not stored.
347
348 If <<wscanf>> attempts to read at end-of-file, the return
349 value is <<EOF>>.
350
351 If no fields were stored, the return value is <<0>>.
352
353 <<wscanf>> might stop scanning a particular field before
354 reaching the normal field end character, or may
355 terminate entirely.
356
357 <<wscanf>> stops scanning and storing the current field
358 and moves to the next input field (if any)
359 in any of the following situations:
360
361 O+
362 o The assignment suppressing character (<<*>>) appears
363 after the <<%>> in the format specification; the current
364 input field is scanned but not stored.
365
366 o <[width]> characters have been read (<[width]> is a
367 width specification, a positive decimal integer).
368
369 o The next wide character read cannot be converted
under the current format (for example,
371 if a <<Z>> is read when the format is decimal).
372
373 o The next wide character in the input field does not appear
374 in the search set (or does appear in the inverted search set).
375 O-
376
377 When <<wscanf>> stops scanning the current input field for one of
378 these reasons, the next character is considered unread and
379 used as the first character of the following input field, or the
380 first character in a subsequent read operation on the input.
381
382 <<wscanf>> will terminate under the following circumstances:
383
384 O+
385 o The next wide character in the input field conflicts
386 with a corresponding non-whitespace character in the
387 format string.
388
389 o The next wide character in the input field is <<WEOF>>.
390
391 o The format string has been exhausted.
392 O-
393
394 When the format string contains a wide character sequence that is
395 not part of a format specification, the same wide character
396 sequence must appear in the input; <<wscanf>> will
397 scan but not store the matched characters. If a
398 conflict occurs, the first conflicting wide character remains in the
399 input as if it had never been read.
400
401 PORTABILITY
<<wscanf>> is C99, POSIX.1-2008.
403
404 Supporting OS subroutines required: <<close>>, <<fstat>>, <<isatty>>,
405 <<lseek>>, <<read>>, <<sbrk>>, <<write>>.
406 */
407
408 #define _DEFAULT_SOURCE
409 #include <_ansi.h>
410 #include <stdio.h>
411 #include <wchar.h>
412 #include <stdarg.h>
413 #include "local.h"
414
415 int
swscanf(const wchar_t * __restrict str,const wchar_t * __restrict fmt,...)416 swscanf (const wchar_t *__restrict str, const wchar_t *__restrict fmt, ...)
417 {
418 int ret;
419 va_list ap;
420 FILE f;
421
422 f._flags = __SRD | __SSTR;
423 f._bf._base = f._p = (unsigned char *) str;
424 f._bf._size = f._r = wcslen (str) * sizeof (wchar_t);
425 f._read = __seofread;
426 f._ub._base = NULL;
427 f._lb._base = NULL;
428 f._flags2 = 0;
429 f._ur = 0;
430 f._file = -1; /* No file. */
431 va_start (ap, fmt);
432 ret = _ssvfwscanf ( &f, fmt, ap);
433 va_end (ap);
434 return ret;
435 }
436