1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 Intel Corporation
4 **
5 ** Permission is hereby granted, free of charge, to any person obtaining a copy
6 ** of this software and associated documentation files (the "Software"), to deal
7 ** in the Software without restriction, including without limitation the rights
8 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 ** copies of the Software, and to permit persons to whom the Software is
10 ** furnished to do so, subject to the following conditions:
11 **
12 ** The above copyright notice and this permission notice shall be included in
13 ** all copies or substantial portions of the Software.
14 **
15 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 ** THE SOFTWARE.
22 **
23 ****************************************************************************/
24 
25 #define _BSD_SOURCE 1
26 #define _DEFAULT_SOURCE 1
27 #ifndef __STDC_LIMIT_MACROS
28 #  define __STDC_LIMIT_MACROS 1
29 #endif
30 
31 #include "tinycbor/cbor.h"
32 #include "tinycbor/compilersupport_p.h"
33 
34 #include <inttypes.h>
35 #include <float.h>
36 #ifndef CBOR_NO_FLOATING_POINT
37 #include <math.h>
38 #endif
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 
43 #include "tinycbor/math_support_p.h"
44 
45 /**
46  * \defgroup CborPretty Converting CBOR to text
47  * \brief Group of functions used to convert CBOR to text form.
48  *
 * This group contains two functions that can be used to convert one
50  * CborValue object to a text representation. This module attempts to follow
51  * the recommendations from RFC 7049 section 6 "Diagnostic Notation", though it
52  * has a few differences. They are noted below.
53  *
54  * TinyCBOR does not provide a way to convert from the text representation back
55  * to encoded form. To produce a text form meant to be parsed, CborToJson is
56  * recommended instead.
57  *
58  * Either of the functions in this section will attempt to convert exactly one
59  * CborValue object to text. Those functions may return any error documented
60  * for the functions for CborParsing. In addition, if the C standard library
61  * stream functions return with error, the text conversion will return with
62  * error CborErrorIO.
63  *
64  * These functions also perform UTF-8 validation in CBOR text strings. If they
 * encounter a sequence of bytes that is not permitted in UTF-8, they will return
66  * CborErrorInvalidUtf8TextString. That includes encoding of surrogate points
67  * in UTF-8.
68  *
69  * \warning The output type produced by these functions is not guaranteed to
70  * remain stable. A future update of TinyCBOR may produce different output for
71  * the same input and parsers may be unable to handle them.
72  *
73  * \sa CborParsing, CborToJson, cbor_parser_init()
74  */
75 
76 /**
77  * \addtogroup CborPretty
78  * @{
79  * <h2 class="groupheader">Text format</h2>
80  *
81  * As described in RFC 7049 section 6 "Diagnostic Notation", the format is
82  * largely borrowed from JSON, but modified to suit CBOR's different data
83  * types. TinyCBOR makes further modifications to distinguish different, but
84  * similar values.
85  *
86  * CBOR values are currently encoded as follows:
87  * \par Integrals (unsigned and negative)
88  *      Base-10 (decimal) text representation of the value
89  * \par Byte strings:
90  *      <tt>"h'"</tt> followed by the Base16 (hex) representation of the binary data, followed by an ending quote (')
91  * \par Text strings:
92  *      C-style escaped string in quotes, with C11/C++11 escaping of Unicode codepoints above U+007F.
93  * \par Tags:
94  *      Tag value, with the tagged value in parentheses. No special encoding of the tagged value is performed.
95  * \par Simple types:
96  *      <tt>"simple(nn)"</tt> where \c nn is the simple value
97  * \par Null:
98  *      \c null
99  * \par Undefined:
100  *      \c undefined
101  * \par Booleans:
102  *      \c true or \c false
103  * \par Floating point:
 *      If NaN or infinite, the C library's \c printf representation of the value (typically \c nan, \c inf or \c -inf), with no suffix.
105  *      Otherwise, the decimal representation with as many digits as necessary to ensure no loss of information,
106  *      with float values suffixed by "f" and half-float values suffixed by "f16" (doubles have no suffix). A dot is always present.
107  * \par Arrays:
108  *      Comma-separated list of elements, enclosed in square brackets ("[" and "]").
109  *     If the array length is indeterminate, an underscore ("_") appears immediately after the opening bracket.
110  * \par Maps:
111  *      Comma-separated list of key-value pairs, with the key and value separated
112  *      by a colon (":"), enclosed in curly braces ("{" and "}").
113  *      If the map length is indeterminate, an underscore ("_") appears immediately after the opening brace.
114  */
115 
/* Writes the n bytes starting at buffer to out as lowercase hexadecimal,
 * two digits per byte and with no separators.
 *
 * Returns 0 once every byte has been written, or the negative value returned
 * by fprintf for the first write that failed. */
static int hexDump(FILE *out, const uint8_t *buffer, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        int written = fprintf(out, "%02" PRIx8, buffer[i]);
        if (written < 0)
            return written;
    }

    /* success is reported as 0 rather than as the number of characters written */
    return 0;
}
125 
126 /* This function decodes buffer as UTF-8 and prints as escaped UTF-16.
127  * On UTF-8 decoding error, it returns CborErrorInvalidUtf8TextString */
utf8EscapedDump(FILE * out,const char * buffer,size_t n)128 static int utf8EscapedDump(FILE *out, const char *buffer, size_t n)
129 {
130     uint32_t uc;
131     while (n--) {
132         uc = (uint8_t)*buffer++;
133         if (uc < 0x80) {
134             /* single-byte UTF-8 */
135             if (uc < 0x7f && uc >= 0x20 && uc != '\\' && uc != '"') {
136                 if (fprintf(out, "%c", (char)uc) < 0)
137                     return CborErrorIO;
138                 continue;
139             }
140 
141             /* print as an escape sequence */
142             char escaped = (char)uc;
143             switch (uc) {
144             case '"':
145             case '\\':
146                 break;
147             case '\b':
148                 escaped = 'b';
149                 break;
150             case '\f':
151                 escaped = 'f';
152                 break;
153             case '\n':
154                 escaped = 'n';
155                 break;
156             case '\r':
157                 escaped = 'r';
158                 break;
159             case '\t':
160                 escaped = 't';
161                 break;
162             default:
163                 goto print_utf16;
164             }
165             if (fprintf(out, "\\%c", escaped) < 0)
166                 return CborErrorIO;
167             continue;
168         }
169 
170         /* multi-byte UTF-8, decode it */
171         unsigned charsNeeded;
172         uint32_t min_uc;
173         if (unlikely(uc <= 0xC1))
174             return CborErrorInvalidUtf8TextString;
175         if (uc < 0xE0) {
176             /* two-byte UTF-8 */
177             charsNeeded = 2;
178             min_uc = 0x80;
179             uc &= 0x1f;
180         } else if (uc < 0xF0) {
181             /* three-byte UTF-8 */
182             charsNeeded = 3;
183             min_uc = 0x800;
184             uc &= 0x0f;
185         } else if (uc < 0xF5) {
186             /* four-byte UTF-8 */
187             charsNeeded = 4;
188             min_uc = 0x10000;
189             uc &= 0x07;
190         } else {
191             return CborErrorInvalidUtf8TextString;
192         }
193 
194         if (n < charsNeeded - 1)
195             return CborErrorInvalidUtf8TextString;
196 
197         /* first continuation character */
198         uint8_t b = (uint8_t)*buffer++;
199         if ((b & 0xc0) != 0x80)
200             return CborErrorInvalidUtf8TextString;
201         uc <<= 6;
202         uc |= b & 0x3f;
203 
204         if (charsNeeded > 2) {
205             /* second continuation character */
206             b = (uint8_t)*buffer++;
207             if ((b & 0xc0) != 0x80)
208                 return CborErrorInvalidUtf8TextString;
209             uc <<= 6;
210             uc |= b & 0x3f;
211 
212             if (charsNeeded > 3) {
213                 /* third continuation character */
214                 b = (uint8_t)*buffer++;
215                 if ((b & 0xc0) != 0x80)
216                     return CborErrorInvalidUtf8TextString;
217                 uc <<= 6;
218                 uc |= b & 0x3f;
219             }
220         }
221 
222         /* overlong sequence? surrogate pair? out or range? */
223         if (uc < min_uc || uc - 0xd800U < 2048U || uc > 0x10ffff)
224             return CborErrorInvalidUtf8TextString;
225 
226         /* now print the sequence */
227         if (charsNeeded > 3) {
228             /* needs surrogate pairs */
229             if (fprintf(out, "\\u%04" PRIX32 "\\u%04" PRIX32,
230                         (uc >> 10) + 0xd7c0,    /* high surrogate */
231                         (uc % 0x0400) + 0xdc00) < 0)
232                 return CborErrorIO;
233         } else {
234 print_utf16:
235             /* no surrogate pair needed */
236             if (fprintf(out, "\\u%04" PRIX32, uc) < 0)
237                 return CborErrorIO;
238         }
239     }
240     return CborNoError;
241 }
242 
243 static CborError value_to_pretty(FILE *out, CborValue *it);
container_to_pretty(FILE * out,CborValue * it,CborType containerType)244 static CborError container_to_pretty(FILE *out, CborValue *it, CborType containerType)
245 {
246     const char *comma = "";
247     while (!cbor_value_at_end(it)) {
248         if (fprintf(out, "%s", comma) < 0)
249             return CborErrorIO;
250         comma = ", ";
251 
252         CborError err = value_to_pretty(out, it);
253         if (err)
254             return err;
255 
256         if (containerType == CborArrayType)
257             continue;
258 
259         /* map: that was the key, so get the value */
260         if (fprintf(out, ": ") < 0)
261             return CborErrorIO;
262         err = value_to_pretty(out, it);
263         if (err)
264             return err;
265     }
266     return CborNoError;
267 }
268 
value_to_pretty(FILE * out,CborValue * it)269 static CborError value_to_pretty(FILE *out, CborValue *it)
270 {
271     CborError err = CborNoError;
272     CborType type = cbor_value_get_type(it);
273     switch (type) {
274     case CborArrayType:
275     case CborMapType: {
276         /* recursive type */
277         CborValue recursed;
278 
279         if (fprintf(out, type == CborArrayType ? "[" : "{") < 0)
280             return CborErrorIO;
281         if (!cbor_value_is_length_known(it)) {
282             if (fprintf(out, "_ ") < 0)
283                 return CborErrorIO;
284         }
285 
286         err = cbor_value_enter_container(it, &recursed);
287         if (err) {
288             it->offset = recursed.offset;
289             return err;       /* parse error */
290         }
291         err = container_to_pretty(out, &recursed, type);
292         if (err) {
293             it->offset = recursed.offset;
294             return err;       /* parse error */
295         }
296         err = cbor_value_leave_container(it, &recursed);
297         if (err)
298             return err;       /* parse error */
299 
300         if (fprintf(out, type == CborArrayType ? "]" : "}") < 0)
301             return CborErrorIO;
302         return CborNoError;
303     }
304 
305     case CborIntegerType: {
306         uint64_t val;
307         cbor_value_get_raw_integer(it, &val);    /* can't fail */
308 
309         if (cbor_value_is_unsigned_integer(it)) {
310             if (fprintf(out, "%" PRIu64, val) < 0)
311                 return CborErrorIO;
312         } else {
313             /* CBOR stores the negative number X as -1 - X
314              * (that is, -1 is stored as 0, -2 as 1 and so forth) */
315             if (++val) {                /* unsigned overflow may happen */
316                 if (fprintf(out, "-%" PRIu64, val) < 0)
317                     return CborErrorIO;
318             } else {
319                 /* overflown
320                  *   0xffff`ffff`ffff`ffff + 1 =
321                  * 0x1`0000`0000`0000`0000 = 18446744073709551616 (2^64) */
322                 if (fprintf(out, "-18446744073709551616") < 0)
323                     return CborErrorIO;
324             }
325         }
326         break;
327     }
328 
329     case CborByteStringType:{
330         size_t n = 0;
331         uint8_t *buffer;
332         err = cbor_value_dup_byte_string(it, &buffer, &n, it);
333         if (err)
334             return err;
335 
336         bool failed = fprintf(out, "h'") < 0 || hexDump(out, buffer, n) < 0 || fprintf(out, "'") < 0;
337         free(buffer);
338         return failed ? CborErrorIO : CborNoError;
339     }
340 
341     case CborTextStringType: {
342         size_t n = 0;
343         char *buffer;
344         err = cbor_value_dup_text_string(it, &buffer, &n, it);
345         if (err)
346             return err;
347 
348         err = CborNoError;
349         bool failed = fprintf(out, "\"") < 0
350                       || (err = utf8EscapedDump(out, buffer, n)) != CborNoError
351                       || fprintf(out, "\"") < 0;
352         free(buffer);
353         return err != CborNoError ? err :
354                                     failed ? CborErrorIO : CborNoError;
355     }
356 
357     case CborTagType: {
358         CborTag tag;
359         cbor_value_get_tag(it, &tag);       /* can't fail */
360         if (fprintf(out, "%" PRIu64 "(", tag) < 0)
361             return CborErrorIO;
362         err = cbor_value_advance_fixed(it);
363         if (err)
364             return err;
365         err = value_to_pretty(out, it);
366         if (err)
367             return err;
368         if (fprintf(out, ")") < 0)
369             return CborErrorIO;
370         return CborNoError;
371     }
372 
373     case CborSimpleType: {
374         uint8_t simple_type;
375         cbor_value_get_simple_type(it, &simple_type);  /* can't fail */
376         if (fprintf(out, "simple(%" PRIu8 ")", simple_type) < 0)
377             return CborErrorIO;
378         break;
379     }
380 
381     case CborNullType:
382         if (fprintf(out, "null") < 0)
383             return CborErrorIO;
384         break;
385 
386     case CborUndefinedType:
387         if (fprintf(out, "undefined") < 0)
388             return CborErrorIO;
389         break;
390 
391     case CborBooleanType: {
392         bool val;
393         cbor_value_get_boolean(it, &val);       /* can't fail */
394         if (fprintf(out, val ? "true" : "false") < 0)
395             return CborErrorIO;
396         break;
397     }
398 #ifndef CBOR_NO_FLOATING_POINT
399     case CborDoubleType: {
400         const char *suffix;
401         double val;
402         if (false) {
403             float f;
404     case CborFloatType:
405             cbor_value_get_float(it, &f);
406             val = f;
407             suffix = "f";
408 #ifndef CBOR_NO_HALF_FLOAT_TYPE
409         } else if (false) {
410             uint16_t f16;
411     case CborHalfFloatType:
412             cbor_value_get_half_float(it, &f16);
413             val = decode_half(f16);
414             suffix = "f16";
415 #endif
416         } else {
417             cbor_value_get_double(it, &val);
418             suffix = "";
419         }
420 
421         int r = fpclassify(val);
422         if (r == FP_NAN || r == FP_INFINITE)
423             suffix = "";
424 
425         uint64_t ival = (uint64_t)fabs(val);
426         if (ival == fabs(val)) {
427             /* this double value fits in a 64-bit integer, so show it as such
428              * (followed by a floating point suffix, to disambiguate) */
429             r = fprintf(out, "%s%" PRIu64 ".%s", val < 0 ? "-" : "", ival, suffix);
430         } else {
431             /* this number is definitely not a 64-bit integer */
432             r = fprintf(out, "%." DBL_DECIMAL_DIG_STR "g%s", val, suffix);
433         }
434         if (r < 0)
435             return CborErrorIO;
436         break;
437     }
438 #endif
439     case CborInvalidType:
440     default:
441         if (fprintf(out, "invalid") < 0)
442             return CborErrorIO;
443         return CborErrorUnknownType;
444     }
445 
446     if (err == CborNoError)
447 	err = cbor_value_advance_fixed(it);
448     return err;
449 }
450 
451 /**
452  * \fn CborError cbor_value_to_pretty(FILE *out, const CborValue *value)
453  *
454  * Converts the current CBOR type pointed by \a value to its textual
455  * representation and writes it to the \a out stream. If an error occurs, this
456  * function returns an error code similar to CborParsing.
457  *
458  * \sa cbor_value_to_pretty_advance(), cbor_value_to_json_advance()
459  */
460 
461 /**
462  * Converts the current CBOR type pointed by \a value to its textual
463  * representation and writes it to the \a out stream. If an error occurs, this
464  * function returns an error code similar to CborParsing.
465  *
466  * If no error ocurred, this function advances \a value to the next element.
467  * Often, concatenating the text representation of multiple elements can be
468  * done by appending a comma to the output stream.
469  *
470  * \sa cbor_value_to_pretty(), cbor_value_to_json_advance()
471  */
cbor_value_to_pretty_advance(FILE * out,CborValue * value)472 CborError cbor_value_to_pretty_advance(FILE *out, CborValue *value)
473 {
474     return value_to_pretty(out, value);
475 }
476 
477 /** @} */
478