/* * Encoding and decoding basic formats: hex, base64. * * These are in-place operations which may allow an optimized implementation. * * Base-64: https://tools.ietf.org/html/rfc4648#section-4 */ #include "duk_internal.h" /* Shared handling for encode/decode argument. Fast path handling for * buffer and string values because they're the most common. In particular, * avoid creating a temporary string or buffer when possible. */ DUK_LOCAL const duk_uint8_t *duk__prep_codec_arg(duk_context *ctx, duk_idx_t index, duk_size_t *out_len) { DUK_ASSERT(duk_is_valid_index(ctx, index)); /* checked by caller */ if (duk_is_buffer(ctx, index)) { return (const duk_uint8_t *) duk_get_buffer(ctx, index, out_len); } else { return (const duk_uint8_t *) duk_to_lstring(ctx, index, out_len); } } #if defined(DUK_USE_BASE64_FASTPATH) DUK_LOCAL void duk__base64_encode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst) { duk_uint_t t; duk_size_t n_full, n_full3, n_final; const duk_uint8_t *src_end_fast; n_full = srclen / 3; /* full 3-byte -> 4-char conversions */ n_full3 = n_full * 3; n_final = srclen - n_full3; DUK_ASSERT_DISABLE(n_final >= 0); DUK_ASSERT(n_final <= 2); src_end_fast = src + n_full3; while (DUK_UNLIKELY(src != src_end_fast)) { t = (duk_uint_t) (*src++); t = (t << 8) + (duk_uint_t) (*src++); t = (t << 8) + (duk_uint_t) (*src++); *dst++ = duk_base64_enctab[t >> 18]; *dst++ = duk_base64_enctab[(t >> 12) & 0x3f]; *dst++ = duk_base64_enctab[(t >> 6) & 0x3f]; *dst++ = duk_base64_enctab[t & 0x3f]; #if 0 /* Tested: not faster on x64 */ /* aaaaaabb bbbbcccc ccdddddd */ dst[0] = duk_base64_enctab[(src[0] >> 2) & 0x3f]; dst[1] = duk_base64_enctab[((src[0] << 4) & 0x30) | ((src[1] >> 4) & 0x0f)]; dst[2] = duk_base64_enctab[((src[1] << 2) & 0x3f) | ((src[2] >> 6) & 0x03)]; dst[3] = duk_base64_enctab[src[2] & 0x3f]; src += 3; dst += 4; #endif } switch (n_final) { /* case 0: nop */ case 1: { /* XX== */ t = (duk_uint_t) (*src++); *dst++ = duk_base64_enctab[t >> 2]; /* XXXXXX-- */ *dst++ = duk_base64_enctab[(t << 4) & 0x3f]; /* ------XX */ *dst++ = DUK_ASC_EQUALS; *dst++ = DUK_ASC_EQUALS; break; } case 2: { /* XXX= */ t = (duk_uint_t) (*src++); t = (t << 8) + (duk_uint_t) (*src++); *dst++ = duk_base64_enctab[t >> 10]; /* XXXXXX-- -------- */ *dst++ = duk_base64_enctab[(t >> 4) & 0x3f]; /* ------XX XXXX---- */ *dst++ = duk_base64_enctab[(t << 2) & 0x3f]; /* -------- ----XXXX */ *dst++ = DUK_ASC_EQUALS; break; } } } #else /* DUK_USE_BASE64_FASTPATH */ DUK_LOCAL void duk__base64_encode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst) { duk_small_uint_t i, snip; duk_uint_t t; duk_uint_fast8_t x, y; const duk_uint8_t *src_end; src_end = src + srclen; while (src < src_end) { /* read 3 bytes into 't', padded by zero */ snip = 4; t = 0; for (i = 0; i < 3; i++) { t = t << 8; if (src >= src_end) { snip--; } else { t += (duk_uint_t) (*src++); } } /* * Missing bytes snip base64 example * 0 4 XXXX * 1 3 XXX= * 2 2 XX== */ DUK_ASSERT(snip >= 2 && snip <= 4); for (i = 0; i < 4; i++) { x = (duk_uint_fast8_t) ((t >> 18) & 0x3f); t = t << 6; /* A straightforward 64-byte lookup would be faster * and cleaner, but this is shorter. */ if (i >= snip) { y = '='; } else if (x <= 25) { y = x + 'A'; } else if (x <= 51) { y = x - 26 + 'a'; } else if (x <= 61) { y = x - 52 + '0'; } else if (x == 62) { y = '+'; } else { y = '/'; } *dst++ = (duk_uint8_t) y; } } } #endif /* DUK_USE_BASE64_FASTPATH */ #if defined(DUK_USE_BASE64_FASTPATH) DUK_LOCAL duk_bool_t duk__base64_decode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst, duk_uint8_t **out_dst_final) { duk_int_t x; duk_int_t t; duk_small_uint_t n_equal; duk_small_uint_t n_chars; const duk_uint8_t *src_end; const duk_uint8_t *src_end_safe; src_end = src + srclen; src_end_safe = src_end - 4; /* if 'src < src_end_safe', safe to read 4 bytes */ /* Innermost fast path processes 4 valid base-64 characters at a time * but bails out on whitespace, padding chars ('=') and invalid chars. * Once the slow path segment has been processed, we return to the * inner fast path again. This handles e.g. base64 with newlines * reasonably well because the majority of a line is in the fast path. */ for (;;) { /* Fast path, handle units with just actual encoding characters. */ while (src <= src_end_safe) { /* The lookup byte is intentionally sign extended to (at least) * 32 bits and then ORed. This ensures that is at least 1 byte * is negative, the highest bit of 't' will be set at the end * and we don't need to check every byte. */ DUK_DDD(DUK_DDDPRINT("fast loop: src=%p, src_end_safe=%p, src_end=%p", (const void *) src, (const void *) src_end_safe, (const void *) src_end)); t = (duk_int_t) duk_base64_dectab[*src++]; t = (t << 6) | (duk_int_t) duk_base64_dectab[*src++]; t = (t << 6) | (duk_int_t) duk_base64_dectab[*src++]; t = (t << 6) | (duk_int_t) duk_base64_dectab[*src++]; if (DUK_UNLIKELY(t < 0)) { DUK_DDD(DUK_DDDPRINT("fast loop unit was not clean, process one slow path unit")); src -= 4; break; } DUK_ASSERT(t <= 0xffffffL); DUK_ASSERT((t >> 24) == 0); *dst++ = (duk_uint8_t) (t >> 16); *dst++ = (duk_uint8_t) ((t >> 8) & 0xff); *dst++ = (duk_uint8_t) (t & 0xff); } /* Handle one slow path unit (or finish if we're done). */ n_equal = 0; n_chars = 0; t = 0; for (;;) { DUK_DDD(DUK_DDDPRINT("slow loop: src=%p, src_end=%p, n_chars=%ld, n_equal=%ld, t=%ld", (const void *) src, (const void *) src_end, (long) n_chars, (long) n_equal, (long) t)); if (DUK_UNLIKELY(src >= src_end)) { goto done; /* two level break */ } x = duk_base64_dectab[*src++]; if (DUK_UNLIKELY(x < 0)) { if (x == -2) { continue; /* allowed ascii whitespace */ } else if (x == -3) { n_equal++; t <<= 6; } else { DUK_ASSERT(x == -1); goto error; } } else { DUK_ASSERT(x >= 0 && x <= 63); if (n_equal > 0) { /* Don't allow actual chars after equal sign. */ goto error; } t = (t << 6) + x; } if (DUK_UNLIKELY(n_chars == 3)) { /* Emit 3 bytes and backtrack if there was padding. There's * always space for the whole 3 bytes so no check needed. */ DUK_ASSERT(t <= 0xffffffL); DUK_ASSERT((t >> 24) == 0); *dst++ = (duk_uint8_t) (t >> 16); *dst++ = (duk_uint8_t) ((t >> 8) & 0xff); *dst++ = (duk_uint8_t) (t & 0xff); if (DUK_UNLIKELY(n_equal > 0)) { DUK_ASSERT(n_equal <= 4); /* There may be whitespace between the equal signs. */ if (n_equal == 1) { /* XXX= */ dst -= 1; } else if (n_equal == 2) { /* XX== */ dst -= 2; } else { goto error; /* invalid padding */ } /* Continue parsing after padding, allows concatenated, * padded base64. */ } break; /* back to fast loop */ } else { n_chars++; } } } done: DUK_DDD(DUK_DDDPRINT("done; src=%p, src_end=%p, n_chars=%ld", (const void *) src, (const void *) src_end, (long) n_chars)); DUK_ASSERT(src == src_end); if (n_chars != 0) { /* Here we'd have the option of decoding unpadded base64 * (e.g. "xxxxyy" instead of "xxxxyy==". Currently not * accepted. */ goto error; } *out_dst_final = dst; return 1; error: return 0; } #else /* DUK_USE_BASE64_FASTPATH */ DUK_LOCAL duk_bool_t duk__base64_decode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst, duk_uint8_t **out_dst_final) { duk_uint_t t; duk_uint_fast8_t x, y; duk_small_uint_t group_idx; duk_small_uint_t n_equal; const duk_uint8_t *src_end; src_end = src + srclen; t = 0; group_idx = 0; n_equal = 0; while (src < src_end) { x = *src++; if (x >= 'A' && x <= 'Z') { y = x - 'A' + 0; } else if (x >= 'a' && x <= 'z') { y = x - 'a' + 26; } else if (x >= '0' && x <= '9') { y = x - '0' + 52; } else if (x == '+') { y = 62; } else if (x == '/') { y = 63; } else if (x == '=') { /* We don't check the zero padding bytes here right now * (that they're actually zero). This seems to be common * behavior for base-64 decoders. */ n_equal++; t <<= 6; /* shift in zeroes */ goto skip_add; } else if (x == 0x09 || x == 0x0a || x == 0x0d || x == 0x20) { /* allow basic ASCII whitespace */ continue; } else { goto error; } if (n_equal > 0) { /* Don't allow mixed padding and actual chars. */ goto error; } t = (t << 6) + y; skip_add: if (group_idx == 3) { /* output 3 bytes from 't' */ *dst++ = (duk_uint8_t) ((t >> 16) & 0xff); *dst++ = (duk_uint8_t) ((t >> 8) & 0xff); *dst++ = (duk_uint8_t) (t & 0xff); if (DUK_UNLIKELY(n_equal > 0)) { /* Backtrack. */ DUK_ASSERT(n_equal <= 4); if (n_equal == 1) { dst -= 1; } else if (n_equal == 2) { dst -= 2; } else { goto error; /* invalid padding */ } /* Here we can choose either to end parsing and ignore * whatever follows, or to continue parsing in case * multiple (possibly padded) base64 strings have been * concatenated. Currently, keep on parsing. */ n_equal = 0; } t = 0; group_idx = 0; } else { group_idx++; } } if (group_idx != 0) { /* Here we'd have the option of decoding unpadded base64 * (e.g. "xxxxyy" instead of "xxxxyy==". Currently not * accepted. */ goto error; } *out_dst_final = dst; return 1; error: return 0; } #endif /* DUK_USE_BASE64_FASTPATH */ DUK_EXTERNAL const char *duk_base64_encode(duk_context *ctx, duk_idx_t index) { duk_hthread *thr = (duk_hthread *) ctx; const duk_uint8_t *src; duk_size_t srclen; duk_size_t dstlen; duk_uint8_t *dst; const char *ret; DUK_ASSERT_CTX_VALID(ctx); /* XXX: optimize for string inputs: no need to coerce to a buffer * which makes a copy of the input. */ index = duk_require_normalize_index(ctx, index); src = duk__prep_codec_arg(ctx, index, &srclen); /* Note: for srclen=0, src may be NULL */ /* Computation must not wrap; this limit works for 32-bit size_t: * >>> srclen = 3221225469 * >>> '%x' % ((srclen + 2) / 3 * 4) * 'fffffffc' */ if (srclen > 3221225469UL) { goto type_error; } dstlen = (srclen + 2) / 3 * 4; dst = (duk_uint8_t *) duk_push_fixed_buffer(ctx, dstlen); duk__base64_encode_helper((const duk_uint8_t *) src, srclen, dst); ret = duk_to_string(ctx, -1); duk_replace(ctx, index); return ret; type_error: DUK_ERROR_TYPE(thr, DUK_STR_ENCODE_FAILED); return NULL; /* never here */ } DUK_EXTERNAL void duk_base64_decode(duk_context *ctx, duk_idx_t index) { duk_hthread *thr = (duk_hthread *) ctx; const duk_uint8_t *src; duk_size_t srclen; duk_size_t dstlen; duk_uint8_t *dst; duk_uint8_t *dst_final; duk_bool_t retval; DUK_ASSERT_CTX_VALID(ctx); /* XXX: optimize for buffer inputs: no need to coerce to a string * which causes an unnecessary interning. */ index = duk_require_normalize_index(ctx, index); src = duk__prep_codec_arg(ctx, index, &srclen); /* Computation must not wrap, only srclen + 3 is at risk of * wrapping because after that the number gets smaller. * This limit works for 32-bit size_t: * 0x100000000 - 3 - 1 = 4294967292 */ if (srclen > 4294967292UL) { goto type_error; } dstlen = (srclen + 3) / 4 * 3; /* upper limit, assuming no whitespace etc */ dst = (duk_uint8_t *) duk_push_dynamic_buffer(ctx, dstlen); /* Note: for dstlen=0, dst may be NULL */ retval = duk__base64_decode_helper((const duk_uint8_t *) src, srclen, dst, &dst_final); if (!retval) { goto type_error; } /* XXX: convert to fixed buffer? */ (void) duk_resize_buffer(ctx, -1, (duk_size_t) (dst_final - dst)); duk_replace(ctx, index); return; type_error: DUK_ERROR_TYPE(thr, DUK_STR_DECODE_FAILED); } DUK_EXTERNAL const char *duk_hex_encode(duk_context *ctx, duk_idx_t index) { const duk_uint8_t *inp; duk_size_t len; duk_size_t i; duk_uint8_t *buf; const char *ret; #if defined(DUK_USE_HEX_FASTPATH) duk_size_t len_safe; duk_uint16_t *p16; #endif DUK_ASSERT_CTX_VALID(ctx); index = duk_require_normalize_index(ctx, index); inp = duk__prep_codec_arg(ctx, index, &len); DUK_ASSERT(inp != NULL || len == 0); /* Fixed buffer, no zeroing because we'll fill all the data. */ buf = (duk_uint8_t *) duk_push_buffer_raw(ctx, len * 2, DUK_BUF_FLAG_NOZERO /*flags*/); DUK_ASSERT(buf != NULL); #if defined(DUK_USE_HEX_FASTPATH) DUK_ASSERT((((duk_size_t) buf) & 0x01U) == 0); /* pointer is aligned, guaranteed for fixed buffer */ p16 = (duk_uint16_t *) (void *) buf; len_safe = len & ~0x03U; for (i = 0; i < len_safe; i += 4) { p16[0] = duk_hex_enctab[inp[i]]; p16[1] = duk_hex_enctab[inp[i + 1]]; p16[2] = duk_hex_enctab[inp[i + 2]]; p16[3] = duk_hex_enctab[inp[i + 3]]; p16 += 4; } for (; i < len; i++) { *p16++ = duk_hex_enctab[inp[i]]; } #else /* DUK_USE_HEX_FASTPATH */ for (i = 0; i < len; i++) { duk_small_uint_t t; t = (duk_small_uint_t) inp[i]; buf[i*2 + 0] = duk_lc_digits[t >> 4]; buf[i*2 + 1] = duk_lc_digits[t & 0x0f]; } #endif /* DUK_USE_HEX_FASTPATH */ /* XXX: Using a string return value forces a string intern which is * not always necessary. As a rough performance measure, hex encode * time for tests/perf/test-hex-encode.js dropped from ~35s to ~15s * without string coercion. Change to returning a buffer and let the * caller coerce to string if necessary? */ ret = duk_to_string(ctx, -1); duk_replace(ctx, index); return ret; } DUK_EXTERNAL void duk_hex_decode(duk_context *ctx, duk_idx_t index) { duk_hthread *thr = (duk_hthread *) ctx; const duk_uint8_t *inp; duk_size_t len; duk_size_t i; duk_int_t t; duk_uint8_t *buf; #if defined(DUK_USE_HEX_FASTPATH) duk_int_t chk; duk_uint8_t *p; duk_size_t len_safe; #endif DUK_ASSERT_CTX_VALID(ctx); index = duk_require_normalize_index(ctx, index); inp = duk__prep_codec_arg(ctx, index, &len); DUK_ASSERT(inp != NULL || len == 0); if (len & 0x01) { goto type_error; } /* Fixed buffer, no zeroing because we'll fill all the data. */ buf = (duk_uint8_t *) duk_push_buffer_raw(ctx, len / 2, DUK_BUF_FLAG_NOZERO /*flags*/); DUK_ASSERT(buf != NULL); #if defined(DUK_USE_HEX_FASTPATH) p = buf; len_safe = len & ~0x07U; for (i = 0; i < len_safe; i += 8) { t = ((duk_int_t) duk_hex_dectab_shift4[inp[i]]) | ((duk_int_t) duk_hex_dectab[inp[i + 1]]); chk = t; p[0] = (duk_uint8_t) t; t = ((duk_int_t) duk_hex_dectab_shift4[inp[i + 2]]) | ((duk_int_t) duk_hex_dectab[inp[i + 3]]); chk |= t; p[1] = (duk_uint8_t) t; t = ((duk_int_t) duk_hex_dectab_shift4[inp[i + 4]]) | ((duk_int_t) duk_hex_dectab[inp[i + 5]]); chk |= t; p[2] = (duk_uint8_t) t; t = ((duk_int_t) duk_hex_dectab_shift4[inp[i + 6]]) | ((duk_int_t) duk_hex_dectab[inp[i + 7]]); chk |= t; p[3] = (duk_uint8_t) t; p += 4; /* Check if any lookup above had a negative result. */ if (DUK_UNLIKELY(chk < 0)) { goto type_error; } } for (; i < len; i += 2) { t = (((duk_int_t) duk_hex_dectab[inp[i]]) << 4) | ((duk_int_t) duk_hex_dectab[inp[i + 1]]); if (DUK_UNLIKELY(t < 0)) { goto type_error; } *p++ = (duk_uint8_t) t; } #else /* DUK_USE_HEX_FASTPATH */ for (i = 0; i < len; i += 2) { /* For invalid characters the value -1 gets extended to * at least 16 bits. If either nybble is invalid, the * resulting 't' will be < 0. */ t = (((duk_int_t) duk_hex_dectab[inp[i]]) << 4) | ((duk_int_t) duk_hex_dectab[inp[i + 1]]); if (DUK_UNLIKELY(t < 0)) { goto type_error; } buf[i >> 1] = (duk_uint8_t) t; } #endif /* DUK_USE_HEX_FASTPATH */ duk_replace(ctx, index); return; type_error: DUK_ERROR_TYPE(thr, DUK_STR_DECODE_FAILED); } DUK_EXTERNAL const char *duk_json_encode(duk_context *ctx, duk_idx_t index) { #ifdef DUK_USE_ASSERTIONS duk_idx_t top_at_entry; #endif const char *ret; DUK_ASSERT_CTX_VALID(ctx); #ifdef DUK_USE_ASSERTIONS top_at_entry = duk_get_top(ctx); #endif index = duk_require_normalize_index(ctx, index); duk_bi_json_stringify_helper(ctx, index /*idx_value*/, DUK_INVALID_INDEX /*idx_replacer*/, DUK_INVALID_INDEX /*idx_space*/, 0 /*flags*/); DUK_ASSERT(duk_is_string(ctx, -1)); duk_replace(ctx, index); ret = duk_get_string(ctx, index); DUK_ASSERT(duk_get_top(ctx) == top_at_entry); return ret; } DUK_EXTERNAL void duk_json_decode(duk_context *ctx, duk_idx_t index) { #ifdef DUK_USE_ASSERTIONS duk_idx_t top_at_entry; #endif DUK_ASSERT_CTX_VALID(ctx); #ifdef DUK_USE_ASSERTIONS top_at_entry = duk_get_top(ctx); #endif index = duk_require_normalize_index(ctx, index); duk_bi_json_parse_helper(ctx, index /*idx_value*/, DUK_INVALID_INDEX /*idx_reviver*/, 0 /*flags*/); duk_replace(ctx, index); DUK_ASSERT(duk_get_top(ctx) == top_at_entry); }