1 /* 2 * Copyright (c) 2021 Nordic Semiconductor ASA 3 * 4 * SPDX-License-Identifier: Apache-2.0 5 */ 6 7 #include <stdint.h> 8 #include <string.h> 9 #include <zephyr/sys/__assert.h> 10 11 #define ASCII_CHAR 0x7F 12 #define SEQUENCE_FIRST_MASK 0xC0 13 #define SEQUENCE_LEN_2_BYTE 0xC0 14 #define SEQUENCE_LEN_3_BYTE 0xE0 15 #define SEQUENCE_LEN_4_BYTE 0xF0 16 utf8_trunc(char * utf8_str)17char *utf8_trunc(char *utf8_str) 18 { 19 const size_t len = strlen(utf8_str); 20 21 if (len == 0U) { 22 /* no-op */ 23 return utf8_str; 24 } 25 26 char *last_byte_p = utf8_str + len - 1U; 27 uint8_t bytes_truncated; 28 char seq_start_byte; 29 30 if ((*last_byte_p & ASCII_CHAR) == *last_byte_p) { 31 /* Not part of an UTF8 sequence, return */ 32 return utf8_str; 33 } 34 35 /* Find the starting byte and NULL-terminate other bytes */ 36 bytes_truncated = 0; 37 while ((*last_byte_p & SEQUENCE_FIRST_MASK) != SEQUENCE_FIRST_MASK && 38 last_byte_p > utf8_str) { 39 last_byte_p--; 40 bytes_truncated++; 41 } 42 bytes_truncated++; /* include the starting byte */ 43 44 /* Verify if the last character actually need to be truncated 45 * Handles the case where the number of bytes in the last UTF8-char 46 * matches the number of bytes we searched for the starting byte 47 */ 48 seq_start_byte = *last_byte_p; 49 if ((seq_start_byte & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) { 50 if (bytes_truncated == 4) { 51 return utf8_str; 52 } 53 } else if ((seq_start_byte & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) { 54 if (bytes_truncated == 3) { 55 return utf8_str; 56 } 57 } else if ((seq_start_byte & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) { 58 if (bytes_truncated == 2) { 59 return utf8_str; 60 } 61 } 62 63 /* NULL-terminate the unterminated starting byte */ 64 *last_byte_p = '\0'; 65 66 return utf8_str; 67 } 68 utf8_lcpy(char * dst,const char * src,size_t n)69char *utf8_lcpy(char *dst, const char *src, size_t n) 70 { 71 if (n > 0) { 72 strncpy(dst, src, n - 1); 73 dst[n - 1] = '\0'; 74 75 if (n != 1) { 76 utf8_trunc(dst); 77 } 78 } 79 80 return dst; 81 } 82