1 /* 2 * Copyright (c) 2021 Nordic Semiconductor ASA 3 * 4 * SPDX-License-Identifier: Apache-2.0 5 */ 6 7 #include <stdint.h> 8 #include <string.h> 9 #include <zephyr/sys/__assert.h> 10 11 #define ASCII_CHAR 0x7F 12 #define SEQUENCE_FIRST_MASK 0xC0 13 #define SEQUENCE_LEN_2_BYTE 0xC0 14 #define SEQUENCE_LEN_3_BYTE 0xE0 15 #define SEQUENCE_LEN_4_BYTE 0xF0 16 utf8_trunc(char * utf8_str)17char *utf8_trunc(char *utf8_str) 18 { 19 char *last_byte_p = utf8_str + strlen(utf8_str) - 1; 20 uint8_t bytes_truncated; 21 char seq_start_byte; 22 23 if ((*last_byte_p & ASCII_CHAR) == *last_byte_p) { 24 /* Not part of an UTF8 sequence, return */ 25 return utf8_str; 26 } 27 28 /* Find the starting byte and NULL-terminate other bytes */ 29 bytes_truncated = 0; 30 while ((*last_byte_p & SEQUENCE_FIRST_MASK) != SEQUENCE_FIRST_MASK && 31 last_byte_p > utf8_str) { 32 last_byte_p--; 33 bytes_truncated++; 34 } 35 bytes_truncated++; /* include the starting byte */ 36 37 /* Verify if the the last character actually need to be truncated 38 * Handles the case where the number of bytes in the last UTF8-char 39 * matches the number of bytes we searched for the starting byte 40 */ 41 seq_start_byte = *last_byte_p; 42 if ((seq_start_byte & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) { 43 if (bytes_truncated == 4) { 44 return utf8_str; 45 } 46 } else if ((seq_start_byte & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) { 47 if (bytes_truncated == 3) { 48 return utf8_str; 49 } 50 } else if ((seq_start_byte & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) { 51 if (bytes_truncated == 2) { 52 return utf8_str; 53 } 54 } 55 56 /* NULL-terminate the unterminated starting byte */ 57 *last_byte_p = '\0'; 58 59 return utf8_str; 60 } 61 utf8_lcpy(char * dst,const char * src,size_t n)62char *utf8_lcpy(char *dst, const char *src, size_t n) 63 { 64 if (n > 0) { 65 strncpy(dst, src, n - 1); 66 dst[n - 1] = '\0'; 67 68 if (n != 1) { 69 utf8_trunc(dst); 70 } 71 } 72 73 return dst; 74 } 75