1 /*
2  * Copyright (c) 2021 Nordic Semiconductor ASA
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #include <stdint.h>
8 #include <string.h>
9 #include <zephyr/sys/__assert.h>
10 
/* Highest single-byte (ASCII) code unit */
#define ASCII_CHAR 0x7F
/* Top two bits of a byte: 11xxxxxx is a lead byte, 10xxxxxx a continuation */
#define SEQUENCE_FIRST_MASK 0xC0
#define SEQUENCE_CONT_BYTE 0x80
/* Lead-byte prefixes for 2-, 3- and 4-byte sequences */
#define SEQUENCE_LEN_2_BYTE 0xC0
#define SEQUENCE_LEN_3_BYTE 0xE0
#define SEQUENCE_LEN_4_BYTE 0xF0

/*
 * Make a NUL-terminated string end on a complete UTF-8 sequence.
 *
 * The string is modified in place. Only the tail is examined:
 *  - empty string, or last byte is ASCII: nothing is changed;
 *  - the trailing continuation bytes are preceded by a lead byte and the
 *    number of bytes matches the length announced by that lead byte:
 *    nothing is changed;
 *  - the trailing continuation bytes are preceded by a lead byte but the
 *    byte count does not match: the string is cut at the lead byte;
 *  - the trailing continuation bytes are preceded by an ASCII byte: only
 *    the continuation bytes are removed, the ASCII byte is kept;
 *  - the string consists of continuation bytes only: it becomes empty.
 *
 * utf8_str must be a valid, writable, NUL-terminated string.
 *
 * Returns utf8_str.
 */
char *utf8_trunc(char *utf8_str)
{
	const size_t len = strlen(utf8_str);

	if (len == 0U) {
		/* no-op */
		return utf8_str;
	}

	/* Inspect bytes as unsigned so the tests below do not depend on
	 * whether plain char is signed.
	 */
	unsigned char *const first_p = (unsigned char *)utf8_str;
	unsigned char *seq_p = first_p + len - 1U;

	if (*seq_p <= ASCII_CHAR) {
		/* Not part of an UTF8 sequence, return */
		return utf8_str;
	}

	/* Walk back over continuation bytes only. Stopping at anything else
	 * (lead byte or ASCII) keeps the scan from running through valid
	 * characters that precede the tail. size_t cannot wrap here because
	 * the count never exceeds the string length.
	 */
	size_t seq_len = 1U;

	while ((*seq_p & SEQUENCE_FIRST_MASK) == SEQUENCE_CONT_BYTE &&
	       seq_p > first_p) {
		seq_p--;
		seq_len++;
	}

	if (*seq_p <= ASCII_CHAR) {
		/* Orphan continuation bytes after an ASCII byte: drop just
		 * those bytes. seq_p[1] is inside the string because at least
		 * one continuation byte was stepped over to get here.
		 */
		seq_p[1] = '\0';
		return utf8_str;
	}

	/* Length announced by the byte the scan stopped on. Stays 0 when
	 * that byte is itself a continuation byte (string start reached
	 * without finding a lead byte), which never matches seq_len.
	 */
	size_t expected_len = 0U;

	if ((*seq_p & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) {
		expected_len = 4U;
	} else if ((*seq_p & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) {
		expected_len = 3U;
	} else if ((*seq_p & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) {
		expected_len = 2U;
	}

	if (seq_len != expected_len) {
		/* Incomplete or malformed sequence: terminate at its start */
		*seq_p = '\0';
	}

	return utf8_str;
}
68 
/*
 * Bounded string copy that does not leave a cut UTF-8 sequence at the end.
 *
 * dst: destination buffer of at least n bytes
 * src: NUL-terminated source string
 * n:   size of dst in bytes, terminator included
 *
 * When n is 0 nothing is written. Otherwise up to n - 1 bytes are copied
 * with strncpy() (which also zero-fills any unused part of those n - 1
 * bytes), dst[n - 1] is set to NUL, and, unless n is 1, the result is
 * passed through utf8_trunc().
 *
 * Returns dst.
 */
char *utf8_lcpy(char *dst, const char *src, size_t n)
{
	if (n == 0) {
		/* No room for even a terminator: leave dst untouched */
		return dst;
	}

	const size_t term_idx = n - 1;

	strncpy(dst, src, term_idx);
	/* strncpy() does not terminate when src fills the limit */
	dst[term_idx] = '\0';

	if (term_idx != 0) {
		utf8_trunc(dst);
	}

	return dst;
}
82