1 /**
2 * @file lv_text.c
3 *
4 */
5
6 /*********************
7 * INCLUDES
8 *********************/
9 #include "lv_txt.h"
10 #include "lv_math.h"
11 #include "lv_log.h"
12
13 /*********************
14 * DEFINES
15 *********************/
16 #define NO_BREAK_FOUND UINT32_MAX
17
18 /**********************
19 * TYPEDEFS
20 **********************/
21
22 /**********************
23 * STATIC PROTOTYPES
24 **********************/
static inline bool is_break_char(uint32_t letter);

/* Encoding specific helpers. Exactly one set is compiled, selected by LV_TXT_ENC.
 * Parameter names match the definitions further down in this file. */
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
static uint8_t lv_txt_utf8_size(const char * str);
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni);
static uint32_t lv_txt_utf8_conv_wc(uint32_t c);
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i);
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i);
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id);
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id);
static uint32_t lv_txt_utf8_get_length(const char * txt);
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
static uint8_t lv_txt_iso8859_1_size(const char * str);
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni);
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c);
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i);
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i);
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id);
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id);
static uint32_t lv_txt_iso8859_1_get_length(const char * txt);
#endif
46 /**********************
47 * STATIC VARIABLES
48 **********************/
49
50 /**********************
51 * GLOBAL VARIABLES
52 **********************/
/* Encoding dispatch pointers.
 * Each pointer is initialised to the static implementation that matches the
 * encoding selected by LV_TXT_ENC; the rest of this file calls the encoder
 * only through these pointers. */
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
uint8_t (*_lv_txt_encoded_size)(const char *) = lv_txt_utf8_size;
uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t) = lv_txt_unicode_to_utf8;
uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t) = lv_txt_utf8_conv_wc;
uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *) = lv_txt_utf8_next;
uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *) = lv_txt_utf8_prev;
uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_utf8_get_byte_id;
uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t) = lv_txt_utf8_get_char_id;
uint32_t (*_lv_txt_get_encoded_length)(const char *) = lv_txt_utf8_get_length;
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
uint8_t (*_lv_txt_encoded_size)(const char *) = lv_txt_iso8859_1_size;
uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t) = lv_txt_unicode_to_iso8859_1;
uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t) = lv_txt_iso8859_1_conv_wc;
uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *) = lv_txt_iso8859_1_next;
uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *) = lv_txt_iso8859_1_prev;
uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_iso8859_1_get_byte_id;
uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t) = lv_txt_iso8859_1_get_char_id;
uint32_t (*_lv_txt_get_encoded_length)(const char *) = lv_txt_iso8859_1_get_length;

#endif
73
74 /**********************
75 * MACROS
76 **********************/
77
78 /**********************
79 * GLOBAL FUNCTIONS
80 **********************/
81
82 /**
83 * Get size of a text
84 * @param size_res pointer to a 'point_t' variable to store the result
85 * @param text pointer to a text
86 * @param font pointer to font of the text
87 * @param letter_space letter space of the text
88 * @param txt.line_space line space of the text
89 * @param flags settings for the text from 'txt_flag_t' enum
90 * @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid
91 * line breaks
92 */
_lv_txt_get_size(lv_point_t * size_res,const char * text,const lv_font_t * font,lv_coord_t letter_space,lv_coord_t line_space,lv_coord_t max_width,lv_txt_flag_t flag)93 void _lv_txt_get_size(lv_point_t * size_res, const char * text, const lv_font_t * font, lv_coord_t letter_space,
94 lv_coord_t line_space, lv_coord_t max_width, lv_txt_flag_t flag)
95 {
96 size_res->x = 0;
97 size_res->y = 0;
98
99 if(text == NULL) return;
100 if(font == NULL) return;
101
102 if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
103
104 uint32_t line_start = 0;
105 uint32_t new_line_start = 0;
106 uint16_t letter_height = lv_font_get_line_height(font);
107
108 /*Calc. the height and longest line*/
109 while(text[line_start] != '\0') {
110 new_line_start += _lv_txt_get_next_line(&text[line_start], font, letter_space, max_width, flag);
111
112 if((unsigned long)size_res->y + (unsigned long)letter_height + (unsigned long)line_space > LV_MAX_OF(lv_coord_t)) {
113 LV_LOG_WARN("lv_txt_get_size: integer overflow while calculating text height");
114 return;
115 }
116 else {
117 size_res->y += letter_height;
118 size_res->y += line_space;
119 }
120
121 /*Calculate the the longest line*/
122 lv_coord_t act_line_length = _lv_txt_get_width(&text[line_start], new_line_start - line_start, font, letter_space,
123 flag);
124
125 size_res->x = LV_MATH_MAX(act_line_length, size_res->x);
126 line_start = new_line_start;
127 }
128
129 /*Make the text one line taller if the last character is '\n' or '\r'*/
130 if((line_start != 0) && (text[line_start - 1] == '\n' || text[line_start - 1] == '\r')) {
131 size_res->y += letter_height + line_space;
132 }
133
134 /*Correction with the last line space or set the height manually if the text is empty*/
135 if(size_res->y == 0)
136 size_res->y = letter_height;
137 else
138 size_res->y -= line_space;
139 }
140
141 /**
142 * Get the next word of text. A word is delimited by break characters.
143 *
144 * If the word cannot fit in the max_width space, obey LV_TXT_LINE_BREAK_LONG_* rules.
145 *
146 * If the next word cannot fit anything, return 0.
147 *
148 * If the first character is a break character, returns the next index.
149 *
150 * Example calls from lv_txt_get_next_line() assuming sufficient max_width and
151 * txt = "Test text\n"
152 * 0123456789
153 *
154 * Calls would be as follows:
155 * 1. Return i=4, pointing at breakchar ' ', for the string "Test"
156 * 2. Return i=5, since i=4 was a breakchar.
157 * 3. Return i=9, pointing at breakchar '\n'
158 * 4. Parenting lv_txt_get_next_line() would detect subsequent '\0'
159 *
160 * TODO: Returned word_w_ptr may overestimate the returned word's width when
161 * max_width is reached. In current usage, this has no impact.
162 *
163 * @param txt a '\0' terminated string
164 * @param font pointer to a font
165 * @param letter_space letter space
166 * @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid line breaks
167 * @param flags settings for the text from 'txt_flag_type' enum
168 * @param[out] word_w_ptr width (in pixels) of the parsed word. May be NULL.
169 * @param force Force return the fraction of the word that can fit in the provided space.
170 * @return the index of the first char of the next word (in byte index not letter index. With UTF-8 they are different)
171 */
lv_txt_get_next_word(const char * txt,const lv_font_t * font,lv_coord_t letter_space,lv_coord_t max_width,lv_txt_flag_t flag,uint32_t * word_w_ptr,lv_txt_cmd_state_t * cmd_state,bool force)172 static uint32_t lv_txt_get_next_word(const char * txt, const lv_font_t * font,
173 lv_coord_t letter_space, lv_coord_t max_width,
174 lv_txt_flag_t flag, uint32_t * word_w_ptr, lv_txt_cmd_state_t * cmd_state, bool force)
175 {
176 if(txt == NULL || txt[0] == '\0') return 0;
177 if(font == NULL) return 0;
178
179 if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
180
181 uint32_t i = 0, i_next = 0, i_next_next = 0; /* Iterating index into txt */
182 uint32_t letter = 0; /* Letter at i */
183 uint32_t letter_next = 0; /* Letter at i_next */
184 lv_coord_t letter_w;
185 lv_coord_t cur_w = 0; /* Pixel Width of transversed string */
186 uint32_t word_len = 0; /* Number of characters in the transversed word */
187 uint32_t break_index = NO_BREAK_FOUND; /* only used for "long" words */
188 uint32_t break_letter_count = 0; /* Number of characters up to the long word break point */
189
190 letter = _lv_txt_encoded_next(txt, &i_next);
191 i_next_next = i_next;
192
193 /* Obtain the full word, regardless if it fits or not in max_width */
194 while(txt[i] != '\0') {
195 letter_next = _lv_txt_encoded_next(txt, &i_next_next);
196 word_len++;
197
198 /*Handle the recolor command*/
199 if((flag & LV_TXT_FLAG_RECOLOR) != 0) {
200 if(_lv_txt_is_cmd(cmd_state, letter) != false) {
201 i = i_next;
202 i_next = i_next_next;
203 letter = letter_next;
204 continue; /*Skip the letter is it is part of a command*/
205 }
206 }
207
208 letter_w = lv_font_get_glyph_width(font, letter, letter_next);
209 cur_w += letter_w;
210
211 if(letter_w > 0) {
212 cur_w += letter_space;
213 }
214
215 /* Test if this character fits within max_width */
216 if(break_index == NO_BREAK_FOUND && (cur_w - letter_space) > max_width) {
217 break_index = i;
218 break_letter_count = word_len - 1;
219 /* break_index is now pointing at the character that doesn't fit */
220 }
221
222 /*Check for new line chars and breakchars*/
223 if(letter == '\n' || letter == '\r' || is_break_char(letter)) {
224 /* Update the output width on the first character if it fits.
225 * Must do this here incase first letter is a break character. */
226 if(i == 0 && break_index == NO_BREAK_FOUND && word_w_ptr != NULL) *word_w_ptr = cur_w;
227 word_len--;
228 break;
229 }
230
231 /* Update the output width */
232 if(word_w_ptr != NULL && break_index == NO_BREAK_FOUND) *word_w_ptr = cur_w;
233
234
235 i = i_next;
236 i_next = i_next_next;
237 letter = letter_next;
238 }
239
240 /* Entire Word fits in the provided space */
241 if(break_index == NO_BREAK_FOUND) {
242 if(word_len == 0 || (letter == '\r' && letter_next == '\n')) i = i_next;
243 return i;
244 }
245
246 #if LV_TXT_LINE_BREAK_LONG_LEN > 0
247 /* Word doesn't fit in provided space, but isn't "long" */
248 if(word_len < LV_TXT_LINE_BREAK_LONG_LEN) {
249 if(force) return break_index;
250 if(word_w_ptr != NULL) *word_w_ptr = 0; /* Return no word */
251 return 0;
252 }
253
254 /* Word is "long," but insufficient amounts can fit in provided space */
255 if(break_letter_count < LV_TXT_LINE_BREAK_LONG_PRE_MIN_LEN) {
256 if(force) return break_index;
257 if(word_w_ptr != NULL) *word_w_ptr = 0;
258 return 0;
259 }
260
261 /* Word is a "long", but letters may need to be better distributed */
262 {
263 i = break_index;
264 int32_t n_move = LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN - (word_len - break_letter_count);
265 /* Move pointer "i" backwards */
266 for(; n_move > 0; n_move--) {
267 _lv_txt_encoded_prev(txt, &i);
268 // TODO: it would be appropriate to update the returned word width here
269 // However, in current usage, this doesn't impact anything.
270 }
271 }
272 return i;
273 #else
274 if(force) return break_index;
275 if(word_w_ptr != NULL) *word_w_ptr = 0; /* Return no word */
276 (void) break_letter_count;
277 return 0;
278 #endif
279 }
280
281 /**
282 * Get the next line of text. Check line length and break chars too.
283 *
284 * A line of txt includes the \n character.
285 *
286 * @param txt a '\0' terminated string
287 * @param font pointer to a font
288 * @param letter_space letter space
289 * @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid line breaks
290 * @param flags settings for the text from 'txt_flag_type' enum
291 * @return the index of the first char of the new line (in byte index not letter index. With UTF-8 they are different)
292 */
_lv_txt_get_next_line(const char * txt,const lv_font_t * font,lv_coord_t letter_space,lv_coord_t max_width,lv_txt_flag_t flag)293 uint32_t _lv_txt_get_next_line(const char * txt, const lv_font_t * font,
294 lv_coord_t letter_space, lv_coord_t max_width, lv_txt_flag_t flag)
295 {
296 if(txt == NULL) return 0;
297 if(font == NULL) return 0;
298
299 /* If max_width doesn't mater simply find the new line character
300 * without thinking about word wrapping*/
301 if((flag & LV_TXT_FLAG_EXPAND) || (flag & LV_TXT_FLAG_FIT)) {
302 uint32_t i;
303 for(i = 0; txt[i] != '\n' && txt[i] != '\r' && txt[i] != '\0'; i++) {
304 /*Just find the new line chars or string ends by incrementing `i`*/
305 }
306 if(txt[i] != '\0') i++; /*To go beyond `\n`*/
307 return i;
308 }
309
310 if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
311 lv_txt_cmd_state_t cmd_state = LV_TXT_CMD_STATE_WAIT;
312 uint32_t i = 0; /* Iterating index into txt */
313
314 while(txt[i] != '\0' && max_width > 0) {
315 uint32_t word_w = 0;
316 uint32_t advance = lv_txt_get_next_word(&txt[i], font, letter_space, max_width, flag, &word_w, &cmd_state, i == 0);
317 max_width -= word_w;
318
319 if(advance == 0) {
320 if(i == 0) _lv_txt_encoded_next(txt, &i); // prevent inf loops
321 break;
322 }
323
324 i += advance;
325
326 if(txt[0] == '\n' || txt[0] == '\r') break;
327
328 if(txt[i] == '\n' || txt[i] == '\r') {
329 i++; /* Include the following newline in the current line */
330 break;
331 }
332
333 }
334
335 /* Always step at least one to avoid infinite loops */
336 if(i == 0) {
337 _lv_txt_encoded_next(txt, &i);
338 }
339
340 return i;
341 }
342
343 /**
344 * Give the length of a text with a given font
345 * @param txt a '\0' terminate string
346 * @param length length of 'txt' in byte count and not characters (Á is 1 character but 2 bytes in
347 * UTF-8)
348 * @param font pointer to a font
349 * @param letter_space letter space
350 * @param flags settings for the text from 'txt_flag_t' enum
351 * @return length of a char_num long text
352 */
_lv_txt_get_width(const char * txt,uint32_t length,const lv_font_t * font,lv_coord_t letter_space,lv_txt_flag_t flag)353 lv_coord_t _lv_txt_get_width(const char * txt, uint32_t length, const lv_font_t * font, lv_coord_t letter_space,
354 lv_txt_flag_t flag)
355 {
356 if(txt == NULL) return 0;
357 if(font == NULL) return 0;
358
359 uint32_t i = 0;
360 lv_coord_t width = 0;
361 lv_txt_cmd_state_t cmd_state = LV_TXT_CMD_STATE_WAIT;
362
363 if(length != 0) {
364 while(i < length) {
365 uint32_t letter = _lv_txt_encoded_next(txt, &i);
366 uint32_t letter_next = _lv_txt_encoded_next(&txt[i], NULL);
367 if((flag & LV_TXT_FLAG_RECOLOR) != 0) {
368 if(_lv_txt_is_cmd(&cmd_state, letter) != false) {
369 continue;
370 }
371 }
372
373 lv_coord_t char_width = lv_font_get_glyph_width(font, letter, letter_next);
374 if(char_width > 0) {
375 width += char_width;
376 width += letter_space;
377 }
378 }
379
380 if(width > 0) {
381 width -= letter_space; /*Trim the last letter space. Important if the text is center
382 aligned */
383 }
384 }
385
386 return width;
387 }
388
389 /**
390 * Check next character in a string and decide if the character is part of the command or not
391 * @param state pointer to a txt_cmd_state_t variable which stores the current state of command
392 * processing (Initied. to TXT_CMD_STATE_WAIT )
393 * @param c the current character
394 * @return true: the character is part of a command and should not be written,
395 * false: the character should be written
396 */
_lv_txt_is_cmd(lv_txt_cmd_state_t * state,uint32_t c)397 bool _lv_txt_is_cmd(lv_txt_cmd_state_t * state, uint32_t c)
398 {
399 bool ret = false;
400
401 if(c == (uint32_t)LV_TXT_COLOR_CMD[0]) {
402 if(*state == LV_TXT_CMD_STATE_WAIT) { /*Start char*/
403 *state = LV_TXT_CMD_STATE_PAR;
404 ret = true;
405 }
406 /*Other start char in parameter is escaped cmd. char */
407 else if(*state == LV_TXT_CMD_STATE_PAR) {
408 *state = LV_TXT_CMD_STATE_WAIT;
409 }
410 /*Command end */
411 else if(*state == LV_TXT_CMD_STATE_IN) {
412 *state = LV_TXT_CMD_STATE_WAIT;
413 ret = true;
414 }
415 }
416
417 /*Skip the color parameter and wait the space after it*/
418 if(*state == LV_TXT_CMD_STATE_PAR) {
419 if(c == ' ') {
420 *state = LV_TXT_CMD_STATE_IN; /*After the parameter the text is in the command*/
421 }
422 ret = true;
423 }
424
425 return ret;
426 }
427
/**
 * Insert a string into another one, in place.
 * @param txt_buf the original text (the buffer must be big enough for the result text)
 * @param pos position to insert at, expressed in character index and not byte index
 *            (they differ in UTF-8). 0: before the original text, 1: after the first char etc.
 * @param ins_txt text to insert; nothing happens if it is empty
 */
void _lv_txt_ins(char * txt_buf, uint32_t pos, const char * ins_txt)
{
    size_t old_len = strlen(txt_buf);
    size_t ins_len = strlen(ins_txt);
    if(ins_len == 0) return;

    size_t new_len = ins_len + old_len;

    /*From here on `pos` is a byte index instead of a letter index*/
    pos = _lv_txt_encoded_get_byte_id(txt_buf, pos);

    /*Shift the tail (including the terminating '\0') towards the end, walking backwards
     *so that no byte is overwritten before it has been moved*/
    const size_t first_dst = pos + ins_len;
    size_t dst             = new_len;
    while(dst >= first_dst) {
        txt_buf[dst] = txt_buf[dst - ins_len];
        dst--;
    }

    /*Fill the gap with the text to insert*/
    _lv_memcpy_small(txt_buf + pos, ins_txt, ins_len);
}
453
/**
 * Delete a part of a string, in place.
 * @param txt string to modify
 * @param pos character position where the deletion starts
 *            (0: before the first char, 1: after the first char etc.)
 * @param len number of characters to delete
 */
void _lv_txt_cut(char * txt, uint32_t pos, uint32_t len)
{
    size_t old_len = strlen(txt);

    /*Convert both the start and the count from letters to bytes*/
    pos = _lv_txt_encoded_get_byte_id(txt, pos);
    len = _lv_txt_encoded_get_byte_id(&txt[pos], len);

    /*Pull the tail (including the terminating '\0') forward over the deleted bytes*/
    uint32_t dst = pos;
    while(dst <= old_len - len) {
        txt[dst] = txt[dst + len];
        dst++;
    }
}
475
476 #if LV_TXT_ENC == LV_TXT_ENC_UTF8
/*******************************
 *   UTF-8 ENCODER/DECODER
 ******************************/
480
/**
 * Give the size of an UTF-8 coded character, judged from its first byte only.
 * @param str pointer to a character in a string
 * @return length of the UTF-8 character (1, 2, 3 or 4); 0 if the byte is not a valid
 *         first byte (e.g. a continuation byte)
 */
static uint8_t lv_txt_utf8_size(const char * str)
{
    const unsigned char lead = (unsigned char)str[0];

    if(lead < 0x80) return 1;            /* 0xxxxxxx */
    if((lead & 0xE0) == 0xC0) return 2;  /* 110xxxxx */
    if((lead & 0xF0) == 0xE0) return 3;  /* 1110xxxx */
    if((lead & 0xF8) == 0xF0) return 4;  /* 11110xxx */

    return 0; /*Not a valid first byte*/
}
498
/**
 * Convert an Unicode letter to UTF-8.
 * @param letter_uni an Unicode letter
 * @return the UTF-8 bytes packed into an uint32_t in memory order (first byte at the lowest
 *         address, unused trailing bytes are 0), to be compatible with C chars (e.g. 'Á', 'Ű').
 *         Values below 128 are returned unchanged. 0 if `letter_uni` is not below 0x110000.
 */
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni)
{
    if(letter_uni < 128) return letter_uni;

    /* A union gives a well defined byte-wise view of the result
     * (no aliasing or alignment problem as with casting a byte array) */
    union {
        uint8_t bytes[4];
        uint32_t word;
    } enc = { { 0, 0, 0, 0 } };

    if(letter_uni < 0x0800) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 6) & 0x1F) | 0xC0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else if(letter_uni < 0x010000) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 12) & 0x0F) | 0xE0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 6) & 0x3F) | 0x80);
        enc.bytes[2] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else if(letter_uni < 0x110000) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 18) & 0x07) | 0xF0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 12) & 0x3F) | 0x80);
        enc.bytes[2] = (uint8_t)(((letter_uni >> 6) & 0x3F) | 0x80);
        enc.bytes[3] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else {
        return 0; /*Outside of the range handled above: previously an uninitialized value was returned*/
    }

    return enc.word;
}
531
/**
 * Convert a wide character constant (e.g. 'Á') so that its bytes are in UTF-8 order.
 * When LV_BIG_ENDIAN_SYSTEM is 0 and bit 7 of the lowest byte is set, the four bytes of `c`
 * are reversed and the zero bytes that end up at the low end are shifted out.
 * Otherwise `c` is returned unchanged.
 * @param c a wide character or a little endian number
 * @return `c` with its bytes in big endian (UTF-8) order
 */
static uint32_t lv_txt_utf8_conv_wc(uint32_t c)
{
#if LV_BIG_ENDIAN_SYSTEM == 0
    /*Swap the bytes (UTF-8 is big endian, but the MCUs are little endian)*/
    if((c & 0x80) != 0) {
        /* Everything is done on uint32_t: shifting a promoted uint8_t with bit 7 set
         * by 24 would shift into the sign bit of `int` */
        uint32_t swapped = ((c & 0x000000FFu) << 24) |
                           ((c & 0x0000FF00u) << 8)  |
                           ((c & 0x00FF0000u) >> 8)  |
                           ((c & 0xFF000000u) >> 24);
        uint8_t i;
        for(i = 0; i < 4; i++) {
            if((swapped & 0xFF) == 0)
                swapped = (swapped >> 8); /*Ignore leading zeros (they were in the end originally)*/
        }
        c = swapped;
    }
#endif
    return c;
}
556
/**
 * Decode an UTF-8 character from a string.
 *
 * Unicode to UTF-8:
 *   00000000 00000000 00000000 0xxxxxxx -> 0xxxxxxx
 *   00000000 00000000 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
 *   00000000 00000000 zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
 *   00000000 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
 *
 * @param txt pointer to '\0' terminated string
 * @param i byte index in 'txt' where to start decoding. After the call it points to the
 *          next UTF-8 char in 'txt'; on an invalid continuation byte it is left on that byte.
 *          NULL to decode the character at txt[0].
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i)
{
    /*A dummy index is used when the caller is not interested in it*/
    uint32_t local_idx = 0;
    if(i == NULL) i = &local_idx;

    /*The first byte is consumed on every path*/
    const uint8_t lead = (uint8_t)txt[*i];
    (*i)++;

    /*Normal ASCII*/
    if((lead & 0x80) == 0) return lead;

    /*The first byte tells the payload bits it carries and the number of continuation bytes*/
    uint32_t code;
    uint8_t trailing;
    if((lead & 0xE0) == 0xC0) {
        code     = lead & 0x1F;
        trailing = 1;
    }
    else if((lead & 0xF0) == 0xE0) {
        code     = lead & 0x0F;
        trailing = 2;
    }
    else if((lead & 0xF8) == 0xF0) {
        code     = lead & 0x07;
        trailing = 3;
    }
    else {
        return 0; /*Not UTF-8 char. It was skipped above.*/
    }

    /*Each continuation byte (10xxxxxx) appends 6 more bits*/
    uint8_t k;
    for(k = 0; k < trailing; k++) {
        const uint8_t cont = (uint8_t)txt[*i];
        if((cont & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
        code = (code << 6) | (uint32_t)(cont & 0x3F);
        (*i)++;
    }

    return code;
}
631
/**
 * Get the previous UTF-8 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i byte index in 'txt' where to start. After the call it points to the first byte
 *          of the previous UTF-8 char in 'txt'. Left unchanged if it is 0 on entry.
 * @return the decoded Unicode character, or 0 if there is no previous character
 *         or no valid UTF-8 first byte was found within 4 bytes
 */
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i)
{
    uint8_t c_size;
    uint8_t cnt = 0;

    /*Nothing precedes the first byte; decrementing would wrap the index around*/
    if(*i == 0) return 0;

    /*Try to find a !0 long UTF-8 char by stepping one character back*/
    (*i)--;
    do {
        if(cnt >= 4) return 0; /*No UTF-8 char found before the initial*/

        c_size = _lv_txt_encoded_size(&txt[*i]);
        if(c_size == 0) {
            /*Continuation byte: keep stepping back unless the start of the text is reached*/
            if(*i != 0)
                (*i)--;
            else
                return 0;
        }
        cnt++;
    } while(c_size == 0);

    /*Character found, decode it without moving the caller's index*/
    uint32_t i_tmp = *i;
    return _lv_txt_encoded_next(txt, &i_tmp);
}
664
/**
 * Convert a character index (in an UTF-8 text) to byte index.
 * E.g. in "AÁRT" 'R' is the character with index 2 but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter. Counting stops at the terminating '\0',
 *         so an index beyond the text does not read beyond the terminator.
 */
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id)
{
    uint32_t i;
    uint32_t byte_cnt = 0;
    for(i = 0; i < utf8_id && txt[byte_cnt] != '\0'; i++) {
        uint8_t c_size = _lv_txt_encoded_size(&txt[byte_cnt]);
        /*An invalid first byte is counted as a 1 byte long character*/
        byte_cnt += c_size > 0 ? c_size : 1;
    }

    return byte_cnt;
}
683
/**
 * Convert a byte index (in an UTF-8 text) to character index.
 * E.g. in "AÁRT" 'R' is the character with index 2 but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param byte_id byte index
 * @return character index of the letter at the 'byte_id'th byte
 */
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id)
{
    uint32_t byte_pos = 0;
    uint32_t char_cnt;

    /*Every decoded letter in front of `byte_id` counts as one character*/
    for(char_cnt = 0; byte_pos < byte_id; char_cnt++) {
        _lv_txt_encoded_next(txt, &byte_pos);
    }

    return char_cnt;
}
703
/**
 * Get the number of characters (and NOT bytes) in a string, decoding it with the active encoder.
 * E.g.: "ÁBC" is 3 characters (but 4 bytes)
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_utf8_get_length(const char * txt)
{
    uint32_t byte_pos = 0;
    uint32_t char_cnt;

    /*Decode letter by letter until the terminator is reached*/
    for(char_cnt = 0; txt[byte_pos] != '\0'; char_cnt++) {
        _lv_txt_encoded_next(txt, &byte_pos);
    }

    return char_cnt;
}
722
723 #elif LV_TXT_ENC == LV_TXT_ENC_ASCII
/*******************************
 *   ASCII ENCODER/DECODER
 ******************************/
727
/**
 * Give the size of an ISO8859-1 coded character.
 * @param str pointer to a character in a string (not read)
 * @return always 1: every character is a single byte in this encoding
 */
static uint8_t lv_txt_iso8859_1_size(const char * str)
{
    const uint8_t bytes_per_char = 1;

    (void)str; /*Unused*/
    return bytes_per_char;
}
738
/**
 * Convert an Unicode letter to ISO8859-1.
 * ISO8859-1 is identical to the first 256 Unicode code points, which is also how the
 * decoder in this file maps bytes to letters, so those values are passed through unchanged.
 * @param letter_uni an Unicode letter
 * @return `letter_uni` if it is below 256, otherwise ' ' as replacement character
 */
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni)
{
    if(letter_uni < 256)
        return letter_uni;
    else
        return ' ';
}
751
/**
 * Convert a wide character to the single byte encoding.
 * No conversion is performed: the function returns its argument as it is.
 * @param c a character, e.g. 'A'
 * @return same as `c`
 */
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c)
{
    const uint32_t unchanged = c;

    return unchanged;
}
762
/**
 * Decode an ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i byte index in 'txt' where to read. After the call it points to the next
 *          character in 'txt'. NULL to read the character at txt[0].
 * @return the character as an unsigned byte value (0..255)
 */
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i)
{
    /*Without an index the character at txt[0] is the one to decode, exactly as in the
     *UTF-8 decoder. The cast avoids sign extension of bytes above 0x7F.*/
    if(i == NULL) return (uint8_t)txt[0];

    uint8_t letter = (uint8_t)txt[*i];
    (*i)++;
    return letter;
}
779
/**
 * Get the previous ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i byte index in 'txt' where to start. After the call it points to the previous
 *          character in 'txt'. Left unchanged if it is 0 on entry.
 *          NULL to read the byte right before `txt` (the caller must guarantee it exists).
 * @return the character as an unsigned byte value (0..255), or 0 if `*i` was 0
 */
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i)
{
    /*The cast avoids sign extension of bytes above 0x7F*/
    if(i == NULL) return (uint8_t) * (txt - 1); /*Get the prev. char */

    /*Nothing precedes the first byte; decrementing would wrap the index around*/
    if(*i == 0) return 0;

    (*i)--;
    uint8_t letter = (uint8_t)txt[*i];

    return letter;
}
795
/**
 * Convert a character index (in an ISO8859-1 text) to byte index.
 * Every character is one byte long in this encoding, so the two indices are the same.
 * @param txt a '\0' terminated string (not read)
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter, i.e. `utf8_id` itself
 */
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id)
{
    const uint32_t byte_id = utf8_id; /*No difference without multi-byte encoding*/

    (void)txt; /*Unused*/
    return byte_id;
}
808
/**
 * Convert a byte index (in an ISO8859-1 text) to character index.
 * Every character is one byte long in this encoding, so the two indices are the same.
 * @param txt a '\0' terminated string (not read)
 * @param byte_id byte index
 * @return character index of the letter at the 'byte_id'th byte, i.e. `byte_id` itself
 */
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id)
{
    const uint32_t char_id = byte_id; /*No difference without multi-byte encoding*/

    (void)txt; /*Unused*/
    return char_id;
}
821
/**
 * Get the number of characters in a string.
 * Every character is one byte long in this encoding, so this is the byte length of the string.
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_iso8859_1_get_length(const char * txt)
{
    const size_t byte_len = strlen(txt);

    return (uint32_t)byte_len;
}
832 #else
833
834 #error "Invalid character encoding. See `LV_TXT_ENC` in `lv_conf.h`"
835
836 #endif
837
838 /**********************
839 * STATIC FUNCTIONS
840 **********************/
841
842 /**
843 * Test if char is break char or not (a text can broken here or not)
844 * @param letter a letter
845 * @return false: 'letter' is not break char
846 */
is_break_char(uint32_t letter)847 static inline bool is_break_char(uint32_t letter)
848 {
849 uint8_t i;
850 bool ret = false;
851
852 /*Compare the letter to TXT_BREAK_CHARS*/
853 for(i = 0; LV_TXT_BREAK_CHARS[i] != '\0'; i++) {
854 if(letter == (uint32_t)LV_TXT_BREAK_CHARS[i]) {
855 ret = true; /*If match then it is break char*/
856 break;
857 }
858 }
859
860 return ret;
861 }
862