1 /**
2  * @file lv_txt.c
3  *
4  */
5 
6 /*********************
7  *      INCLUDES
8  *********************/
9 #include <stdarg.h>
10 #include "lv_txt.h"
11 #include "lv_txt_ap.h"
12 #include "lv_math.h"
13 #include "lv_log.h"
14 #include "lv_mem.h"
15 #include "lv_assert.h"
16 
17 /*********************
18  *      DEFINES
19  *********************/
20 #define NO_BREAK_FOUND UINT32_MAX
21 
22 /**********************
23  *      TYPEDEFS
24  **********************/
25 
26 /**********************
27  *  STATIC PROTOTYPES
28  **********************/
29 
/*Encoding specific helpers. Only the set selected by `LV_TXT_ENC` is compiled.
 *They are not called directly: the `_lv_txt_encoded_*` function pointers
 *defined in this file are initialized with them.*/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    static uint8_t lv_txt_utf8_size(const char * str);
    static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni);
    static uint32_t lv_txt_utf8_conv_wc(uint32_t c);
    static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i);
    static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_txt_utf8_get_length(const char * txt);
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    /*One byte per character variants*/
    static uint8_t lv_txt_iso8859_1_size(const char * str);
    static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni);
    static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c);
    static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i);
    static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_txt_iso8859_1_get_length(const char * txt);
#endif
49 /**********************
50  *  STATIC VARIABLES
51  **********************/
52 
53 /**********************
54  *  GLOBAL VARIABLES
55  **********************/
/*Encoding independent entry points. Each pointer is initialized with the
 *implementation that matches `LV_TXT_ENC`, so the rest of this file can handle
 *text without knowing the encoding.*/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    uint8_t (*_lv_txt_encoded_size)(const char *)                   = lv_txt_utf8_size;
    uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t)                = lv_txt_unicode_to_utf8;
    uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t)                   = lv_txt_utf8_conv_wc;
    uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *)      = lv_txt_utf8_next;
    uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *)      = lv_txt_utf8_prev;
    uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_utf8_get_byte_id;
    uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t) = lv_txt_utf8_get_char_id;
    uint32_t (*_lv_txt_get_encoded_length)(const char *)            = lv_txt_utf8_get_length;
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    uint8_t (*_lv_txt_encoded_size)(const char *)                   = lv_txt_iso8859_1_size;
    uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t)                = lv_txt_unicode_to_iso8859_1;
    uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t)                   = lv_txt_iso8859_1_conv_wc;
    uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *)      = lv_txt_iso8859_1_next;
    uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *)      = lv_txt_iso8859_1_prev;
    uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_iso8859_1_get_byte_id;
    uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t)     = lv_txt_iso8859_1_get_char_id;
    uint32_t (*_lv_txt_get_encoded_length)(const char *)            = lv_txt_iso8859_1_get_length;

#endif
76 
77 /**********************
78  *      MACROS
79  **********************/
80 
/*Classify one byte of an UTF-8 stream by its leading bits.
 *`value` is parenthesized so that any expression (e.g. `a | b`) can be passed.*/
#define LV_IS_ASCII(value)              (((value) & 0x80U) == 0x00U) /*0xxxxxxx*/
#define LV_IS_2BYTES_UTF8_CODE(value)   (((value) & 0xE0U) == 0xC0U) /*110xxxxx*/
#define LV_IS_3BYTES_UTF8_CODE(value)   (((value) & 0xF0U) == 0xE0U) /*1110xxxx*/
#define LV_IS_4BYTES_UTF8_CODE(value)   (((value) & 0xF8U) == 0xF0U) /*11110xxx*/
#define LV_IS_INVALID_UTF8_CODE(value)  (((value) & 0xC0U) != 0x80U) /*True if NOT a 10xxxxxx continuation byte*/
86 
87 /**********************
88  *   GLOBAL FUNCTIONS
89  **********************/
90 
/**
 * Get the size of a text: the width of its longest line and the summed height of all lines.
 * @param size_res pointer to a 'point_t' variable to store the result (set to 0;0 if `text` or `font` is NULL)
 * @param text pointer to a '\0' terminated text
 * @param font pointer to the font of the text
 * @param letter_space letter space of the text
 * @param line_space line space of the text
 * @param max_width max width of the text (break the lines to fit this size)
 * @param flag settings for the text from 'lv_text_flag_t'. `LV_TEXT_FLAG_EXPAND` disables the width limit
 */
void lv_txt_get_size(lv_point_t * size_res, const char * text, const lv_font_t * font, lv_coord_t letter_space,
                     lv_coord_t line_space, lv_coord_t max_width, lv_text_flag_t flag)
{
    size_res->x = 0;
    size_res->y = 0;

    if(text == NULL || font == NULL) return;

    if(flag & LV_TEXT_FLAG_EXPAND) max_width = LV_COORD_MAX;

    uint32_t cur_line  = 0;     /*Byte index of the line being measured*/
    uint32_t next_line = 0;     /*Byte index of the line after it*/
    uint16_t line_h    = lv_font_get_line_height(font);

    /*Walk the text line by line, accumulating the height and tracking the widest line*/
    while(text[cur_line] != '\0') {
        next_line += _lv_txt_get_next_line(&text[cur_line], font, letter_space, max_width, NULL, flag);

        /*Stop before the height would no longer fit into `lv_coord_t`*/
        if((unsigned long)size_res->y + (unsigned long)line_h + (unsigned long)line_space > LV_MAX_OF(lv_coord_t)) {
            LV_LOG_WARN("lv_txt_get_size: integer overflow while calculating text height");
            return;
        }

        size_res->y += line_h;
        size_res->y += line_space;

        lv_coord_t line_w = lv_txt_get_width(&text[cur_line], next_line - cur_line, font, letter_space, flag);

        size_res->x = LV_MAX(line_w, size_res->x);
        cur_line    = next_line;
    }

    /*A trailing '\n' or '\r' opens one more (empty) line*/
    if(cur_line != 0) {
        char last = text[cur_line - 1];
        if(last == '\n' || last == '\r') {
            size_res->y += line_h + line_space;
        }
    }

    if(size_res->y == 0) {
        /*Empty text is still one line tall*/
        size_res->y = line_h;
    }
    else {
        /*There is no line space after the last line*/
        size_res->y -= line_space;
    }
}
138 
/**
 * Get the next word of text. A word is delimited by break characters.
 *
 * If the word cannot fit in the max_width space, obey LV_TXT_LINE_BREAK_LONG_* rules.
 *
 * If the next word cannot fit anything, return 0.
 *
 * If the first character is a break character, returns the next index.
 *
 * Example calls from _lv_txt_get_next_line() assuming sufficient max_width and
 * txt = "Test text\n"
 *        0123456789
 *
 * Calls would be as follows:
 *     1. Return i=4, pointing at breakchar ' ', for the string "Test"
 *     2. Return i=5, since i=4 was a breakchar.
 *     3. Return i=9, pointing at breakchar '\n'
 *     4. The calling _lv_txt_get_next_line() would detect the subsequent '\0'
 *
 * TODO: Returned word_w_ptr may overestimate the returned word's width when
 * max_width is reached. In current usage, this has no impact.
 *
 * @param txt a '\0' terminated string
 * @param font pointer to a font
 * @param letter_space letter space
 * @param max_width max width of the text (break the lines to fit this size). Set COORD_MAX to avoid line breaks
 * @param flag settings for the text from 'txt_flag_type' enum
 * @param[out] word_w_ptr width (in pixels) of the parsed word. May be NULL.
 * @param cmd_state pointer to a txt_cmd_state_t variable which stores the current state of command processing
 * @param force Force return the fraction of the word that can fit in the provided space.
 * @return the index of the first char of the next word (in byte index not letter index. With UTF-8 they are different)
 */
static uint32_t lv_txt_get_next_word(const char * txt, const lv_font_t * font,
                                     lv_coord_t letter_space, lv_coord_t max_width,
                                     lv_text_flag_t flag, uint32_t * word_w_ptr, lv_text_cmd_state_t * cmd_state, bool force)
{
    if(txt == NULL || txt[0] == '\0') return 0;
    if(font == NULL) return 0;

    if(flag & LV_TEXT_FLAG_EXPAND) max_width = LV_COORD_MAX;

    uint32_t i = 0, i_next = 0, i_next_next = 0;  /*Iterating index into txt*/
    uint32_t letter = 0;      /*Letter at i*/
    uint32_t letter_next = 0; /*Letter at i_next*/
    lv_coord_t letter_w;
    lv_coord_t cur_w = 0;  /*Pixel width of the traversed string*/
    uint32_t word_len = 0;   /*Number of characters in the traversed word*/
    uint32_t break_index = NO_BREAK_FOUND; /*only used for "long" words*/
    uint32_t break_letter_count = 0; /*Number of characters up to the long word break point*/

    /*Decode one letter ahead: `letter` starts at `i`, the following letter at `i_next`*/
    letter = _lv_txt_encoded_next(txt, &i_next);
    i_next_next = i_next;

    /*Obtain the full word, regardless if it fits or not in max_width*/
    while(txt[i] != '\0') {
        /*The next letter is passed to the glyph width query below*/
        letter_next = _lv_txt_encoded_next(txt, &i_next_next);
        word_len++;

        /*Handle the recolor command*/
        if((flag & LV_TEXT_FLAG_RECOLOR) != 0) {
            if(_lv_txt_is_cmd(cmd_state, letter) != false) {
                i = i_next;
                i_next = i_next_next;
                letter = letter_next;
                continue;   /*Skip the letter if it is part of a command*/
            }
        }

        letter_w = lv_font_get_glyph_width(font, letter, letter_next);
        cur_w += letter_w;

        /*Letter space is added only after glyphs that have a width*/
        if(letter_w > 0) {
            cur_w += letter_space;
        }

        /*Test if this character fits within max_width*/
        if(break_index == NO_BREAK_FOUND && (cur_w - letter_space) > max_width) {
            break_index = i;
            break_letter_count = word_len - 1;
            /*break_index is now pointing at the character that doesn't fit*/
        }

        /*Check for new line chars and breakchars*/
        if(letter == '\n' || letter == '\r' || _lv_txt_is_break_char(letter)) {
            /*Update the output width on the first character if it fits.
             *Must do this here in case first letter is a break character.*/
            if(i == 0 && break_index == NO_BREAK_FOUND && word_w_ptr != NULL) *word_w_ptr = cur_w;
            word_len--; /*The break character itself is not counted as part of the word*/
            break;
        }

        /*Update the output width*/
        if(word_w_ptr != NULL && break_index == NO_BREAK_FOUND) *word_w_ptr = cur_w;

        /*Step the look-ahead window by one letter*/
        i = i_next;
        i_next = i_next_next;
        letter = letter_next;
    }

    /*Entire Word fits in the provided space*/
    if(break_index == NO_BREAK_FOUND) {
        /*Step over a leading break character, or over the '\r' of a "\r\n" pair*/
        if(word_len == 0 || (letter == '\r' && letter_next == '\n')) i = i_next;
        return i;
    }

#if LV_TXT_LINE_BREAK_LONG_LEN > 0
    /*Word doesn't fit in provided space, but isn't "long"*/
    if(word_len < LV_TXT_LINE_BREAK_LONG_LEN) {
        if(force) return break_index;
        if(word_w_ptr != NULL) *word_w_ptr = 0; /*Return no word*/
        return 0;
    }

    /*Word is "long," but insufficient amounts can fit in provided space*/
    if(break_letter_count < LV_TXT_LINE_BREAK_LONG_PRE_MIN_LEN) {
        if(force) return break_index;
        if(word_w_ptr != NULL) *word_w_ptr = 0;
        return 0;
    }

    /*Word is a "long", but letters may need to be better distributed*/
    {
        i = break_index;
        /*Number of letters to give back so that at least
         *LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN letters remain after the break*/
        int32_t n_move = LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN - (word_len - break_letter_count);
        /*Move pointer "i" backwards*/
        for(; n_move > 0; n_move--) {
            _lv_txt_encoded_prev(txt, &i);
            // TODO: it would be appropriate to update the returned word width here
            // However, in current usage, this doesn't impact anything.
        }
    }
    return i;
#else
    if(force) return break_index;
    if(word_w_ptr != NULL) *word_w_ptr = 0; /*Return no word*/
    (void) break_letter_count;
    return 0;
#endif
}
278 
/**
 * Get the byte index where the next line of a text starts.
 * @param txt a '\0' terminated string
 * @param font pointer to a font
 * @param letter_space letter space
 * @param max_width max width of the text (break the lines to fit this size)
 * @param used_width when not NULL, receives the width of the line. It is set to 0 if nothing
 *                   could be processed and to -1 with `LV_TEXT_FLAG_EXPAND` or `LV_TEXT_FLAG_FIT`,
 *                   where the width is not measured.
 * @param flag settings for the text from 'lv_text_flag_t'
 * @return the index of the first char of the new line (in byte index not letter index.
 *         With UTF-8 they are different). 0 if `txt` is NULL or empty or `font` is NULL.
 */
uint32_t _lv_txt_get_next_line(const char * txt, const lv_font_t * font,
                               lv_coord_t letter_space, lv_coord_t max_width,
                               lv_coord_t * used_width, lv_text_flag_t flag)
{
    if(used_width) *used_width = 0;

    if(txt == NULL) return 0;
    if(txt[0] == '\0') return 0;
    if(font == NULL) return 0;

    lv_coord_t line_w = 0;

    /*If max_width doesn't matter simply find the new line character
     *without thinking about word wrapping*/
    if((flag & LV_TEXT_FLAG_EXPAND) || (flag & LV_TEXT_FLAG_FIT)) {
        uint32_t i;
        for(i = 0; txt[i] != '\n' && txt[i] != '\r' && txt[i] != '\0'; i++) {
            /*Just find the new line chars or string ends by incrementing `i`*/
        }
        if(txt[i] != '\0') i++;    /*To go beyond `\n`*/
        if(used_width) *used_width = -1;
        return i;
    }

    /*`LV_TEXT_FLAG_EXPAND` has already returned above, so `max_width` is always a real limit here*/
    lv_text_cmd_state_t cmd_state = LV_TEXT_CMD_STATE_WAIT;
    uint32_t i = 0;                                        /*Iterating index into txt*/

    /*Consume word by word while there is still room on the line*/
    while(txt[i] != '\0' && max_width > 0) {
        uint32_t word_w = 0;
        /*Only the first word of the line may be split by force*/
        uint32_t advance = lv_txt_get_next_word(&txt[i], font, letter_space, max_width, flag, &word_w, &cmd_state, i == 0);
        max_width -= word_w;
        line_w += word_w;

        if(advance == 0) {
            break;
        }

        i += advance;

        /*The line started with a line break, which has just been consumed*/
        if(txt[0] == '\n' || txt[0] == '\r') break;

        if(txt[i] == '\n' || txt[i] == '\r') {
            i++;  /*Include the following newline in the current line*/
            break;
        }

    }

    /*Always step at least one to avoid infinite loops*/
    if(i == 0) {
        uint32_t letter = _lv_txt_encoded_next(txt, &i);
        if(used_width != NULL) {
            line_w = lv_font_get_glyph_width(font, letter, '\0');
        }
    }

    if(used_width != NULL) {
        *used_width = line_w;
    }

    return i;
}
342 
/**
 * Give the pixel width of the first `length` bytes of a text with a given font.
 * @param txt a '\0' terminated string
 * @param length number of bytes of `txt` to measure
 * @param font pointer to a font
 * @param letter_space letter space, added between letters but not after the last one
 * @param flag settings for the text from 'lv_text_flag_t'. With `LV_TEXT_FLAG_RECOLOR`
 *             the letters of recolor commands are not measured.
 * @return the measured width, 0 if `txt` is NULL or empty or `font` is NULL
 */
lv_coord_t lv_txt_get_width(const char * txt, uint32_t length, const lv_font_t * font, lv_coord_t letter_space,
                            lv_text_flag_t flag)
{
    if(txt == NULL || font == NULL) return 0;
    if(txt[0] == '\0') return 0;

    lv_text_cmd_state_t cmd_state = LV_TEXT_CMD_STATE_WAIT;
    lv_coord_t total_w            = 0;
    uint32_t ofs                  = 0;

    while(ofs < length) {
        uint32_t letter;
        uint32_t letter_next;
        _lv_txt_encoded_letter_next_2(txt, &letter, &letter_next, &ofs);

        /*Letters that belong to a recolor command are not drawn*/
        if((flag & LV_TEXT_FLAG_RECOLOR) != 0 && _lv_txt_is_cmd(&cmd_state, letter) != false) {
            continue;
        }

        lv_coord_t glyph_w = lv_font_get_glyph_width(font, letter, letter_next);
        if(glyph_w <= 0) continue; /*Glyphs without width add no letter space either*/

        total_w += glyph_w;
        total_w += letter_space;
    }

    /*Trim the last letter space. Important if the text is center aligned*/
    if(total_w > 0) total_w -= letter_space;

    return total_w;
}
381 
/**
 * Feed one letter to the recolor command state machine.
 * @param state pointer to the state of command processing. It is updated by this call.
 * @param c the current letter
 * @return true: the letter is part of a command and must not be handled as text; false: normal letter
 */
bool _lv_txt_is_cmd(lv_text_cmd_state_t * state, uint32_t c)
{
    bool part_of_cmd = false;

    if(c == (uint32_t)LV_TXT_COLOR_CMD[0]) {
        const lv_text_cmd_state_t state_on_entry = *state;

        if(state_on_entry == LV_TEXT_CMD_STATE_WAIT) {
            /*Start char: the parameter follows*/
            *state      = LV_TEXT_CMD_STATE_PAR;
            part_of_cmd = true;
        }
        else if(state_on_entry == LV_TEXT_CMD_STATE_PAR) {
            /*A second command char right in the parameter is an escaped command char*/
            *state = LV_TEXT_CMD_STATE_WAIT;
        }
        else if(state_on_entry == LV_TEXT_CMD_STATE_IN) {
            /*Command end*/
            *state      = LV_TEXT_CMD_STATE_WAIT;
            part_of_cmd = true;
        }
    }

    if(*state != LV_TEXT_CMD_STATE_PAR) return part_of_cmd;

    /*Skip the color parameter. The space after it starts the text of the command*/
    if(c == ' ') *state = LV_TEXT_CMD_STATE_IN;

    return true;
}
412 
/**
 * Insert a string into an other one in place.
 * @param txt_buf the original '\0' terminated text. The caller must provide enough room
 *                in the buffer for the inserted text too.
 * @param pos position to insert, as a letter index (0: before the original text).
 *            Positions beyond the end of the text append to it.
 * @param ins_txt '\0' terminated text to insert
 */
void _lv_txt_ins(char * txt_buf, uint32_t pos, const char * ins_txt)
{
    if(txt_buf == NULL || ins_txt == NULL) return;

    size_t old_len = strlen(txt_buf);
    size_t ins_len = strlen(ins_txt);
    if(ins_len == 0) return;

    size_t new_len = ins_len + old_len;
    pos            = _lv_txt_encoded_get_byte_id(txt_buf, pos); /*Convert to byte index instead of letter index*/

    /*Not every encoding limits the byte index to the text. Without this the new
     *text would be copied behind the terminator, where it would be lost.*/
    if(pos > old_len) pos = (uint32_t)old_len;

    /*Move the part after `pos` (with the terminator) to the end to make place for the new text.
     *`pos + ins_len` is at least 1, so `i` can not wrap around.*/
    size_t i;
    for(i = new_len; i >= pos + ins_len; i--) {
        txt_buf[i] = txt_buf[i - ins_len];
    }

    /*Copy the text into the new space*/
    lv_memcpy_small(txt_buf + pos, ins_txt, ins_len);
}
433 
/**
 * Delete a part of a string in place.
 * @param txt '\0' terminated string to modify
 * @param pos position where to start the deleting, as a letter index (0: before the first char)
 * @param len number of letters to delete. It is limited to the letters available after `pos`.
 */
void _lv_txt_cut(char * txt, uint32_t pos, uint32_t len)
{
    if(txt == NULL) return;

    size_t old_len = strlen(txt);

    pos = _lv_txt_encoded_get_byte_id(txt, pos); /*Convert to byte index instead of letter index*/
    if(pos >= old_len) return;                   /*Nothing to delete at or after the end of the text*/

    len = _lv_txt_encoded_get_byte_id(&txt[pos], len);

    /*Not every encoding limits the byte count to the text. Limit it here,
     *otherwise `old_len - len` below would wrap around.*/
    if(len > old_len - pos) len = (uint32_t)(old_len - pos);
    if(len == 0) return;

    /*Move the part after the deleted range (with the terminator) over it*/
    size_t i;
    for(i = pos; i <= old_len - len; i++) {
        txt[i] = txt[i + len];
    }
}
449 
/**
 * Create a new text from a printf-like format string and its arguments.
 * @param fmt printf-like format string
 * @param ap the arguments of `fmt`
 * @return the formatted text in a buffer obtained from `lv_mem_alloc`, or NULL if the
 *         formatting or an allocation failed
 */
char * _lv_txt_set_text_vfmt(const char * fmt, va_list ap)
{
    /*Allocate space for the new text by using trick from C99 standard section 7.19.6.12*/
    va_list ap_copy;
    va_copy(ap_copy, ap);
    int needed = lv_vsnprintf(NULL, 0, fmt, ap_copy);
    va_end(ap_copy);

    /*A negative result means an error. Used as an unsigned length it would make `len + 1` wrap.*/
    if(needed < 0) return NULL;
    uint32_t len = (uint32_t)needed;

    char * text = NULL;
#if LV_USE_ARABIC_PERSIAN_CHARS
    /*Put together the text according to the format string*/
    char * raw_txt = lv_mem_buf_get(len + 1);
    LV_ASSERT_MALLOC(raw_txt);
    if(raw_txt == NULL) {
        return NULL;
    }

    lv_vsnprintf(raw_txt, len + 1, fmt, ap);

    /*Get the size of the Arabic text and process it*/
    size_t len_ap = _lv_txt_ap_calc_bytes_cnt(raw_txt);
    text = lv_mem_alloc(len_ap + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
        lv_mem_buf_release(raw_txt); /*Don't keep the temporary buffer reserved on failure*/
        return NULL;
    }
    _lv_txt_ap_proc(raw_txt, text);

    lv_mem_buf_release(raw_txt);
#else
    text = lv_mem_alloc(len + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
        return NULL;
    }
    text[len] = 0; /*Ensure NULL termination*/

    lv_vsnprintf(text, len + 1, fmt, ap);
#endif

    return text;
}
492 
/**
 * Decode the letter at `*ofs` and peek at the letter after it.
 * @param txt pointer to a '\0' terminated string
 * @param letter receives the letter at `*ofs`
 * @param letter_next receives the following letter, or 0 if `letter` is the terminator
 * @param ofs byte index of the letter to decode. It is moved to the following letter.
 */
void _lv_txt_encoded_letter_next_2(const char * txt, uint32_t * letter, uint32_t * letter_next, uint32_t * ofs)
{
    *letter = _lv_txt_encoded_next(txt, ofs);

    if(*letter == '\0') {
        /*Nothing to peek at after the end of the text*/
        *letter_next = 0;
    }
    else {
        /*NULL index: decode at `*ofs` without moving it*/
        *letter_next = _lv_txt_encoded_next(&txt[*ofs], NULL);
    }
}
498 
499 #if LV_TXT_ENC == LV_TXT_ENC_UTF8
500 /*******************************
501  *   UTF-8 ENCODER/DECODER
502  ******************************/
503 
/**
 * Tell how many bytes an UTF-8 coded character occupies, based on its first byte.
 * @param str pointer to the first byte of a character in a string
 * @return length of the UTF-8 character (1,2,3 or 4), 0 if `str[0]` can't start a character
 */
static uint8_t lv_txt_utf8_size(const char * str)
{
    const char lead = str[0];

    if(LV_IS_ASCII(lead)) return 1;
    if(LV_IS_2BYTES_UTF8_CODE(lead)) return 2;
    if(LV_IS_3BYTES_UTF8_CODE(lead)) return 3;
    if(LV_IS_4BYTES_UTF8_CODE(lead)) return 4;

    return 0; /*Continuation byte or invalid code*/
}
521 
/**
 * Convert a Unicode letter to UTF-8.
 * @param letter_uni a Unicode letter
 * @return the bytes of the UTF-8 coded character stored in memory order in an `uint32_t`
 *         (unused trailing bytes are 0), i.e. Little Endian to be compatible with C chars
 *         (e.g. 'Á', 'Ű'). 0 if `letter_uni` is out of the Unicode range.
 */
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni)
{
    if(letter_uni < 128) return letter_uni;

    /*The union gives the byte-wise view without casting a byte array to `uint32_t *`,
     *which would break the aliasing rules and might be misaligned*/
    union {
        uint8_t bytes[4];
        uint32_t word;
    } enc;
    enc.word = 0;

    if(letter_uni < 0x0800) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 6) & 0x1F) | 0xC0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else if(letter_uni < 0x010000) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 12) & 0x0F) | 0xE0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 6) & 0x3F) | 0x80);
        enc.bytes[2] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else if(letter_uni < 0x110000) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 18) & 0x07) | 0xF0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 12) & 0x3F) | 0x80);
        enc.bytes[2] = (uint8_t)(((letter_uni >> 6) & 0x3F) | 0x80);
        enc.bytes[3] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else {
        return 0;
    }

    return enc.word;
}
557 
/**
 * Convert a wide character, e.g. 'Á' little endian to be UTF-8 compatible
 * @param c a wide character or a Little endian number
 * @return `c` in big endian. `c` is returned unchanged if bit 7 of its lowest byte is
 *         not set or if `LV_BIG_ENDIAN_SYSTEM` is not 0.
 */
static uint32_t lv_txt_utf8_conv_wc(uint32_t c)
{
#if LV_BIG_ENDIAN_SYSTEM == 0
    /*Swap the bytes (UTF-8 is big endian, but the MCUs are little endian)*/
    if((c & 0x80) != 0) {
        /*Reverse the byte order with unsigned arithmetic only. Shifting a promoted
         *`uint8_t` left by 24 would overflow `int` for bytes >= 0x80.*/
        uint32_t swapped = ((c & 0x000000FFU) << 24) | ((c & 0x0000FF00U) << 8) |
                           ((c & 0x00FF0000U) >> 8)  | ((c & 0xFF000000U) >> 24);
        uint8_t i;
        for(i = 0; i < 4; i++) {
            if((swapped & 0xFF) == 0)
                swapped = (swapped >> 8); /*Ignore leading zeros (they were in the end originally)*/
        }
        c = swapped;
    }
#endif
    return c;
}
582 
/**
 * Decode an UTF-8 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start.
 *          After call it will point to the next UTF-8 char in 'txt'.
 *          NULL to use txt[0] as index
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i)
{
    /**
     * Unicode to UTF-8
     * 00000000 00000000 00000000 0xxxxxxx -> 0xxxxxxx
     * 00000000 00000000 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
     * 00000000 00000000 zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
     * 00000000 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
     */

    /*Use a local index if the caller doesn't provide one*/
    uint32_t local_idx = 0;
    if(i == NULL) i = &local_idx;

    /*Normal ASCII*/
    if(LV_IS_ASCII(txt[*i])) {
        uint32_t ascii = txt[*i];
        (*i)++;
        return ascii;
    }

    /*The first byte tells the number of continuation bytes and holds the top bits*/
    uint32_t code;
    uint8_t tail_cnt;
    if(LV_IS_2BYTES_UTF8_CODE(txt[*i])) {
        code     = (uint32_t)(txt[*i] & 0x1F);
        tail_cnt = 1;
    }
    else if(LV_IS_3BYTES_UTF8_CODE(txt[*i])) {
        code     = (uint32_t)(txt[*i] & 0x0F);
        tail_cnt = 2;
    }
    else if(LV_IS_4BYTES_UTF8_CODE(txt[*i])) {
        code     = (uint32_t)(txt[*i] & 0x07);
        tail_cnt = 3;
    }
    else {
        (*i)++; /*Not UTF-8 char. Go the next.*/
        return 0;
    }
    (*i)++;

    /*Every continuation byte adds 6 more bits. On an invalid one the index is left on it.*/
    while(tail_cnt > 0) {
        if(LV_IS_INVALID_UTF8_CODE(txt[*i])) return 0;
        code = (code << 6) + (uint32_t)(txt[*i] & 0x3F);
        (*i)++;
        tail_cnt--;
    }

    return code;
}
658 
/**
 * Get previous UTF-8 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start. After the call it will point to the previous
 * UTF-8 char in 'txt'. It is left unchanged if it is already 0.
 * @return the decoded Unicode character or 0 on invalid UTF-8 code or if there is no previous character
 */
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i)
{
    uint8_t c_size;
    uint8_t cnt = 0;

    /*There is nothing before the first byte. Decrementing would wrap the index around.*/
    if(*i == 0) return 0;

    /*Try to find a !0 long UTF-8 char by stepping one character back*/
    (*i)--;
    do {
        if(cnt >= 4) return 0; /*No UTF-8 char found before the initial*/

        c_size = _lv_txt_encoded_size(&txt[*i]);
        if(c_size == 0) {
            /*Not the first byte of a character, step further back*/
            if(*i != 0)
                (*i)--;
            else
                return 0;
        }
        cnt++;
    } while(c_size == 0);

    /*Decode with a copy of the index to keep `*i` on the found character*/
    uint32_t i_tmp  = *i;
    uint32_t letter = _lv_txt_encoded_next(txt, &i_tmp); /*Character found, get it*/

    return letter;
}
691 
/**
 * Convert a character index (in an UTF-8 text) to byte index.
 * E.g. in "AÁRT" 'R' is the character with index 2 but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter. The counting stops at the end of the text.
 */
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id)
{
    uint32_t byte_ofs  = 0;
    uint32_t remaining = utf8_id; /*Characters still to step over*/

    while(remaining > 0 && txt[byte_ofs] != '\0') {
        uint8_t step = _lv_txt_encoded_size(&txt[byte_ofs]);
        if(step == 0) step = 1; /*If the char was invalid tell it's 1 byte long*/

        byte_ofs += step;
        remaining--;
    }

    return byte_ofs;
}
711 
/**
 * Convert a byte index (in an UTF-8 text) to character index.
 * E.g. in "AÁRT" 'R' is the character with index 2 but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param byte_id byte index
 * @return character index of the letter at 'byte_id'th position. If `byte_id` is beyond
 *         the end of the text the number of characters in the text is returned.
 */
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id)
{
    uint32_t i        = 0;
    uint32_t char_cnt = 0;

    /*Stop at the terminator too, to never read beyond the text*/
    while(i < byte_id && txt[i] != '\0') {
        _lv_txt_encoded_next(txt, &i); /*'i' points to the next letter so use the prev. value*/
        char_cnt++;
    }

    return char_cnt;
}
731 
/**
 * Count the characters (and NOT bytes) of an UTF-8 string.
 * E.g.: "ÁBC" is 3 characters (but 4 bytes)
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_utf8_get_length(const char * txt)
{
    uint32_t char_cnt = 0;
    uint32_t byte_ofs;

    /*Every decoding step moves `byte_ofs` to the next character*/
    for(byte_ofs = 0; txt[byte_ofs] != '\0'; char_cnt++) {
        _lv_txt_encoded_next(txt, &byte_ofs);
    }

    return char_cnt;
}
750 
751 #elif LV_TXT_ENC == LV_TXT_ENC_ASCII
752 /*******************************
753  *  ASCII ENCODER/DECODER
754  ******************************/
755 
/**
 * Give the size of an ISO8859-1 coded character
 * @param str pointer to a character in a string (not used)
 * @return always 1
 */
static uint8_t lv_txt_iso8859_1_size(const char * str)
{
    LV_UNUSED(str); /*The size doesn't depend on the character*/
    return 1;
}
766 
/**
 * Convert a Unicode letter to ISO8859-1.
 * @param letter_uni a Unicode letter
 * @return `letter_uni` itself if it is below 256, else a space (' ')
 */
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni)
{
    /*Letters that don't fit into one byte are replaced by a space*/
    return (letter_uni < 256) ? letter_uni : ' ';
}
779 
/**
 * Counterpart of the wide character conversion of other encodings.
 * With one byte per character there is nothing to convert.
 * @param c a character, e.g. 'A'
 * @return same as `c`
 */
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c)
{
    return c; /*No conversion is needed*/
}
790 
/**
 * Decode an ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start.
 *          After call it will point to the next char in 'txt'.
 *          NULL to use txt[0] as index
 * @return the character as a value in the 0..255 range
 */
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i)
{
    /*Read the byte as unsigned: with a signed `char` the characters
     *above 0x7F would be sign extended to 0xFFFFFFxx*/
    if(i == NULL) return (uint8_t)txt[0]; /*Get the next char*/

    uint8_t letter = (uint8_t)txt[*i];
    (*i)++;
    return letter;
}
807 
/**
 * Get previous ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start. After the call it will point to the previous char in 'txt'.
 *          It is left unchanged if it is already 0. NULL to get the character right before `txt`.
 * @return the character as a value in the 0..255 range, 0 if there is no previous character
 */
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i)
{
    /*Read the bytes as unsigned: with a signed `char` the characters
     *above 0x7F would be sign extended to 0xFFFFFFxx*/
    if(i == NULL) return (uint8_t) * (txt - 1); /*Get the prev. char*/

    /*There is nothing before the first byte. Decrementing would wrap the index around.*/
    if(*i == 0) return 0;

    (*i)--;
    uint8_t letter = (uint8_t)txt[*i];

    return letter;
}
823 
/**
 * Convert a character index (in an ISO8859-1 text) to byte index.
 * Every character is one byte long, so the two indices are the same.
 * @param txt a '\0' terminated string (not used)
 * @param utf8_id character index
 * @return `utf8_id` unchanged
 */
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id)
{
    LV_UNUSED(txt); /*The text is not needed for the conversion*/
    return utf8_id;
}
836 
/**
 * Convert a byte index (in an ISO8859-1 text) to character index.
 * Every character is one byte long, so the two indices are the same.
 * @param txt a '\0' terminated string (not used)
 * @param byte_id byte index
 * @return `byte_id` unchanged
 */
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id)
{
    LV_UNUSED(txt); /*The text is not needed for the conversion*/
    return byte_id;
}
849 
/**
 * Get the number of characters in an ISO8859-1 string.
 * Every character is one byte long, so it is the same as the number of bytes.
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_iso8859_1_get_length(const char * txt)
{
    size_t byte_cnt = strlen(txt);
    return byte_cnt;
}
860 #else
861 
862 #error "Invalid character encoding. See `LV_TXT_ENC` in `lv_conf.h`"
863 
864 #endif
865