1 /**
2  * @file lv_text.c
3  *
4  */
5 
6 /*********************
7  *      INCLUDES
8  *********************/
9 #include "lv_text_private.h"
10 #include "lv_text_ap.h"
11 #include "lv_math.h"
12 #include "lv_log.h"
13 #include "lv_assert.h"
14 #include "../stdlib/lv_mem.h"
15 #include "../stdlib/lv_string.h"
16 #include "../misc/lv_types.h"
17 
18 /*********************
19  *      DEFINES
20  *********************/
21 #define NO_BREAK_FOUND UINT32_MAX
22 
23 /**********************
24  *      TYPEDEFS
25  **********************/
26 
27 /**********************
28  *  STATIC PROTOTYPES
29  **********************/
30 
/*
 * Forward declarations of the encoding specific helpers.
 * Exactly one set is declared, depending on the value of LV_TXT_ENC.
 * They are static and get exported only through the `lv_text_encoded_*`
 * function pointers defined further down in this file.
 */
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    static uint8_t lv_text_utf8_size(const char * str);
    static uint32_t lv_text_unicode_to_utf8(uint32_t letter_uni);
    static uint32_t lv_text_utf8_conv_wc(uint32_t c);
    static uint32_t lv_text_utf8_next(const char * txt, uint32_t * i);
    static uint32_t lv_text_utf8_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_text_utf8_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_text_utf8_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_text_utf8_get_length(const char * txt);
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    static uint8_t lv_text_iso8859_1_size(const char * str);
    static uint32_t lv_text_unicode_to_iso8859_1(uint32_t letter_uni);
    static uint32_t lv_text_iso8859_1_conv_wc(uint32_t c);
    static uint32_t lv_text_iso8859_1_next(const char * txt, uint32_t * i);
    static uint32_t lv_text_iso8859_1_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_text_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_text_iso8859_1_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_text_iso8859_1_get_length(const char * txt);
#endif
50 /**********************
51  *  STATIC VARIABLES
52  **********************/
53 
54 /**********************
55  *  GLOBAL VARIABLES
56  **********************/
/*
 * Encoding independent entry points.
 * Each constant function pointer is bound at compile time to the UTF-8 or
 * to the ISO8859-1 implementation, depending on LV_TXT_ENC.
 */
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    uint8_t (*const lv_text_encoded_size)(const char *)                   = lv_text_utf8_size;
    uint32_t (*const lv_text_unicode_to_encoded)(uint32_t)                = lv_text_unicode_to_utf8;
    uint32_t (*const lv_text_encoded_conv_wc)(uint32_t)                   = lv_text_utf8_conv_wc;
    uint32_t (*const lv_text_encoded_next)(const char *, uint32_t *)      = lv_text_utf8_next;
    uint32_t (*const lv_text_encoded_prev)(const char *, uint32_t *)      = lv_text_utf8_prev;
    uint32_t (*const lv_text_encoded_get_byte_id)(const char *, uint32_t) = lv_text_utf8_get_byte_id;
    uint32_t (*const lv_text_encoded_get_char_id)(const char *, uint32_t) = lv_text_utf8_get_char_id;
    uint32_t (*const lv_text_get_encoded_length)(const char *)            = lv_text_utf8_get_length;
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    uint8_t (*const lv_text_encoded_size)(const char *)                   = lv_text_iso8859_1_size;
    uint32_t (*const lv_text_unicode_to_encoded)(uint32_t)                = lv_text_unicode_to_iso8859_1;
    uint32_t (*const lv_text_encoded_conv_wc)(uint32_t)                   = lv_text_iso8859_1_conv_wc;
    uint32_t (*const lv_text_encoded_next)(const char *, uint32_t *)      = lv_text_iso8859_1_next;
    uint32_t (*const lv_text_encoded_prev)(const char *, uint32_t *)      = lv_text_iso8859_1_prev;
    uint32_t (*const lv_text_encoded_get_byte_id)(const char *, uint32_t) = lv_text_iso8859_1_get_byte_id;
    uint32_t (*const lv_text_encoded_get_char_id)(const char *, uint32_t)     = lv_text_iso8859_1_get_char_id;
    uint32_t (*const lv_text_get_encoded_length)(const char *)            = lv_text_iso8859_1_get_length;

#endif
77 
78 /**********************
79  *      MACROS
80  **********************/
81 
/*
 * Classify a single byte of an UTF-8 stream by its high bits:
 *   0xxxxxxx  ASCII (1 byte character)
 *   110xxxxx  lead byte of a 2 bytes long character
 *   1110xxxx  lead byte of a 3 bytes long character
 *   11110xxx  lead byte of a 4 bytes long character
 *   10xxxxxx  continuation byte; anything else is invalid as a continuation
 * The argument is parenthesized so that expressions with lower precedence
 * than `&` (e.g. `a | b`) are classified as a whole.
 */
#define LV_IS_ASCII(value)              (((value) & 0x80U) == 0x00U)
#define LV_IS_2BYTES_UTF8_CODE(value)   (((value) & 0xE0U) == 0xC0U)
#define LV_IS_3BYTES_UTF8_CODE(value)   (((value) & 0xF0U) == 0xE0U)
#define LV_IS_4BYTES_UTF8_CODE(value)   (((value) & 0xF8U) == 0xF0U)
#define LV_IS_INVALID_UTF8_CODE(value)  (((value) & 0xC0U) != 0x80U)
87 
88 /**********************
89  *   GLOBAL FUNCTIONS
90  **********************/
91 
/**
 * Measure a text: the width of its longest line and the summed height of all lines.
 * @param size_res      result; `x` receives the longest line width, `y` the total height.
 *                      It is written unconditionally, so it must not be NULL.
 *                      Left as 0/0 if `text` or `font` is NULL.
 * @param text          '\0' terminated string to measure
 * @param font          font used to get the line height and the glyph widths
 * @param letter_space  space between letters
 * @param line_space    space between lines
 * @param max_width     width limit used for line breaking (replaced by LV_COORD_MAX with LV_TEXT_FLAG_EXPAND)
 * @param flag          text flags (lv_text_flag_t)
 */
void lv_text_get_size(lv_point_t * size_res, const char * text, const lv_font_t * font, int32_t letter_space,
                      int32_t line_space, int32_t max_width, lv_text_flag_t flag)
{
    size_res->x = 0;
    size_res->y = 0;

    if(text == NULL || font == NULL) return;

    if(flag & LV_TEXT_FLAG_EXPAND) max_width = LV_COORD_MAX;

    uint16_t line_height = lv_font_get_line_height(font);
    uint32_t cur_line    = 0;   /*Byte index of the line being measured*/

    /*Walk the text line by line, accumulating the height and tracking the widest line*/
    while(text[cur_line] != '\0') {
        uint32_t next_line = cur_line + lv_text_get_next_line(&text[cur_line], LV_TEXT_LEN_MAX, font,
                                                              letter_space, max_width, NULL, flag);

        /*Give up before the height would no longer fit into an int32_t*/
        if((unsigned long)size_res->y + (unsigned long)line_height + (unsigned long)line_space > LV_MAX_OF(int32_t)) {
            LV_LOG_WARN("integer overflow while calculating text height");
            return;
        }

        size_res->y += line_height;
        size_res->y += line_space;

        int32_t line_width = lv_text_get_width(&text[cur_line], next_line - cur_line, font, letter_space);
        size_res->x = LV_MAX(line_width, size_res->x);

        cur_line = next_line;
    }

    /*A trailing '\n' or '\r' opens one more (empty) line*/
    if(cur_line != 0) {
        char last = text[cur_line - 1];
        if(last == '\n' || last == '\r') {
            size_res->y += line_height + line_space;
        }
    }

    /*An empty text is still one line tall; otherwise drop the line space added after the last line*/
    if(size_res->y == 0) {
        size_res->y = line_height;
    }
    else {
        size_res->y -= line_space;
    }
}
138 
/**
 * Feed one character to the recolor command state machine and tell whether
 * the character belongs to a command (i.e. must not be rendered).
 *
 * State transitions on the command character (LV_TXT_COLOR_CMD[0]):
 *  - WAIT -> PAR : a command starts, the character is part of the command
 *  - PAR  -> WAIT: a second command character directly in the parameter is an
 *                  escaped command character, it is NOT part of a command
 *  - IN   -> WAIT: the command ends, the character is part of the command
 * While in PAR every character is part of the command; a space moves to IN.
 *
 * @param state pointer to the state variable; it is updated by this function
 * @param c     the current character
 * @return true: `c` is part of a command; false: `c` is normal text
 */
bool lv_text_is_cmd(lv_text_cmd_state_t * state, uint32_t c)
{
    bool ret = false;

    if(c == (uint32_t)LV_TXT_COLOR_CMD[0]) {
        if(*state == LV_TEXT_CMD_STATE_WAIT) { /*Start char*/
            *state = LV_TEXT_CMD_STATE_PAR;
            ret = true;
        }
        /*Other start char in parameter is escaped cmd. char*/
        else if(*state == LV_TEXT_CMD_STATE_PAR) {
            *state = LV_TEXT_CMD_STATE_WAIT;
        }
        /*Command end*/
        else if(*state == LV_TEXT_CMD_STATE_IN) {
            *state = LV_TEXT_CMD_STATE_WAIT;
            ret = true;
        }
    }

    /*Skip the color parameter and wait the space after it*/
    if(*state == LV_TEXT_CMD_STATE_PAR) {
        if(c == ' ') {
            *state = LV_TEXT_CMD_STATE_IN; /*After the parameter the text is in the command*/
        }
        ret = true;
    }

    return ret;
}
169 
170 /**
171  * Get the next word of text. A word is delimited by break characters.
172  *
173  * If the word cannot fit in the max_width space, obey LV_TXT_LINE_BREAK_LONG_* rules.
174  *
175  * If the next word cannot fit anything, return 0.
176  *
177  * If the first character is a break character, returns the next index.
178  *
179  * Example calls from lv_text_get_next_line() assuming sufficient max_width and
180  * txt = "Test text\n"
181  *        0123456789
182  *
183  * Calls would be as follows:
184  *     1. Return i=4, pointing at breakchar ' ', for the string "Test"
185  *     2. Return i=5, since i=4 was a breakchar.
186  *     3. Return i=9, pointing at breakchar '\n'
187  *     4. Parenting lv_text_get_next_line() would detect subsequent '\0'
188  *
189  * TODO: Returned word_w_ptr may overestimate the returned word's width when
190  * max_width is reached. In current usage, this has no impact.
191  *
192  * @param txt a '\0' terminated string
193  * @param font pointer to a font
194  * @param letter_space letter space
195  * @param max_width max width of the text (break the lines to fit this size). Set COORD_MAX to avoid line breaks
196  * @param flags settings for the text from 'txt_flag_type' enum
197  * @param[out] word_w_ptr width (in pixels) of the parsed word. May be NULL.
198  * @param cmd_state Pointer to a lv_text_cmd_state_t variable which stored the current state of command processing
199  * @return the index of the first char of the next word (in byte index not letter index. With UTF-8 they are different)
200  */
lv_text_get_next_word(const char * txt,const lv_font_t * font,int32_t letter_space,int32_t max_width,lv_text_flag_t flag,uint32_t * word_w_ptr,lv_text_cmd_state_t * cmd_state)201 static uint32_t lv_text_get_next_word(const char * txt, const lv_font_t * font,
202                                       int32_t letter_space, int32_t max_width,
203                                       lv_text_flag_t flag, uint32_t * word_w_ptr,
204                                       lv_text_cmd_state_t * cmd_state)
205 {
206     if(txt == NULL || txt[0] == '\0') return 0;
207     if(font == NULL) return 0;
208 
209     if(flag & LV_TEXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
210 
211     uint32_t i = 0, i_next = 0, i_next_next = 0;  /*Iterating index into txt*/
212     uint32_t letter = 0;      /*Letter at i*/
213     uint32_t letter_next = 0; /*Letter at i_next*/
214     int32_t letter_w;
215     int32_t cur_w = 0;  /*Pixel Width of traversed string*/
216     uint32_t word_len = 0;   /*Number of characters in the traversed word*/
217     uint32_t break_index = NO_BREAK_FOUND; /*only used for "long" words*/
218     uint32_t break_letter_count = 0; /*Number of characters up to the long word break point*/
219 
220     letter = lv_text_encoded_next(txt, &i_next);
221     i_next_next = i_next;
222 
223     /*Obtain the full word, regardless if it fits or not in max_width*/
224     while(txt[i] != '\0') {
225         letter_next = lv_text_encoded_next(txt, &i_next_next);
226         word_len++;
227 
228         /*Handle the recolor command*/
229         if((flag & LV_TEXT_FLAG_RECOLOR) != 0) {
230             if(lv_text_is_cmd(cmd_state, letter)) {
231                 i = i_next;
232                 i_next = i_next_next;
233                 letter = letter_next;
234                 continue;   /*Skip the letter if it is part of a command*/
235             }
236         }
237 
238         letter_w = lv_font_get_glyph_width(font, letter, letter_next);
239         cur_w += letter_w;
240 
241         if(letter_w > 0) {
242             cur_w += letter_space;
243         }
244 
245         /*Test if this character fits within max_width*/
246         if(break_index == NO_BREAK_FOUND && (cur_w - letter_space) > max_width) {
247             break_index = i;
248             break_letter_count = word_len - 1;
249             if(flag & LV_TEXT_FLAG_BREAK_ALL) {
250                 break;
251             }
252             /*break_index is now pointing at the character that doesn't fit*/
253         }
254 
255         /*Check for new line chars and breakchars*/
256         if(letter == '\n' || letter == '\r' || lv_text_is_break_char(letter)) {
257             /*Update the output width on the first character if it fits.
258              *Must do this here in case first letter is a break character.*/
259             if(i == 0 && break_index == NO_BREAK_FOUND && word_w_ptr != NULL) *word_w_ptr = cur_w;
260             word_len--;
261             break;
262         }
263         else if(lv_text_is_a_word(letter_next) || lv_text_is_a_word(letter)) {
264             /*Found a word for single letter, usually true for CJK*/
265             *word_w_ptr = cur_w;
266             i = i_next;
267             break;
268         }
269 
270         /*Update the output width*/
271         if(word_w_ptr != NULL && break_index == NO_BREAK_FOUND) *word_w_ptr = cur_w;
272 
273         i = i_next;
274         i_next = i_next_next;
275         letter = letter_next;
276     }
277 
278     /*Entire Word fits in the provided space*/
279     if(break_index == NO_BREAK_FOUND) {
280         if(word_len == 0 || (letter == '\r' && letter_next == '\n')) i = i_next;
281         return i;
282     }
283 
284 #if LV_TXT_LINE_BREAK_LONG_LEN > 0
285     /*Word doesn't fit in provided space, but isn't "long"*/
286     if(word_len < LV_TXT_LINE_BREAK_LONG_LEN) {
287         if(flag & LV_TEXT_FLAG_BREAK_ALL) return break_index;
288         if(word_w_ptr != NULL) *word_w_ptr = 0; /*Return no word*/
289         return 0;
290     }
291 
292     /*Word is "long," but insufficient amounts can fit in provided space*/
293     if(break_letter_count < LV_TXT_LINE_BREAK_LONG_PRE_MIN_LEN) {
294         if(flag & LV_TEXT_FLAG_BREAK_ALL) return break_index;
295         if(word_w_ptr != NULL) *word_w_ptr = 0;
296         return 0;
297     }
298 
299     /*Word is a "long", but letters may need to be better distributed*/
300     {
301         i = break_index;
302         int32_t n_move = LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN - (word_len - break_letter_count);
303         /*Move pointer "i" backwards*/
304         for(; n_move > 0; n_move--) {
305             lv_text_encoded_prev(txt, &i);
306             /**
307              * TODO: it would be appropriate to update the returned
308              * word width hereHowever, in current usage, this doesn't impact anything.
309              */
310         }
311     }
312     return i;
313 #else
314     if(flag & LV_TEXT_FLAG_BREAK_ALL) return break_index;
315     if(word_w_ptr != NULL) *word_w_ptr = 0; /*Return no word*/
316     (void) break_letter_count;
317     return 0;
318 #endif
319 }
320 
/**
 * Find where the next line of a text starts.
 * @param txt           '\0' terminated string
 * @param len           upper limit (in bytes) for the scan. It is checked before each word is
 *                      examined, so the returned index may pass it by the length of one word.
 * @param font          pointer to a font
 * @param letter_space  letter space
 * @param max_width     max width of the line; words are wrapped to fit it
 * @param used_width    if not NULL, receives the width used by the line.
 *                      Set to 0 on early return and to -1 when LV_TEXT_FLAG_EXPAND or
 *                      LV_TEXT_FLAG_FIT is set (the width is not computed in that case).
 * @param flag          text flags (lv_text_flag_t)
 * @return byte index of the first character of the next line, relative to `txt`.
 *         0 if `txt` is NULL or empty or `font` is NULL.
 */
uint32_t lv_text_get_next_line(const char * txt, uint32_t len,
                               const lv_font_t * font, int32_t letter_space,
                               int32_t max_width, int32_t * used_width, lv_text_flag_t flag)
{
    if(used_width) *used_width = 0;

    if(txt == NULL) return 0;
    if(txt[0] == '\0') return 0;
    if(font == NULL) return 0;

    int32_t line_w = 0;

    /*If max_width doesn't matter simply find the new line character
     *without thinking about word wrapping*/
    if((flag & LV_TEXT_FLAG_EXPAND) || (flag & LV_TEXT_FLAG_FIT)) {
        uint32_t i;
        for(i = 0; i < len && txt[i] != '\n' && txt[i] != '\r' && txt[i] != '\0'; i++) {
            /*Just find the new line chars or string ends by incrementing `i`*/
        }
        if(i < len && txt[i] != '\0') i++;    /*To go beyond `\n`*/
        if(used_width) *used_width = -1;
        return i;
    }

    /*LV_TEXT_FLAG_EXPAND has already returned above, so `max_width` is used as given from here on*/
    lv_text_cmd_state_t cmd_state = LV_TEXT_CMD_STATE_WAIT;

    uint32_t i = 0;                                        /*Iterating index into txt*/

    while(i < len && txt[i] != '\0' && max_width > 0) {
        lv_text_flag_t word_flag = flag;
        /*The first word of a line is always allowed to break, otherwise nothing could be placed*/
        if(i == 0) word_flag |= LV_TEXT_FLAG_BREAK_ALL;

        uint32_t word_w = 0;
        uint32_t advance = lv_text_get_next_word(&txt[i], font, letter_space, max_width, word_flag, &word_w, &cmd_state);
        max_width -= word_w;
        line_w += word_w;

        if(advance == 0) {
            break;
        }

        i += advance;

        if(txt[0] == '\n' || txt[0] == '\r') break;

        if(txt[i] == '\n' || txt[i] == '\r') {
            i++;  /*Include the following newline in the current line*/
            break;
        }

    }

    /*Always step at least one to avoid infinite loops*/
    if(i == 0) {
        uint32_t letter = lv_text_encoded_next(txt, &i);
        if(used_width != NULL) {
            line_w = lv_font_get_glyph_width(font, letter, '\0');
        }
    }

    if(used_width != NULL) {
        *used_width = line_w;
    }

    return i;
}
388 
/**
 * Sum the glyph widths of the first `length` bytes of a text.
 * @param txt           '\0' terminated string
 * @param length        number of bytes of `txt` to measure
 * @param font          font used to get the glyph widths
 * @param letter_space  space added after every glyph that has a positive width
 * @return the width of the text; 0 if `txt` is NULL or empty, `font` is NULL or `length` is 0
 */
int32_t lv_text_get_width(const char * txt, uint32_t length, const lv_font_t * font, int32_t letter_space)
{
    if(txt == NULL || font == NULL || txt[0] == '\0') return 0;

    int32_t total = 0;
    uint32_t ofs  = 0;

    while(ofs < length) {
        uint32_t cur;
        uint32_t nxt;
        lv_text_encoded_letter_next_2(txt, &cur, &nxt, &ofs);

        int32_t glyph_w = lv_font_get_glyph_width(font, cur, nxt);
        if(glyph_w <= 0) continue;  /*Glyphs without a positive width add nothing, not even letter space*/

        total += glyph_w;
        total += letter_space;
    }

    /*Trim the letter space after the last glyph. Important if the text is center aligned*/
    if(total > 0) total -= letter_space;

    return total;
}
419 
/**
 * Sum the glyph widths of the first `length` bytes of a text, honoring text flags.
 * With LV_TEXT_FLAG_RECOLOR the characters that belong to a recolor command are not counted.
 * @param txt           '\0' terminated string
 * @param length        number of bytes of `txt` to measure
 * @param font          font used to get the glyph widths
 * @param letter_space  space added after every glyph that has a positive width
 * @param flags         text flags (lv_text_flag_t)
 * @return the width of the text; 0 if `txt` is NULL or empty, `font` is NULL or `length` is 0
 */
int32_t lv_text_get_width_with_flags(const char * txt, uint32_t length, const lv_font_t * font, int32_t letter_space,
                                     lv_text_flag_t flags)
{
    if(txt == NULL || font == NULL || txt[0] == '\0') return 0;

    const bool recolor = (flags & LV_TEXT_FLAG_RECOLOR) != 0;
    lv_text_cmd_state_t cmd_state = LV_TEXT_CMD_STATE_WAIT;
    int32_t total = 0;
    uint32_t ofs  = 0;

    while(ofs < length) {
        uint32_t cur;
        uint32_t nxt;
        lv_text_encoded_letter_next_2(txt, &cur, &nxt, &ofs);

        /*Command characters are consumed by the state machine and take no space*/
        if(recolor && lv_text_is_cmd(&cmd_state, cur)) continue;

        int32_t glyph_w = lv_font_get_glyph_width(font, cur, nxt);
        if(glyph_w <= 0) continue;

        total += glyph_w;
        total += letter_space;
    }

    /*Trim the letter space after the last glyph. Important if the text is center aligned*/
    if(total > 0) total -= letter_space;

    return total;
}
458 
/**
 * Insert a string into another one, in place.
 * @param txt_buf   '\0' terminated buffer to insert into. The caller has to provide room
 *                  for the inserted bytes; this function does not (and cannot) check it.
 * @param pos       position to insert at, as a letter index (converted to a byte index internally)
 * @param ins_txt   '\0' terminated text to insert. Nothing happens if it is empty.
 */
void lv_text_ins(char * txt_buf, uint32_t pos, const char * ins_txt)
{
    if(txt_buf == NULL || ins_txt == NULL) return;

    const size_t ins_len = lv_strlen(ins_txt);
    if(ins_len == 0) return;

    const size_t old_len = lv_strlen(txt_buf);

    /*Convert to byte index instead of letter index*/
    pos = lv_text_encoded_get_byte_id(txt_buf, pos);

    /*Move the tail (terminator included) towards the end, starting from the last byte
     *so that no byte is overwritten before it has been copied*/
    const size_t gap_end = pos + ins_len;
    size_t dst = old_len + ins_len;
    while(dst >= gap_end) {
        txt_buf[dst] = txt_buf[dst - ins_len];
        dst--;
    }

    /*Fill the gap with the new text*/
    lv_memcpy(txt_buf + pos, ins_txt, ins_len);
}
479 
/**
 * Delete a part of a string, in place.
 * @param txt   '\0' terminated string to modify
 * @param pos   position of the first letter to delete, as a letter index
 * @param len   number of letters to delete
 *
 * Both values are converted to byte indices and then clamped to the string, so a
 * position at/after the end deletes nothing and a too large `len` deletes up to the end.
 * The clamping matters when the byte index conversion does not stop at the terminator
 * (the ISO8859-1 variant returns its input unchanged).
 */
void lv_text_cut(char * txt, uint32_t pos, uint32_t len)
{
    if(txt == NULL) return;

    size_t old_len = lv_strlen(txt);

    pos = lv_text_encoded_get_byte_id(txt, pos); /*Convert to byte index instead of letter index*/
    if(pos >= old_len) return;                   /*Nothing to delete at or after the terminator*/

    len = lv_text_encoded_get_byte_id(&txt[pos], len);
    if(len > old_len - pos) len = (uint32_t)(old_len - pos);
    if(len == 0) return;

    /*Shift the tail (terminator included) to the left, over the deleted bytes*/
    size_t i;
    for(i = pos; i <= old_len - len; i++) {
        txt[i] = txt[i + len];
    }
}
495 
/**
 * Build a newly allocated string from a printf-like format and a `va_list`.
 * With LV_USE_ARABIC_PERSIAN_CHARS the formatted text is additionally passed
 * through the Arabic/Persian processing and the processed copy is returned.
 * @param fmt   printf-like format string
 * @param ap    the arguments for `fmt`
 * @return the new text allocated with lv_malloc (the caller owns it), or NULL if an allocation failed
 */
char * lv_text_set_text_vfmt(const char * fmt, va_list ap)
{
    /*Allocate space for the new text by using trick from C99 standard section 7.19.6.12:
     *formatting into a NULL buffer of size 0 only reports the required length.
     *A copy of `ap` is consumed here because `ap` is needed again for the real formatting.*/
    va_list ap_copy;
    va_copy(ap_copy, ap);
    uint32_t len = lv_vsnprintf(NULL, 0, fmt, ap_copy);
    va_end(ap_copy);

    char * text = NULL;
#if LV_USE_ARABIC_PERSIAN_CHARS
    /*Put together the text according to the format string*/
    char * raw_txt = lv_malloc(len + 1);
    LV_ASSERT_MALLOC(raw_txt);
    if(raw_txt == NULL) {
        return NULL;
    }

    lv_vsnprintf(raw_txt, len + 1, fmt, ap);

    /*Get the size of the Arabic text and process it*/
    size_t len_ap = lv_text_ap_calc_bytes_count(raw_txt);
    text = lv_malloc(len_ap + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
        lv_free(raw_txt);   /*Don't leak the intermediate buffer*/
        return NULL;
    }
    lv_text_ap_proc(raw_txt, text);

    lv_free(raw_txt);
#else
    text = lv_malloc(len + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
        return NULL;
    }

    lv_vsnprintf(text, len + 1, fmt, ap);
#endif

    return text;
}
537 
/**
 * Decode the letter at `*ofs` and peek at the letter following it.
 * @param txt           '\0' terminated string
 * @param letter        receives the letter decoded at `*ofs`
 * @param letter_next   receives the letter after it, or 0 if `*letter` is '\0'
 * @param ofs           byte index into `txt`; advanced past the first letter only
 */
void lv_text_encoded_letter_next_2(const char * txt, uint32_t * letter, uint32_t * letter_next, uint32_t * ofs)
{
    *letter = lv_text_encoded_next(txt, ofs);

    if(*letter == '\0') {
        *letter_next = 0;
        return;
    }

    /*NULL index: decode from the given pointer without advancing anything*/
    *letter_next = lv_text_encoded_next(&txt[*ofs], NULL);
}
543 
544 #if LV_TXT_ENC == LV_TXT_ENC_UTF8
545 /*******************************
546  *   UTF-8 ENCODER/DECODER
547  ******************************/
548 
/**
 * Tell how many bytes an UTF-8 coded character occupies, judged by its first byte only.
 * @param str pointer to the first byte of a character in a string
 * @return 1, 2, 3 or 4; 0 if the byte can not start an UTF-8 character
 */
static uint8_t lv_text_utf8_size(const char * str)
{
    const char lead = str[0];

    if(LV_IS_ASCII(lead)) return 1;
    if(LV_IS_2BYTES_UTF8_CODE(lead)) return 2;
    if(LV_IS_3BYTES_UTF8_CODE(lead)) return 3;
    if(LV_IS_4BYTES_UTF8_CODE(lead)) return 4;

    return 0;
}
566 
/**
 * Convert a Unicode letter to UTF-8.
 * @param letter_uni a Unicode letter
 * @return the UTF-8 bytes of the letter packed into an `uint32_t` in memory order
 *         (first UTF-8 byte at the lowest address, unused bytes are 0).
 *         Values below 128 are returned unchanged; 0 is returned for values >= 0x110000.
 */
static uint32_t lv_text_unicode_to_utf8(uint32_t letter_uni)
{
    if(letter_uni < 128) return letter_uni;

    /*Fill the bytes and read them back as one word through a union.
     *Casting the byte array to `uint32_t *` would break the aliasing rules
     *and may be a misaligned access.*/
    union {
        uint8_t bytes[4];
        uint32_t word;
    } enc;
    enc.word = 0;

    if(letter_uni < 0x0800) {
        enc.bytes[0] = ((letter_uni >> 6) & 0x1F) | 0xC0;
        enc.bytes[1] = ((letter_uni >> 0) & 0x3F) | 0x80;
    }
    else if(letter_uni < 0x010000) {
        enc.bytes[0] = ((letter_uni >> 12) & 0x0F) | 0xE0;
        enc.bytes[1] = ((letter_uni >> 6) & 0x3F) | 0x80;
        enc.bytes[2] = ((letter_uni >> 0) & 0x3F) | 0x80;
    }
    else if(letter_uni < 0x110000) {
        enc.bytes[0] = ((letter_uni >> 18) & 0x07) | 0xF0;
        enc.bytes[1] = ((letter_uni >> 12) & 0x3F) | 0x80;
        enc.bytes[2] = ((letter_uni >> 6) & 0x3F) | 0x80;
        enc.bytes[3] = ((letter_uni >> 0) & 0x3F) | 0x80;
    }
    else {
        return 0;
    }

    return enc.word;
}
602 
/**
 * Convert a multi-byte character constant (e.g. 'Á') so that its bytes are in UTF-8 order.
 * Unless LV_BIG_ENDIAN_SYSTEM is set, the 4 bytes of `c` are reversed and the zero
 * bytes that end up in the low positions are shifted out.
 * Values whose bit 7 is clear are returned unchanged.
 * @param c a wide character
 * @return the converted value
 */
static uint32_t lv_text_utf8_conv_wc(uint32_t c)
{
#if LV_BIG_ENDIAN_SYSTEM == 0
    /*Swap the bytes (UTF-8 is big endian, but the MCUs are little endian)*/
    if((c & 0x80) != 0) {
        uint32_t swapped;
        uint8_t c8[4];
        lv_memcpy(c8, &c, 4);
        /*Widen before shifting: a promoted `int` shifted into its sign bit is undefined*/
        swapped = ((uint32_t)c8[0] << 24) + ((uint32_t)c8[1] << 16) + ((uint32_t)c8[2] << 8) + (uint32_t)c8[3];
        uint8_t i;
        for(i = 0; i < 4; i++) {
            if((swapped & 0xFF) == 0)
                swapped = (swapped >> 8); /*Ignore leading zeros (they were in the end originally)*/
        }
        c = swapped;
    }
#endif
    return c;
}
627 
/**
 * Decode an UTF-8 character from a string.
 *
 * Unicode to UTF-8 bit layout:
 *   00000000 00000000 00000000 0xxxxxxx -> 0xxxxxxx
 *   00000000 00000000 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
 *   00000000 00000000 zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
 *   00000000 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
 *
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start.
 *          After a successful call it points to the next UTF-8 char in 'txt'.
 *          If a continuation byte is invalid it is left pointing at that byte.
 *          NULL to use txt[0] as index
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_text_utf8_next(const char * txt, uint32_t * i)
{
    /*Dummy index is required if the caller gave none*/
    uint32_t local_idx = 0;
    if(i == NULL) i = &local_idx;

    /*The first byte is consumed in every case, even if it can't start a character*/
    const char lead = txt[*i];
    (*i)++;

    /*Normal ASCII*/
    if(LV_IS_ASCII(lead)) return (uint32_t)lead;

    /*The lead byte gives the payload bits and the number of continuation bytes*/
    uint32_t code;
    uint8_t tail_cnt;
    if(LV_IS_2BYTES_UTF8_CODE(lead)) {
        code     = (uint32_t)(lead & 0x1F);
        tail_cnt = 1;
    }
    else if(LV_IS_3BYTES_UTF8_CODE(lead)) {
        code     = (uint32_t)(lead & 0x0F);
        tail_cnt = 2;
    }
    else if(LV_IS_4BYTES_UTF8_CODE(lead)) {
        code     = (uint32_t)(lead & 0x07);
        tail_cnt = 3;
    }
    else {
        return 0; /*Not UTF-8 char. The index already points to the next byte.*/
    }

    /*Every continuation byte contributes 6 more bits*/
    for(; tail_cnt > 0; tail_cnt--) {
        if(LV_IS_INVALID_UTF8_CODE(txt[*i])) return 0;
        code = (code << 6) + (uint32_t)(txt[*i] & 0x3F);
        (*i)++;
    }

    return code;
}
703 
/**
 * Get previous UTF-8 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start. After a successful call it points to the
 *          first byte of the previous UTF-8 char in 'txt'. If it is 0 there is no previous
 *          character: 0 is returned and the index is left untouched.
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_text_utf8_prev(const char * txt, uint32_t * i)
{
    uint8_t c_size;
    uint8_t cnt = 0;

    /*Stepping back from the very first byte would wrap the index around*/
    if(*i == 0) return 0;

    /*Try to find a !0 long UTF-8 char by stepping one character back*/
    (*i)--;
    do {
        if(cnt >= 4) return 0; /*No UTF-8 char found before the initial*/

        c_size = lv_text_encoded_size(&txt[*i]);
        if(c_size == 0) {
            /*Not the first byte of a character, keep stepping back while possible*/
            if(*i != 0)
                (*i)--;
            else
                return 0;
        }
        cnt++;
    } while(c_size == 0);

    /*Decode with a copy of the index so that `*i` stays on the start of the found character*/
    uint32_t i_tmp  = *i;
    uint32_t letter = lv_text_encoded_next(txt, &i_tmp); /*Character found, get it*/

    return letter;
}
736 
/**
 * Convert a character index (in an UTF-8 text) to byte index.
 * E.g. in "AÁRT" 'R' is the character with index 2 but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter; counting stops when the terminating '\0' is reached
 */
static uint32_t lv_text_utf8_get_byte_id(const char * txt, uint32_t utf8_id)
{
    uint32_t byte_ofs  = 0;
    uint32_t remaining = utf8_id;

    while(remaining > 0 && txt[byte_ofs] != '\0') {
        uint8_t c_size = lv_text_encoded_size(&txt[byte_ofs]);

        /*An invalid byte is counted as a 1 byte long character*/
        byte_ofs += (c_size != 0) ? c_size : 1;
        remaining--;
    }

    return byte_ofs;
}
756 
/**
 * Convert a byte index (in an UTF-8 text) to character index.
 * E.g. in "AÁRT" 'R' is the character with index 2 but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param byte_id byte index
 * @return character index of the letter at 'byte_id'th position. If `byte_id` is beyond the
 *         end of the string the number of characters in the string is returned.
 */
static uint32_t lv_text_utf8_get_char_id(const char * txt, uint32_t byte_id)
{
    uint32_t i        = 0;
    uint32_t char_cnt = 0;

    /*Stop at the terminator too, otherwise a too large `byte_id` would read past the string*/
    while(i < byte_id && txt[i] != '\0') {
        lv_text_encoded_next(txt, &i); /*'i' points to the next letter so use the prev. value*/
        char_cnt++;
    }

    return char_cnt;
}
776 
/**
 * Get the number of characters (and NOT bytes) in an UTF-8 string.
 * E.g.: "ÁBC" is 3 characters (but 4 bytes)
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_text_utf8_get_length(const char * txt)
{
    uint32_t count = 0;
    uint32_t ofs   = 0;

    /*Every decode step advances `ofs` by one character*/
    for(; txt[ofs] != '\0'; count++) {
        lv_text_encoded_next(txt, &ofs);
    }

    return count;
}
795 
796 #elif LV_TXT_ENC == LV_TXT_ENC_ASCII
797 /*******************************
798  *  ASCII ENCODER/DECODER
799  ******************************/
800 
/**
 * Give the size of an ISO8859-1 coded character.
 * @param str pointer to a character in a string (not examined)
 * @return always 1: every ISO8859-1 character is a single byte
 */
static uint8_t lv_text_iso8859_1_size(const char * str)
{
    const uint8_t char_size = 1;

    LV_UNUSED(str); /*The size does not depend on the character*/
    return char_size;
}
811 
/**
 * Convert a Unicode letter to ISO8859-1.
 * @param letter_uni a Unicode letter
 * @return `letter_uni` itself if it is below 256, otherwise a space (' ') as replacement
 */
static uint32_t lv_text_unicode_to_iso8859_1(uint32_t letter_uni)
{
    return (letter_uni < 256) ? letter_uni : (uint32_t)' ';
}
824 
/**
 * Counterpart of the UTF-8 wide character conversion for the ISO8859-1 build.
 * No conversion is done here: the input is handed back as it is.
 * @param c a character, e.g. 'A'
 * @return same as `c`
 */
static uint32_t lv_text_iso8859_1_conv_wc(uint32_t c)
{
    const uint32_t unchanged = c;

    return unchanged;
}
835 
/**
 * Decode an ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start.
 *          After call it will point to the next ISO8859-1 coded char in 'txt'.
 *          NULL to use txt[0] as index
 * @return the decoded ISO8859-1 character, always in the 0..255 range
 */
static uint32_t lv_text_iso8859_1_next(const char * txt, uint32_t * i)
{
    /*Read the byte as unsigned in both paths: a plain `char` may be signed and
     *would sign-extend characters above 0x7F when widened to `uint32_t`*/
    if(i == NULL) return (uint8_t)txt[0]; /*Get the next char*/

    uint8_t letter = (uint8_t)txt[*i];
    (*i)++;
    return letter;
}
852 
/**
 * Get previous ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start. After the call it will point to the previous
 *          ISO8859-1 coded char in 'txt'. If it is 0 there is no previous character: 0 is returned
 *          and the index is left untouched.
 *          NULL to read the byte right before `txt` (the caller must make sure it exists).
 * @return the decoded ISO8859-1 character, always in the 0..255 range
 */
static uint32_t lv_text_iso8859_1_prev(const char * txt, uint32_t * i)
{
    /*Read the byte as unsigned in both paths: a plain `char` may be signed and
     *would sign-extend characters above 0x7F when widened to `uint32_t`*/
    if(i == NULL) return (uint8_t) * (txt - 1); /*Get the prev. char*/

    /*Stepping back from the very first byte would wrap the index around*/
    if(*i == 0) return 0;

    (*i)--;
    uint8_t letter = (uint8_t)txt[*i];

    return letter;
}
868 
/**
 * Convert a character index (in an ISO8859-1 text) to byte index.
 * Every character is one byte here, so the two indices are the same.
 * The value is not checked against the length of the text.
 * @param txt a '\0' terminated char string (not examined)
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter, i.e. `utf8_id` itself
 */
static uint32_t lv_text_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id)
{
    const uint32_t byte_id = utf8_id; /*No difference without multi-byte encoding*/

    LV_UNUSED(txt);
    return byte_id;
}
881 
/**
 * Convert a byte index (in an ISO8859-1 text) to character index.
 * Every character is one byte here, so the two indices are the same.
 * The value is not checked against the length of the text.
 * @param txt a '\0' terminated char string (not examined)
 * @param byte_id byte index
 * @return character index of the letter at 'byte_id'th position, i.e. `byte_id` itself
 */
static uint32_t lv_text_iso8859_1_get_char_id(const char * txt, uint32_t byte_id)
{
    const uint32_t char_id = byte_id; /*No difference without multi-byte encoding*/

    LV_UNUSED(txt);
    return char_id;
}
894 
/**
 * Get the number of characters (and NOT bytes) in a string.
 * Every ISO8859-1 character is one byte, so this is the byte length of the string.
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_text_iso8859_1_get_length(const char * txt)
{
    const size_t byte_cnt = lv_strlen(txt);

    return (uint32_t)byte_cnt;
}
905 #else
906 
907 #error "Invalid character encoding. See `LV_TXT_ENC` in `lv_conf.h`"
908 
909 #endif
910