1 /**
 * @file lv_txt.c
3  *
4  */
5 
6 /*********************
7  *      INCLUDES
8  *********************/
9 #include "lv_txt.h"
10 #include "lv_math.h"
11 #include "lv_log.h"
12 
13 /*********************
14  *      DEFINES
15  *********************/
16 #define NO_BREAK_FOUND UINT32_MAX
17 
18 /**********************
19  *      TYPEDEFS
20  **********************/
21 
22 /**********************
23  *  STATIC PROTOTYPES
24  **********************/
/*Tells whether a letter is one of the characters listed in LV_TXT_BREAK_CHARS*/
static inline bool is_break_char(uint32_t letter);

/*Encoding specific helpers. Only the set selected by LV_TXT_ENC is declared.*/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    static uint8_t lv_txt_utf8_size(const char * str);
    static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni);
    static uint32_t lv_txt_utf8_conv_wc(uint32_t c);
    static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i);
    static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_txt_utf8_get_length(const char * txt);
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    static uint8_t lv_txt_iso8859_1_size(const char * str);
    static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni);
    static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c);
    static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i);
    static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_txt_iso8859_1_get_length(const char * txt);
#endif
46 /**********************
47  *  STATIC VARIABLES
48  **********************/
49 
50 /**********************
51  *  GLOBAL VARIABLES
52  **********************/
/*Encoding independent entry points.
 *Each pointer is bound at compile time to the UTF-8 or to the ISO8859-1
 *implementation, depending on the value of LV_TXT_ENC.*/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    uint8_t (*_lv_txt_encoded_size)(const char *)                   = lv_txt_utf8_size;
    uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t)                = lv_txt_unicode_to_utf8;
    uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t)                   = lv_txt_utf8_conv_wc;
    uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *)      = lv_txt_utf8_next;
    uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *)      = lv_txt_utf8_prev;
    uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_utf8_get_byte_id;
    uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t) = lv_txt_utf8_get_char_id;
    uint32_t (*_lv_txt_get_encoded_length)(const char *)            = lv_txt_utf8_get_length;
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    uint8_t (*_lv_txt_encoded_size)(const char *)                   = lv_txt_iso8859_1_size;
    uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t)                = lv_txt_unicode_to_iso8859_1;
    uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t)                   = lv_txt_iso8859_1_conv_wc;
    uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *)      = lv_txt_iso8859_1_next;
    uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *)      = lv_txt_iso8859_1_prev;
    uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_iso8859_1_get_byte_id;
    uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t)     = lv_txt_iso8859_1_get_char_id;
    uint32_t (*_lv_txt_get_encoded_length)(const char *)            = lv_txt_iso8859_1_get_length;

#endif
73 
74 /**********************
75  *      MACROS
76  **********************/
77 
78 /**********************
79  *   GLOBAL FUNCTIONS
80  **********************/
81 
82 /**
83  * Get size of a text
84  * @param size_res pointer to a 'point_t' variable to store the result
85  * @param text pointer to a text
86  * @param font pointer to font of the text
87  * @param letter_space letter space of the text
88  * @param txt.line_space line space of the text
89  * @param flags settings for the text from 'txt_flag_t' enum
90  * @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid
91  * line breaks
92  */
_lv_txt_get_size(lv_point_t * size_res,const char * text,const lv_font_t * font,lv_coord_t letter_space,lv_coord_t line_space,lv_coord_t max_width,lv_txt_flag_t flag)93 void _lv_txt_get_size(lv_point_t * size_res, const char * text, const lv_font_t * font, lv_coord_t letter_space,
94                       lv_coord_t line_space, lv_coord_t max_width, lv_txt_flag_t flag)
95 {
96     size_res->x = 0;
97     size_res->y = 0;
98 
99     if(text == NULL) return;
100     if(font == NULL) return;
101 
102     if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
103 
104     uint32_t line_start     = 0;
105     uint32_t new_line_start = 0;
106     uint16_t letter_height = lv_font_get_line_height(font);
107 
108     /*Calc. the height and longest line*/
109     while(text[line_start] != '\0') {
110         new_line_start += _lv_txt_get_next_line(&text[line_start], font, letter_space, max_width, flag);
111 
112         if((unsigned long)size_res->y + (unsigned long)letter_height + (unsigned long)line_space > LV_MAX_OF(lv_coord_t)) {
113             LV_LOG_WARN("lv_txt_get_size: integer overflow while calculating text height");
114             return;
115         }
116         else {
117             size_res->y += letter_height;
118             size_res->y += line_space;
119         }
120 
121         /*Calculate the the longest line*/
122         lv_coord_t act_line_length = _lv_txt_get_width(&text[line_start], new_line_start - line_start, font, letter_space,
123                                                        flag);
124 
125         size_res->x = LV_MATH_MAX(act_line_length, size_res->x);
126         line_start  = new_line_start;
127     }
128 
129     /*Make the text one line taller if the last character is '\n' or '\r'*/
130     if((line_start != 0) && (text[line_start - 1] == '\n' || text[line_start - 1] == '\r')) {
131         size_res->y += letter_height + line_space;
132     }
133 
134     /*Correction with the last line space or set the height manually if the text is empty*/
135     if(size_res->y == 0)
136         size_res->y = letter_height;
137     else
138         size_res->y -= line_space;
139 }
140 
141 /**
142  * Get the next word of text. A word is delimited by break characters.
143  *
144  * If the word cannot fit in the max_width space, obey LV_TXT_LINE_BREAK_LONG_* rules.
145  *
146  * If the next word cannot fit anything, return 0.
147  *
148  * If the first character is a break character, returns the next index.
149  *
150  * Example calls from lv_txt_get_next_line() assuming sufficient max_width and
151  * txt = "Test text\n"
152  *        0123456789
153  *
154  * Calls would be as follows:
155  *     1. Return i=4, pointing at breakchar ' ', for the string "Test"
156  *     2. Return i=5, since i=4 was a breakchar.
157  *     3. Return i=9, pointing at breakchar '\n'
158  *     4. Parenting lv_txt_get_next_line() would detect subsequent '\0'
159  *
160  * TODO: Returned word_w_ptr may overestimate the returned word's width when
161  * max_width is reached. In current usage, this has no impact.
162  *
163  * @param txt a '\0' terminated string
164  * @param font pointer to a font
165  * @param letter_space letter space
166  * @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid line breaks
167  * @param flags settings for the text from 'txt_flag_type' enum
168  * @param[out] word_w_ptr width (in pixels) of the parsed word. May be NULL.
169  * @param force Force return the fraction of the word that can fit in the provided space.
170  * @return the index of the first char of the next word (in byte index not letter index. With UTF-8 they are different)
171  */
lv_txt_get_next_word(const char * txt,const lv_font_t * font,lv_coord_t letter_space,lv_coord_t max_width,lv_txt_flag_t flag,uint32_t * word_w_ptr,lv_txt_cmd_state_t * cmd_state,bool force)172 static uint32_t lv_txt_get_next_word(const char * txt, const lv_font_t * font,
173                                      lv_coord_t letter_space, lv_coord_t max_width,
174                                      lv_txt_flag_t flag, uint32_t * word_w_ptr, lv_txt_cmd_state_t * cmd_state, bool force)
175 {
176     if(txt == NULL || txt[0] == '\0') return 0;
177     if(font == NULL) return 0;
178 
179     if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
180 
181     uint32_t i = 0, i_next = 0, i_next_next = 0;  /* Iterating index into txt */
182     uint32_t letter = 0;      /* Letter at i */
183     uint32_t letter_next = 0; /* Letter at i_next */
184     lv_coord_t letter_w;
185     lv_coord_t cur_w = 0;  /* Pixel Width of transversed string */
186     uint32_t word_len = 0;   /* Number of characters in the transversed word */
187     uint32_t break_index = NO_BREAK_FOUND; /* only used for "long" words */
188     uint32_t break_letter_count = 0; /* Number of characters up to the long word break point */
189 
190     letter = _lv_txt_encoded_next(txt, &i_next);
191     i_next_next = i_next;
192 
193     /* Obtain the full word, regardless if it fits or not in max_width */
194     while(txt[i] != '\0') {
195         letter_next = _lv_txt_encoded_next(txt, &i_next_next);
196         word_len++;
197 
198         /*Handle the recolor command*/
199         if((flag & LV_TXT_FLAG_RECOLOR) != 0) {
200             if(_lv_txt_is_cmd(cmd_state, letter) != false) {
201                 i = i_next;
202                 i_next = i_next_next;
203                 letter = letter_next;
204                 continue;   /*Skip the letter is it is part of a command*/
205             }
206         }
207 
208         letter_w = lv_font_get_glyph_width(font, letter, letter_next);
209         cur_w += letter_w;
210 
211         if(letter_w > 0) {
212             cur_w += letter_space;
213         }
214 
215         /* Test if this character fits within max_width */
216         if(break_index == NO_BREAK_FOUND && (cur_w - letter_space) > max_width) {
217             break_index = i;
218             break_letter_count = word_len - 1;
219             /* break_index is now pointing at the character that doesn't fit */
220         }
221 
222         /*Check for new line chars and breakchars*/
223         if(letter == '\n' || letter == '\r' || is_break_char(letter)) {
224             /* Update the output width on the first character if it fits.
225              * Must do this here incase first letter is a break character. */
226             if(i == 0 && break_index == NO_BREAK_FOUND && word_w_ptr != NULL) *word_w_ptr = cur_w;
227             word_len--;
228             break;
229         }
230 
231         /* Update the output width */
232         if(word_w_ptr != NULL && break_index == NO_BREAK_FOUND) *word_w_ptr = cur_w;
233 
234 
235         i = i_next;
236         i_next = i_next_next;
237         letter = letter_next;
238     }
239 
240     /* Entire Word fits in the provided space */
241     if(break_index == NO_BREAK_FOUND) {
242         if(word_len == 0 || (letter == '\r' && letter_next == '\n')) i = i_next;
243         return i;
244     }
245 
246 #if LV_TXT_LINE_BREAK_LONG_LEN > 0
247     /* Word doesn't fit in provided space, but isn't "long" */
248     if(word_len < LV_TXT_LINE_BREAK_LONG_LEN) {
249         if(force) return break_index;
250         if(word_w_ptr != NULL) *word_w_ptr = 0; /* Return no word */
251         return 0;
252     }
253 
254     /* Word is "long," but insufficient amounts can fit in provided space */
255     if(break_letter_count < LV_TXT_LINE_BREAK_LONG_PRE_MIN_LEN) {
256         if(force) return break_index;
257         if(word_w_ptr != NULL) *word_w_ptr = 0;
258         return 0;
259     }
260 
261     /* Word is a "long", but letters may need to be better distributed */
262     {
263         i = break_index;
264         int32_t n_move = LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN - (word_len - break_letter_count);
265         /* Move pointer "i" backwards */
266         for(; n_move > 0; n_move--) {
267             _lv_txt_encoded_prev(txt, &i);
268             // TODO: it would be appropriate to update the returned word width here
269             // However, in current usage, this doesn't impact anything.
270         }
271     }
272     return i;
273 #else
274     if(force) return break_index;
275     if(word_w_ptr != NULL) *word_w_ptr = 0; /* Return no word */
276     (void) break_letter_count;
277     return 0;
278 #endif
279 }
280 
281 /**
282  * Get the next line of text. Check line length and break chars too.
283  *
284  * A line of txt includes the \n character.
285  *
286  * @param txt a '\0' terminated string
287  * @param font pointer to a font
288  * @param letter_space letter space
289  * @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid line breaks
290  * @param flags settings for the text from 'txt_flag_type' enum
291  * @return the index of the first char of the new line (in byte index not letter index. With UTF-8 they are different)
292  */
_lv_txt_get_next_line(const char * txt,const lv_font_t * font,lv_coord_t letter_space,lv_coord_t max_width,lv_txt_flag_t flag)293 uint32_t _lv_txt_get_next_line(const char * txt, const lv_font_t * font,
294                                lv_coord_t letter_space, lv_coord_t max_width, lv_txt_flag_t flag)
295 {
296     if(txt == NULL) return 0;
297     if(font == NULL) return 0;
298 
299     /* If max_width doesn't mater simply find the new line character
300      * without thinking about word wrapping*/
301     if((flag & LV_TXT_FLAG_EXPAND) || (flag & LV_TXT_FLAG_FIT)) {
302         uint32_t i;
303         for(i = 0; txt[i] != '\n' && txt[i] != '\r' && txt[i] != '\0'; i++) {
304             /*Just find the new line chars or string ends by incrementing `i`*/
305         }
306         if(txt[i] != '\0') i++;    /*To go beyond `\n`*/
307         return i;
308     }
309 
310     if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
311     lv_txt_cmd_state_t cmd_state = LV_TXT_CMD_STATE_WAIT;
312     uint32_t i = 0;                                        /* Iterating index into txt */
313 
314     while(txt[i] != '\0' && max_width > 0) {
315         uint32_t word_w = 0;
316         uint32_t advance = lv_txt_get_next_word(&txt[i], font, letter_space, max_width, flag, &word_w, &cmd_state, i == 0);
317         max_width -= word_w;
318 
319         if(advance == 0) {
320             if(i == 0) _lv_txt_encoded_next(txt, &i); // prevent inf loops
321             break;
322         }
323 
324         i += advance;
325 
326         if(txt[0] == '\n' || txt[0] == '\r') break;
327 
328         if(txt[i] == '\n' || txt[i] == '\r') {
329             i++;  /* Include the following newline in the current line */
330             break;
331         }
332 
333     }
334 
335     /* Always step at least one to avoid infinite loops */
336     if(i == 0) {
337         _lv_txt_encoded_next(txt, &i);
338     }
339 
340     return i;
341 }
342 
343 /**
344  * Give the length of a text with a given font
345  * @param txt a '\0' terminate string
346  * @param length length of 'txt' in byte count and not characters (Á is 1 character but 2 bytes in
347  * UTF-8)
348  * @param font pointer to a font
349  * @param letter_space letter space
350  * @param flags settings for the text from 'txt_flag_t' enum
351  * @return length of a char_num long text
352  */
_lv_txt_get_width(const char * txt,uint32_t length,const lv_font_t * font,lv_coord_t letter_space,lv_txt_flag_t flag)353 lv_coord_t _lv_txt_get_width(const char * txt, uint32_t length, const lv_font_t * font, lv_coord_t letter_space,
354                              lv_txt_flag_t flag)
355 {
356     if(txt == NULL) return 0;
357     if(font == NULL) return 0;
358 
359     uint32_t i                   = 0;
360     lv_coord_t width             = 0;
361     lv_txt_cmd_state_t cmd_state = LV_TXT_CMD_STATE_WAIT;
362 
363     if(length != 0) {
364         while(i < length) {
365             uint32_t letter      = _lv_txt_encoded_next(txt, &i);
366             uint32_t letter_next = _lv_txt_encoded_next(&txt[i], NULL);
367             if((flag & LV_TXT_FLAG_RECOLOR) != 0) {
368                 if(_lv_txt_is_cmd(&cmd_state, letter) != false) {
369                     continue;
370                 }
371             }
372 
373             lv_coord_t char_width = lv_font_get_glyph_width(font, letter, letter_next);
374             if(char_width > 0) {
375                 width += char_width;
376                 width += letter_space;
377             }
378         }
379 
380         if(width > 0) {
381             width -= letter_space; /*Trim the last letter space. Important if the text is center
382                                       aligned */
383         }
384     }
385 
386     return width;
387 }
388 
389 /**
390  * Check next character in a string and decide if the character is part of the command or not
391  * @param state pointer to a txt_cmd_state_t variable which stores the current state of command
392  * processing (Initied. to TXT_CMD_STATE_WAIT )
393  * @param c the current character
394  * @return true: the character is part of a command and should not be written,
395  *         false: the character should be written
396  */
_lv_txt_is_cmd(lv_txt_cmd_state_t * state,uint32_t c)397 bool _lv_txt_is_cmd(lv_txt_cmd_state_t * state, uint32_t c)
398 {
399     bool ret = false;
400 
401     if(c == (uint32_t)LV_TXT_COLOR_CMD[0]) {
402         if(*state == LV_TXT_CMD_STATE_WAIT) { /*Start char*/
403             *state = LV_TXT_CMD_STATE_PAR;
404             ret    = true;
405         }
406         /*Other start char in parameter is escaped cmd. char */
407         else if(*state == LV_TXT_CMD_STATE_PAR) {
408             *state = LV_TXT_CMD_STATE_WAIT;
409         }
410         /*Command end */
411         else if(*state == LV_TXT_CMD_STATE_IN) {
412             *state = LV_TXT_CMD_STATE_WAIT;
413             ret    = true;
414         }
415     }
416 
417     /*Skip the color parameter and wait the space after it*/
418     if(*state == LV_TXT_CMD_STATE_PAR) {
419         if(c == ' ') {
420             *state = LV_TXT_CMD_STATE_IN; /*After the parameter the text is in the command*/
421         }
422         ret = true;
423     }
424 
425     return ret;
426 }
427 
/**
 * Insert a string into another one, in place
 * @param txt_buf the original text (the buffer must be big enough for the result text)
 * @param pos position to insert at. Expressed in character index and not byte index (different in
 * UTF-8). 0: before the original text, 1: after the first char etc.
 * @param ins_txt text to insert. Nothing happens if it is empty.
 */
void _lv_txt_ins(char * txt_buf, uint32_t pos, const char * ins_txt)
{
    const size_t buf_len = strlen(txt_buf);
    const size_t add_len = strlen(ins_txt);
    if(add_len == 0) return;

    pos = _lv_txt_encoded_get_byte_id(txt_buf, pos); /*From now on `pos` is a byte index*/

    /*Shift the tail (including the closing '\0') to the right, starting from the end
     *so that no byte is overwritten before it has been moved*/
    const size_t gap_end = pos + add_len;
    size_t dst           = buf_len + add_len;
    while(dst >= gap_end) {
        txt_buf[dst] = txt_buf[dst - add_len];
        dst--;
    }

    /*Fill the gap with the new text*/
    _lv_memcpy_small(txt_buf + pos, ins_txt, add_len);
}
453 
/**
 * Delete a part of a string, in place
 * @param txt string to modify
 * @param pos position where to start the deleting, in character index
 * (0: before the first char, 1: after the first char etc.)
 * Nothing is deleted if it is at or after the end of the text.
 * @param len number of characters to delete. If the range runs over the end of the text
 * everything from `pos` is deleted.
 */
void _lv_txt_cut(char * txt, uint32_t pos, uint32_t len)
{

    size_t old_len = strlen(txt);

    pos = _lv_txt_encoded_get_byte_id(txt, pos); /*Convert to byte index instead of letter index*/
    if(pos >= old_len) return;                   /*Nothing to delete at or after the end*/

    len = _lv_txt_encoded_get_byte_id(&txt[pos], len);

    /*Clamp the range to the text. Without this `old_len - len` below could wrap around
     *(unsigned arithmetic) and the copy loop would run far beyond the buffer*/
    if(len > old_len - pos) len = (uint32_t)(old_len - pos);
    if(len == 0) return;

    /*Move the part after the deleted range (including the closing '\0') to `pos`*/
    size_t i;
    for(i = pos; i <= old_len - len; i++) {
        txt[i] = txt[i + len];
    }
}
475 
476 #if LV_TXT_ENC == LV_TXT_ENC_UTF8
477 /*******************************
 *   UTF-8 ENCODER/DECODER
479  ******************************/
480 
/**
 * Give the size of an UTF-8 coded character from its first byte
 * @param str pointer to a character in a string
 * @return length of the UTF-8 character (1, 2, 3 or 4). 0 if the byte can not start a character
 */
static uint8_t lv_txt_utf8_size(const char * str)
{
    const uint8_t lead = (uint8_t)str[0];

    if((lead >> 7) == 0x00) return 1; /*0xxxxxxx*/
    if((lead >> 5) == 0x06) return 2; /*110xxxxx*/
    if((lead >> 4) == 0x0E) return 3; /*1110xxxx*/
    if((lead >> 3) == 0x1E) return 4; /*11110xxx*/

    return 0; /*A continuation byte (10xxxxxx) or an invalid byte*/
}
498 
/**
 * Convert an Unicode letter to UTF-8.
 * @param letter_uni an Unicode letter
 * @return the UTF-8 bytes of the letter packed into a 32 bit value so that, read from memory,
 *         the first byte of the value is the first UTF-8 byte and unused bytes are 0
 *         (compatible with C chars, e.g. 'Á', 'Ű' on little endian systems).
 *         Values below 128 are returned unchanged. 0 if `letter_uni` is not below 0x110000.
 */
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni)
{
    if(letter_uni < 128) return letter_uni;

    /*The union gives a well defined way to read the 4 bytes back as one word.
     *Zero initialized: unused trailing bytes and the result for out of range input are 0*/
    union {
        uint8_t bytes[4];
        uint32_t word;
    } enc = {{0, 0, 0, 0}};

    if(letter_uni < 0x0800) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 6) & 0x1F) | 0xC0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else if(letter_uni < 0x010000) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 12) & 0x0F) | 0xE0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 6) & 0x3F) | 0x80);
        enc.bytes[2] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }
    else if(letter_uni < 0x110000) {
        enc.bytes[0] = (uint8_t)(((letter_uni >> 18) & 0x07) | 0xF0);
        enc.bytes[1] = (uint8_t)(((letter_uni >> 12) & 0x3F) | 0x80);
        enc.bytes[2] = (uint8_t)(((letter_uni >> 6) & 0x3F) | 0x80);
        enc.bytes[3] = (uint8_t)(((letter_uni >> 0) & 0x3F) | 0x80);
    }

    return enc.word;
}
531 
/**
 * Convert a wide character, e.g. 'Á', to be UTF-8 compatible.
 * If LV_BIG_ENDIAN_SYSTEM is 0 and bit 7 of `c` is set, the 4 bytes of `c` are put in reverse
 * order and the zero bytes that end up in the low positions are shifted out.
 * In every other case `c` is returned unchanged.
 * @param c a wide character
 * @return the converted value
 */
static uint32_t lv_txt_utf8_conv_wc(uint32_t c)
{
#if LV_BIG_ENDIAN_SYSTEM == 0
    /*Swap the bytes (UTF-8 is big endian, but the MCUs are little endian)*/
    if((c & 0x80) != 0) {
        /*Look at `c` byte by byte in memory order*/
        const uint8_t * c8 = (const uint8_t *)&c;

        /*Widen before shifting: shifting a promoted `int` into its sign bit is undefined*/
        uint32_t swapped = ((uint32_t)c8[0] << 24) | ((uint32_t)c8[1] << 16) | ((uint32_t)c8[2] << 8) | (uint32_t)c8[3];

        uint8_t i;
        for(i = 0; i < 4; i++) {
            if((swapped & 0xFF) == 0)
                swapped = (swapped >> 8); /*Ignore leading zeros (they were in the end originally)*/
        }
        c = swapped;
    }
#endif
    return c;
}
556 
/**
 * Decode an UTF-8 character from a string.
 *
 * Unicode to UTF-8
 * 00000000 00000000 00000000 0xxxxxxx -> 0xxxxxxx
 * 00000000 00000000 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
 * 00000000 00000000 zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
 * 00000000 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
 *
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start.
 *          After the call it points to the next UTF-8 char in 'txt'. On an invalid code it points
 *          to the byte that broke the sequence (the first byte is always consumed).
 *          NULL to decode the character at txt[0]
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i)
{
    /*A dummy index is used if the caller is not interested in it*/
    uint32_t local_idx = 0;
    if(i == NULL) i = &local_idx;

    /*The first byte is consumed in every case*/
    const uint8_t lead = (uint8_t)txt[*i];
    (*i)++;

    /*Normal ASCII*/
    if(lead < 0x80) return lead;

    /*The first byte tells the number of continuation bytes and holds the topmost bits*/
    uint32_t code;
    uint8_t tail;
    if((lead & 0xE0) == 0xC0) {
        code = lead & 0x1F;
        tail = 1;
    }
    else if((lead & 0xF0) == 0xE0) {
        code = lead & 0x0F;
        tail = 2;
    }
    else if((lead & 0xF8) == 0xF0) {
        code = lead & 0x07;
        tail = 3;
    }
    else {
        return 0; /*Not an UTF-8 start byte. It has been skipped.*/
    }

    /*Every continuation byte (10xxxxxx) adds 6 more bits*/
    while(tail > 0) {
        const uint8_t next = (uint8_t)txt[*i];
        if((next & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/

        code = (code << 6) | (uint32_t)(next & 0x3F);
        (*i)++;
        tail--;
    }

    return code;
}
631 
/**
 * Get the previous UTF-8 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start. After the call it will point to the previous
 * UTF-8 char in 'txt'. If it is 0 there is no previous character: it is left unchanged.
 * @return the decoded Unicode character or 0 if there is no valid UTF-8 code before the index
 */
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i)
{
    /*Already at the start: decrementing would wrap the index around and read far out of the text*/
    if(*i == 0) return 0;

    uint8_t c_size;
    uint8_t cnt = 0;

    /*Try to find a !0 long UTF-8 char by stepping one character back*/
    (*i)--;
    do {
        if(cnt >= 4) return 0; /*No UTF-8 char found before the initial*/

        c_size = _lv_txt_encoded_size(&txt[*i]);
        if(c_size == 0) {
            /*Not the first byte of a character: keep stepping back while possible*/
            if(*i != 0)
                (*i)--;
            else
                return 0;
        }
        cnt++;
    } while(c_size == 0);

    uint32_t i_tmp  = *i; /*Decode on a copy to leave `i` on the start of the found character*/
    uint32_t letter = _lv_txt_encoded_next(txt, &i_tmp); /*Character found, get it*/

    return letter;
}
664 
/**
 * Convert a character index (in an UTF-8 text) to byte index.
 * E.g. in "AÁRT" the index of 'R' is 2 as a character but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter. If the text has fewer letters, the byte index
 *         of the terminating '\0' is returned.
 */
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id)
{
    uint32_t i;
    uint32_t byte_cnt = 0;
    for(i = 0; i < utf8_id && txt[byte_cnt] != '\0'; i++) {
        uint8_t c_size = _lv_txt_encoded_size(&txt[byte_cnt]);
        if(c_size == 0) c_size = 1; /*An invalid byte counts as a one byte long character*/

        /*Step byte by byte: a truncated character at the end of the text must not
         *carry the index over the terminating '\0'*/
        while(c_size > 0 && txt[byte_cnt] != '\0') {
            byte_cnt++;
            c_size--;
        }
    }

    return byte_cnt;
}
683 
/**
 * Convert a byte index (in an UTF-8 text) to character index.
 * E.g. in "AÁRT" the index of 'R' is 2 as a character but it starts at byte 3 because 'Á' is 2 bytes long
 * @param txt a '\0' terminated UTF-8 string
 * @param byte_id byte index
 * @return character index of the letter at 'byte_id'th position. If 'byte_id' is beyond the end
 *         of the text, the number of characters in the text is returned.
 */
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id)
{
    uint32_t i        = 0;
    uint32_t char_cnt = 0;

    /*Count the letters in front of `byte_id` but never read beyond the terminating '\0'*/
    while(i < byte_id && txt[i] != '\0') {
        _lv_txt_encoded_next(txt, &i); /*'i' points to the next letter so use the prev. value*/
        char_cnt++;
    }

    return char_cnt;
}
703 
/**
 * Get the number of characters (and NOT bytes) in an UTF-8 string.
 * E.g.: "ÁBC" is 3 characters (but 4 bytes)
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_utf8_get_length(const char * txt)
{
    uint32_t char_cnt = 0;
    uint32_t byte_pos;

    /*Every decoded letter moves `byte_pos` to the start of the next one*/
    for(byte_pos = 0; txt[byte_pos] != '\0'; char_cnt++) {
        _lv_txt_encoded_next(txt, &byte_pos);
    }

    return char_cnt;
}
722 
723 #elif LV_TXT_ENC == LV_TXT_ENC_ASCII
724 /*******************************
 *  ASCII ENCODER/DECODER
726  ******************************/
727 
/**
 * Give the size of an ISO8859-1 coded character
 * @param str pointer to a character in a string (not used)
 * @return always 1: every character is one byte long in this encoding
 */
static uint8_t lv_txt_iso8859_1_size(const char * str)
{
    const uint8_t one_byte = 1;

    (void)str; /*Unused*/
    return one_byte;
}
738 
/**
 * Convert an Unicode letter to ISO8859-1.
 * The first 256 Unicode code points are identical to ISO8859-1, so they are returned unchanged.
 * @param letter_uni an Unicode letter
 * @return the ISO8859-1 code of the letter, or ' ' if the letter can not be represented
 */
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni)
{
    if(letter_uni < 256)
        return letter_uni;
    else
        return ' ';
}
751 
/**
 * Counterpart of the UTF-8 wide character conversion for the one byte per character encoding.
 * No conversion is needed here, so the function just hands its argument back.
 * @param c a character, e.g. 'A'
 * @return same as `c`
 */
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c)
{
    const uint32_t unchanged = c;

    return unchanged;
}
762 
/**
 * Decode an ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start.
 *          After the call it will point to the next char in 'txt'.
 *          NULL to decode the character at txt[0]
 * @return the character as an unsigned value (0..255)
 */
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i)
{
    /*Without an index decode the character `txt` points to, like the UTF-8 decoder does.
     *Go through uint8_t so that bytes above 0x7F are not sign extended.*/
    if(i == NULL) return (uint8_t)txt[0];

    uint8_t letter = txt[*i];
    (*i)++;
    return letter;
}
779 
/**
 * Get the previous ISO8859-1 character from a string.
 * @param txt pointer to '\0' terminated string
 * @param i start byte index in 'txt' where to start. After the call it will point to the previous
 *          char in 'txt'. If it is 0 there is no previous character: it is left unchanged.
 *          NULL to get the character right before `txt` (which must exist).
 * @return the character as an unsigned value (0..255), or 0 if there is no previous character
 */
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i)
{
    /*Go through uint8_t so that bytes above 0x7F are not sign extended*/
    if(i == NULL) return (uint8_t) * (txt - 1); /*Get the prev. char */

    /*Already at the start: decrementing would wrap the index around*/
    if(*i == 0) return 0;

    (*i)--;
    uint8_t letter = txt[*i];

    return letter;
}
795 
/**
 * Convert a character index (in an ISO8859-1 text) to byte index.
 * Every character is one byte long in this encoding, so the two indices are the same.
 * @param txt a '\0' terminated string (not used)
 * @param utf8_id character index
 * @return byte index of the 'utf8_id'th letter, i.e. 'utf8_id' itself
 */
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id)
{
    const uint32_t byte_id = utf8_id; /*One byte per character: no difference*/

    (void)txt; /*Unused*/
    return byte_id;
}
808 
/**
 * Convert a byte index (in an ISO8859-1 text) to character index.
 * Every character is one byte long in this encoding, so the two indices are the same.
 * @param txt a '\0' terminated string (not used)
 * @param byte_id byte index
 * @return character index of the letter at 'byte_id'th position, i.e. 'byte_id' itself
 */
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id)
{
    const uint32_t char_id = byte_id; /*One byte per character: no difference*/

    (void)txt; /*Unused*/
    return char_id;
}
821 
/**
 * Get the number of characters in an ISO8859-1 string.
 * Every character is one byte long in this encoding, so it is the same as the number of bytes.
 * @param txt a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_iso8859_1_get_length(const char * txt)
{
    const size_t byte_cnt = strlen(txt);

    return (uint32_t)byte_cnt;
}
832 #else
833 
834 #error "Invalid character encoding. See `LV_TXT_ENC` in `lv_conf.h`"
835 
836 #endif
837 
838 /**********************
839  *   STATIC FUNCTIONS
840  **********************/
841 
842 /**
843  * Test if char is break char or not (a text can broken here or not)
844  * @param letter a letter
845  * @return false: 'letter' is not break char
846  */
is_break_char(uint32_t letter)847 static inline bool is_break_char(uint32_t letter)
848 {
849     uint8_t i;
850     bool ret = false;
851 
852     /*Compare the letter to TXT_BREAK_CHARS*/
853     for(i = 0; LV_TXT_BREAK_CHARS[i] != '\0'; i++) {
854         if(letter == (uint32_t)LV_TXT_BREAK_CHARS[i]) {
855             ret = true; /*If match then it is break char*/
856             break;
857         }
858     }
859 
860     return ret;
861 }
862