1 /**
2 * @file lv_txt.c
3 *
4 */
5
6 /*********************
7 * INCLUDES
8 *********************/
9 #include <stdarg.h>
10 #include "lv_txt.h"
11 #include "lv_txt_ap.h"
12 #include "lv_math.h"
13 #include "lv_log.h"
14 #include "lv_mem.h"
15 #include "lv_assert.h"
16
17 /*********************
18 * DEFINES
19 *********************/
20 #define NO_BREAK_FOUND UINT32_MAX
21
22 /**********************
23 * TYPEDEFS
24 **********************/
25
26 /**********************
27 * STATIC PROTOTYPES
28 **********************/
29
/*Forward declarations of the encoding specific helpers.
 *Only the set matching `LV_TXT_ENC` is declared; the matching definitions are at the end of this file.*/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    static uint8_t lv_txt_utf8_size(const char * str);
    static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni);
    static uint32_t lv_txt_utf8_conv_wc(uint32_t c);
    static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i);
    static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_txt_utf8_get_length(const char * txt);
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    static uint8_t lv_txt_iso8859_1_size(const char * str);
    static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni);
    static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c);
    static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i);
    static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i_start);
    static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id);
    static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id);
    static uint32_t lv_txt_iso8859_1_get_length(const char * txt);
#endif
49 /**********************
50 * STATIC VARIABLES
51 **********************/
52
53 /**********************
54 * GLOBAL VARIABLES
55 **********************/
/*Encoding independent entry points.
 *Each pointer is initialized to the static helper of the encoding selected by `LV_TXT_ENC`,
 *so the rest of this file calls `_lv_txt_encoded_*` without caring about the actual encoding.*/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
    uint8_t (*_lv_txt_encoded_size)(const char *)                   = lv_txt_utf8_size;
    uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t)                 = lv_txt_unicode_to_utf8;
    uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t)                    = lv_txt_utf8_conv_wc;
    uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *)       = lv_txt_utf8_next;
    uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *)       = lv_txt_utf8_prev;
    uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t)  = lv_txt_utf8_get_byte_id;
    uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t)  = lv_txt_utf8_get_char_id;
    uint32_t (*_lv_txt_get_encoded_length)(const char *)             = lv_txt_utf8_get_length;
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
    uint8_t (*_lv_txt_encoded_size)(const char *)                   = lv_txt_iso8859_1_size;
    uint32_t (*_lv_txt_unicode_to_encoded)(uint32_t)                 = lv_txt_unicode_to_iso8859_1;
    uint32_t (*_lv_txt_encoded_conv_wc)(uint32_t)                    = lv_txt_iso8859_1_conv_wc;
    uint32_t (*_lv_txt_encoded_next)(const char *, uint32_t *)       = lv_txt_iso8859_1_next;
    uint32_t (*_lv_txt_encoded_prev)(const char *, uint32_t *)       = lv_txt_iso8859_1_prev;
    uint32_t (*_lv_txt_encoded_get_byte_id)(const char *, uint32_t)  = lv_txt_iso8859_1_get_byte_id;
    uint32_t (*_lv_txt_encoded_get_char_id)(const char *, uint32_t)  = lv_txt_iso8859_1_get_char_id;
    uint32_t (*_lv_txt_get_encoded_length)(const char *)             = lv_txt_iso8859_1_get_length;

#endif
76
77 /**********************
78 * MACROS
79 **********************/
80
/*Classify a single UTF-8 byte by its high bits.
 *The argument is parenthesized so that an expression with lower precedence than `&`
 *(e.g. `cond ? a : b`) is masked as a whole instead of only its last operand.*/
#define LV_IS_ASCII(value)              (((value) & 0x80U) == 0x00U) /*0xxxxxxx: one byte character*/
#define LV_IS_2BYTES_UTF8_CODE(value)   (((value) & 0xE0U) == 0xC0U) /*110xxxxx: first byte of a 2 byte code*/
#define LV_IS_3BYTES_UTF8_CODE(value)   (((value) & 0xF0U) == 0xE0U) /*1110xxxx: first byte of a 3 byte code*/
#define LV_IS_4BYTES_UTF8_CODE(value)   (((value) & 0xF8U) == 0xF0U) /*11110xxx: first byte of a 4 byte code*/
#define LV_IS_INVALID_UTF8_CODE(value)  (((value) & 0xC0U) != 0x80U) /*True if NOT a 10xxxxxx continuation byte*/
86
87 /**********************
88 * GLOBAL FUNCTIONS
89 **********************/
90
/**
 * Measure a text: the width of its longest line and the summed height of all its lines.
 * @param size_res      the result is written here (`x`: width, `y`: height). Reset to 0,0 first.
 * @param text          '\0' terminated text. If NULL the result stays 0,0.
 * @param font          font used for the measurement. If NULL the result stays 0,0.
 * @param letter_space  space added between the letters
 * @param line_space    space added between the lines
 * @param max_width     width limit handed to the line breaking (replaced by `LV_COORD_MAX` with `LV_TEXT_FLAG_EXPAND`)
 * @param flag          text flags, forwarded to the line breaking and the width measurement
 */
void lv_txt_get_size(lv_point_t * size_res, const char * text, const lv_font_t * font, lv_coord_t letter_space,
                     lv_coord_t line_space, lv_coord_t max_width, lv_text_flag_t flag)
{
    size_res->x = 0;
    size_res->y = 0;

    if(text == NULL || font == NULL) return;

    if(flag & LV_TEXT_FLAG_EXPAND) max_width = LV_COORD_MAX;

    uint16_t line_h = lv_font_get_line_height(font);
    uint32_t cur    = 0;    /*Byte index where the current line starts*/
    uint32_t next   = 0;    /*Byte index where the following line starts*/

    /*Walk the text line by line, summing the height and tracking the widest line*/
    for(; text[cur] != '\0'; cur = next) {
        next += _lv_txt_get_next_line(&text[cur], font, letter_space, max_width, NULL, flag);

        /*Stop before the height would leave the range of `lv_coord_t`*/
        if((unsigned long)size_res->y + (unsigned long)line_h + (unsigned long)line_space > LV_MAX_OF(lv_coord_t)) {
            LV_LOG_WARN("lv_txt_get_size: integer overflow while calculating text height");
            return;
        }

        size_res->y += line_h;
        size_res->y += line_space;

        lv_coord_t line_w = lv_txt_get_width(&text[cur], next - cur, font, letter_space, flag);
        size_res->x = LV_MAX(line_w, size_res->x);
    }

    /*A trailing '\n' or '\r' opens one more (empty) line*/
    if(cur != 0) {
        char last = text[cur - 1];
        if(last == '\n' || last == '\r') {
            size_res->y += line_h + line_space;
        }
    }

    /*Every line added a line space, remove the one after the last line.
     *An empty text is still one line high.*/
    if(size_res->y != 0) {
        size_res->y -= line_space;
    }
    else {
        size_res->y = line_h;
    }
}
138
/**
 * Get the next word of text. A word is delimited by break characters.
 *
 * If the word cannot fit in the max_width space, obey LV_TXT_LINE_BREAK_LONG_* rules.
 *
 * If the next word cannot fit anything, return 0.
 *
 * If the first character is a break character, returns the next index.
 *
 * Example calls from lv_txt_get_next_line() assuming sufficient max_width and
 * txt = "Test text\n"
 *        0123456789
 *
 * Calls would be as follows:
 *     1. Return i=4, pointing at breakchar ' ', for the string "Test"
 *     2. Return i=5, since i=4 was a breakchar.
 *     3. Return i=9, pointing at breakchar '\n'
 *     4. The calling lv_txt_get_next_line() would detect subsequent '\0'
 *
 * TODO: Returned word_w_ptr may overestimate the returned word's width when
 * max_width is reached. In current usage, this has no impact.
 *
 * @param txt a '\0' terminated string
 * @param font pointer to a font
 * @param letter_space letter space
 * @param max_width max width of the text (break the lines to fit this size). Set LV_COORD_MAX to avoid line breaks
 * @param flag settings for the text from 'txt_flag_type' enum
 * @param[out] word_w_ptr width (in pixels) of the parsed word. May be NULL.
 * @param cmd_state recolor command parser state, handed to `_lv_txt_is_cmd()` for every letter
 *                  when `LV_TEXT_FLAG_RECOLOR` is set; letters reported as part of a command add no width
 * @param force Force return the fraction of the word that can fit in the provided space.
 * @return the index of the first char of the next word (in byte index not letter index. With UTF-8 they are different)
 */
static uint32_t lv_txt_get_next_word(const char * txt, const lv_font_t * font,
                                     lv_coord_t letter_space, lv_coord_t max_width,
                                     lv_text_flag_t flag, uint32_t * word_w_ptr, lv_text_cmd_state_t * cmd_state, bool force)
{
    if(txt == NULL || txt[0] == '\0') return 0;
    if(font == NULL) return 0;

    if(flag & LV_TEXT_FLAG_EXPAND) max_width = LV_COORD_MAX;

    uint32_t i = 0, i_next = 0, i_next_next = 0; /*Iterating index into txt*/
    uint32_t letter = 0;      /*Letter at i*/
    uint32_t letter_next = 0; /*Letter at i_next*/
    lv_coord_t letter_w;
    lv_coord_t cur_w = 0;  /*Pixel width of the traversed string*/
    uint32_t word_len = 0;   /*Number of characters in the traversed word*/
    uint32_t break_index = NO_BREAK_FOUND; /*Only used for "long" words*/
    uint32_t break_letter_count = 0; /*Number of characters up to the long word break point*/

    /*Prime the look-ahead: `letter` is the letter at `i`, `i_next` is the byte index after it*/
    letter = _lv_txt_encoded_next(txt, &i_next);
    i_next_next = i_next;

    /*Obtain the full word, regardless if it fits or not in max_width*/
    while(txt[i] != '\0') {
        letter_next = _lv_txt_encoded_next(txt, &i_next_next);
        word_len++;

        /*Handle the recolor command*/
        if((flag & LV_TEXT_FLAG_RECOLOR) != 0) {
            if(_lv_txt_is_cmd(cmd_state, letter) != false) {
                i = i_next;
                i_next = i_next_next;
                letter = letter_next;
                continue;   /*Skip the letter if it is part of a command*/
            }
        }

        letter_w = lv_font_get_glyph_width(font, letter, letter_next);
        cur_w += letter_w;

        /*Letter space is added only after letters that have a width*/
        if(letter_w > 0) {
            cur_w += letter_space;
        }

        /*Test if this character fits within max_width*/
        if(break_index == NO_BREAK_FOUND && (cur_w - letter_space) > max_width) {
            break_index = i;
            break_letter_count = word_len - 1;
            /*break_index is now pointing at the character that doesn't fit*/
        }

        /*Check for new line chars and breakchars*/
        if(letter == '\n' || letter == '\r' || _lv_txt_is_break_char(letter)) {
            /*Update the output width on the first character if it fits.
             *Must do this here in case first letter is a break character.*/
            if(i == 0 && break_index == NO_BREAK_FOUND && word_w_ptr != NULL) *word_w_ptr = cur_w;
            word_len--;     /*The break character itself is not counted as part of the word*/
            break;
        }

        /*Update the output width*/
        if(word_w_ptr != NULL && break_index == NO_BREAK_FOUND) *word_w_ptr = cur_w;

        i = i_next;
        i_next = i_next_next;
        letter = letter_next;
    }

    /*Entire Word fits in the provided space*/
    if(break_index == NO_BREAK_FOUND) {
        /*Step over the break character when the word is empty (the text started with it),
         *and over the '\r' of a "\r\n" pair so that the returned index points at the '\n'*/
        if(word_len == 0 || (letter == '\r' && letter_next == '\n')) i = i_next;
        return i;
    }

#if LV_TXT_LINE_BREAK_LONG_LEN > 0
    /*Word doesn't fit in provided space, but isn't "long"*/
    if(word_len < LV_TXT_LINE_BREAK_LONG_LEN) {
        if(force) return break_index;
        if(word_w_ptr != NULL) *word_w_ptr = 0; /*Return no word*/
        return 0;
    }

    /*Word is "long," but insufficient amounts can fit in provided space*/
    if(break_letter_count < LV_TXT_LINE_BREAK_LONG_PRE_MIN_LEN) {
        if(force) return break_index;
        if(word_w_ptr != NULL) *word_w_ptr = 0;
        return 0;
    }

    /*Word is a "long", but letters may need to be better distributed*/
    {
        i = break_index;
        /*Number of letters to hand over to the next line so that it gets at least
         *LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN letters of the word*/
        int32_t n_move = LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN - (word_len - break_letter_count);
        /*Move pointer "i" backwards*/
        for(; n_move > 0; n_move--) {
            _lv_txt_encoded_prev(txt, &i);
            /*TODO: it would be appropriate to update the returned word width here.
             *However, in current usage, this doesn't impact anything.*/
        }
    }
    return i;
#else
    if(force) return break_index;
    if(word_w_ptr != NULL) *word_w_ptr = 0; /*Return no word*/
    (void) break_letter_count;
    return 0;
#endif
}
277
/**
 * Find where the next line of a text starts.
 * @param txt           '\0' terminated text
 * @param font          font used to measure the letters
 * @param letter_space  space between the letters
 * @param max_width     width available for the line. Not used with `LV_TEXT_FLAG_EXPAND` or `LV_TEXT_FLAG_FIT`.
 * @param used_width    if not NULL: receives the summed width of the words put on the line,
 *                      or -1 when `LV_TEXT_FLAG_EXPAND`/`LV_TEXT_FLAG_FIT` is set (the width is not measured then),
 *                      or 0 if `txt` is NULL/empty or `font` is NULL
 * @param flag          text flags
 * @return byte index (not letter index) of the first character of the next line; 0 if `txt` is NULL/empty or `font` is NULL
 */
uint32_t _lv_txt_get_next_line(const char * txt, const lv_font_t * font,
                               lv_coord_t letter_space, lv_coord_t max_width,
                               lv_coord_t * used_width, lv_text_flag_t flag)
{
    if(used_width) *used_width = 0;

    if(txt == NULL) return 0;
    if(txt[0] == '\0') return 0;
    if(font == NULL) return 0;

    lv_coord_t line_w = 0;

    /*If max_width doesn't matter simply find the new line character
     *without thinking about word wrapping*/
    if((flag & LV_TEXT_FLAG_EXPAND) || (flag & LV_TEXT_FLAG_FIT)) {
        uint32_t i;
        for(i = 0; txt[i] != '\n' && txt[i] != '\r' && txt[i] != '\0'; i++) {
            /*Just find the new line chars or string ends by incrementing `i`*/
        }
        /*NOTE(review): only one byte is skipped here, so a "\r\n" pair ends two lines on this path*/
        if(txt[i] != '\0') i++;    /*To go beyond `\n`*/
        if(used_width) *used_width = -1;
        return i;
    }

    /*`LV_TEXT_FLAG_EXPAND` always returned above, so `max_width` is used as given from here on*/
    lv_text_cmd_state_t cmd_state = LV_TEXT_CMD_STATE_WAIT;
    uint32_t i = 0;                                        /*Iterating index into txt*/

    /*Add words to the line until the width is used up, a line break is met or the text ends*/
    while(txt[i] != '\0' && max_width > 0) {
        uint32_t word_w = 0;
        /*Only the first word of the line (`i == 0`) is forced, so the line always gets something*/
        uint32_t advance = lv_txt_get_next_word(&txt[i], font, letter_space, max_width, flag, &word_w, &cmd_state, i == 0);
        max_width -= word_w;
        line_w += word_w;

        if(advance == 0) {
            break;
        }

        i += advance;

        /*The line started with a line break: it is a line on its own*/
        if(txt[0] == '\n' || txt[0] == '\r') break;

        if(txt[i] == '\n' || txt[i] == '\r') {
            i++;  /*Include the following newline in the current line*/
            break;
        }

    }

    /*Always step at least one to avoid infinite loops*/
    if(i == 0) {
        uint32_t letter = _lv_txt_encoded_next(txt, &i);
        if(used_width != NULL) {
            line_w = lv_font_get_glyph_width(font, letter, '\0');
        }
    }

    if(used_width != NULL) {
        *used_width = line_w;
    }

    return i;
}
341
/**
 * Sum the width of the letters found in the first `length` bytes of a text.
 * @param txt           '\0' terminated text
 * @param length        number of bytes of `txt` to process
 * @param font          font used to measure the letters
 * @param letter_space  space added after every letter that has a width (but not after the last one)
 * @param flag          text flags. With `LV_TEXT_FLAG_RECOLOR` the letters of the recolor commands add no width.
 * @return the width in pixels; 0 if `txt` is NULL/empty, `font` is NULL or `length` is 0
 */
lv_coord_t lv_txt_get_width(const char * txt, uint32_t length, const lv_font_t * font, lv_coord_t letter_space,
                            lv_text_flag_t flag)
{
    if(txt == NULL || font == NULL) return 0;
    if(txt[0] == '\0') return 0;

    lv_coord_t total = 0;
    lv_text_cmd_state_t recolor_state = LV_TEXT_CMD_STATE_WAIT;
    uint32_t ofs = 0;

    while(ofs < length) {
        uint32_t cur;
        uint32_t following;
        _lv_txt_encoded_letter_next_2(txt, &cur, &following, &ofs);

        /*Letters of a recolor command are not drawn*/
        if((flag & LV_TEXT_FLAG_RECOLOR) != 0 && _lv_txt_is_cmd(&recolor_state, cur) != false) continue;

        lv_coord_t glyph_w = lv_font_get_glyph_width(font, cur, following);
        if(glyph_w <= 0) continue;

        total += glyph_w;
        total += letter_space;
    }

    /*Trim the last letter space. Important if the text is center aligned*/
    if(total > 0) total -= letter_space;

    return total;
}
380
/**
 * Feed one letter to the recolor command parser and tell whether the letter belongs to a command.
 * The command character is `LV_TXT_COLOR_CMD[0]`.
 * @param state     parser state, updated by the call
 * @param c         the letter to process
 * @return true: `c` is part of a recolor command (command character or parameter); false: `c` is normal text
 */
bool _lv_txt_is_cmd(lv_text_cmd_state_t * state, uint32_t c)
{
    bool is_cmd = false;

    if(c == (uint32_t)LV_TXT_COLOR_CMD[0]) {
        switch(*state) {
            case LV_TEXT_CMD_STATE_WAIT:    /*Start char*/
                *state = LV_TEXT_CMD_STATE_PAR;
                is_cmd = true;
                break;
            case LV_TEXT_CMD_STATE_PAR:     /*Other start char in parameter is escaped cmd. char*/
                *state = LV_TEXT_CMD_STATE_WAIT;
                break;
            case LV_TEXT_CMD_STATE_IN:      /*Command end*/
                *state = LV_TEXT_CMD_STATE_WAIT;
                is_cmd = true;
                break;
            default:
                break;
        }
    }

    /*While in the parameter every letter is part of the command; the space after it ends the parameter*/
    if(*state == LV_TEXT_CMD_STATE_PAR) {
        if(c == ' ') *state = LV_TEXT_CMD_STATE_IN; /*After the parameter the text is in the command*/
        is_cmd = true;
    }

    return is_cmd;
}
411
/**
 * Insert a string into another one, in place.
 * The function writes `strlen(ins_txt)` bytes beyond the current end of `txt_buf`,
 * so the buffer has to be large enough for the result.
 * @param txt_buf   the '\0' terminated text to extend. Nothing happens if NULL.
 * @param pos       letter index (not byte index) where the text is inserted
 * @param ins_txt   the '\0' terminated text to insert. Nothing happens if NULL or empty.
 */
void _lv_txt_ins(char * txt_buf, uint32_t pos, const char * ins_txt)
{
    if(txt_buf == NULL || ins_txt == NULL) return;

    size_t old_len = strlen(txt_buf);
    size_t ins_len = strlen(ins_txt);
    if(ins_len == 0) return;

    pos = _lv_txt_encoded_get_byte_id(txt_buf, pos); /*Convert to byte index instead of letter index*/

    /*Shift the tail (including the terminator) to the right, starting from the end
     *because the source and the destination overlap*/
    size_t first_moved = pos + ins_len;     /*New index of the byte that was at `pos`*/
    size_t dst = old_len + ins_len;         /*New index of the terminator*/
    while(dst >= first_moved) {
        txt_buf[dst] = txt_buf[dst - ins_len];
        dst--;
    }

    /*Copy the text into the new space*/
    lv_memcpy_small(txt_buf + pos, ins_txt, ins_len);
}
432
/**
 * Delete a part of a string, in place.
 * @param txt   the '\0' terminated text to modify. Nothing happens if NULL.
 * @param pos   letter index (not byte index) of the first letter to delete
 * @param len   number of letters to delete
 */
void _lv_txt_cut(char * txt, uint32_t pos, uint32_t len)
{
    if(txt == NULL) return;

    size_t txt_len = strlen(txt);

    /*Convert to byte indices instead of letter indices*/
    pos = _lv_txt_encoded_get_byte_id(txt, pos);
    len = _lv_txt_encoded_get_byte_id(&txt[pos], len);

    /*Pull the tail (including the terminator) over the deleted part*/
    uint32_t dst = pos;
    while(dst <= txt_len - len) {
        txt[dst] = txt[dst + len];
        dst++;
    }
}
448
/**
 * Create a newly allocated string from a printf-like format and its arguments.
 * With `LV_USE_ARABIC_PERSIAN_CHARS` the formatted text is also run through `_lv_txt_ap_proc()`.
 * @param fmt   printf-like format string
 * @param ap    the arguments of `fmt`
 * @return the text in a buffer obtained from `lv_mem_alloc()`, or NULL if formatting or an allocation failed.
 *         The buffer is handed over to the caller (presumably to be released with `lv_mem_free()` - confirm).
 */
char * _lv_txt_set_text_vfmt(const char * fmt, va_list ap)
{
    /*Allocate space for the new text by using trick from C99 standard section 7.19.6.12*/
    va_list ap_copy;
    va_copy(ap_copy, ap);
    int fmt_len = lv_vsnprintf(NULL, 0, fmt, ap_copy);
    va_end(ap_copy);

    /*A vsnprintf-like formatter reports an error with a negative value.
     *Don't turn it into a huge unsigned length.*/
    if(fmt_len < 0) return NULL;
    uint32_t len = (uint32_t)fmt_len;

    char * text = 0;
#if LV_USE_ARABIC_PERSIAN_CHARS
    /*Put together the text according to the format string*/
    char * raw_txt = lv_mem_buf_get(len + 1);
    LV_ASSERT_MALLOC(raw_txt);
    if(raw_txt == NULL) {
        return NULL;
    }

    lv_vsnprintf(raw_txt, len + 1, fmt, ap);

    /*Get the size of the Arabic text and process it*/
    size_t len_ap = _lv_txt_ap_calc_bytes_cnt(raw_txt);
    text = lv_mem_alloc(len_ap + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
        lv_mem_buf_release(raw_txt);    /*Don't leak the temporary buffer on failure*/
        return NULL;
    }
    _lv_txt_ap_proc(raw_txt, text);

    lv_mem_buf_release(raw_txt);
#else
    text = lv_mem_alloc(len + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
        return NULL;
    }
    text[len] = 0; /*Ensure NULL termination*/

    lv_vsnprintf(text, len + 1, fmt, ap);
#endif

    return text;
}
491
/**
 * Decode the letter at `*ofs` and peek at the letter after it.
 * @param txt           '\0' terminated text
 * @param letter        receives the letter decoded at `*ofs`
 * @param letter_next   receives the letter following `letter` (0 if `letter` is '\0'). It is not consumed.
 * @param ofs           byte index to decode from; moved behind `letter` by the call
 */
void _lv_txt_encoded_letter_next_2(const char * txt, uint32_t * letter, uint32_t * letter_next, uint32_t * ofs)
{
    *letter = _lv_txt_encoded_next(txt, ofs);

    /*Nothing to peek at after the terminator*/
    if(*letter == '\0') {
        *letter_next = 0;
        return;
    }

    *letter_next = _lv_txt_encoded_next(&txt[*ofs], NULL);
}
497
498 #if LV_TXT_ENC == LV_TXT_ENC_UTF8
499 /*******************************
500 * UTF-8 ENCODER/DECODER
501 ******************************/
502
/**
 * Tell how many bytes an UTF-8 coded character takes, judged by its first byte only.
 * @param str   pointer to the first byte of a character in a string
 * @return 1, 2, 3 or 4; 0 if `str[0]` can't start an UTF-8 character
 */
static uint8_t lv_txt_utf8_size(const char * str)
{
    const char lead = str[0];

    if(LV_IS_ASCII(lead)) return 1;
    if(LV_IS_2BYTES_UTF8_CODE(lead)) return 2;
    if(LV_IS_3BYTES_UTF8_CODE(lead)) return 3;
    if(LV_IS_4BYTES_UTF8_CODE(lead)) return 4;

    return 0;   /*Continuation byte or invalid lead byte*/
}
520
/**
 * Convert an Unicode letter to UTF-8.
 * @param letter_uni    an Unicode letter
 * @return the UTF-8 bytes packed into an `uint32_t` so that, in memory, the first byte of the
 *         integer is the first UTF-8 byte and the unused bytes are 0
 *         (on little endian systems this matches C chars like 'Á', 'Ű').
 *         Letters below 128 are returned unchanged. 0 if `letter_uni` is not below 0x110000.
 */
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni)
{
    if(letter_uni < 128) return letter_uni;

    /*The union gives a well-defined and aligned way to read the 4 bytes as one integer*/
    union {
        uint8_t bytes[4];
        uint32_t word;
    } enc = {{0, 0, 0, 0}};

    if(letter_uni < 0x0800) {
        enc.bytes[0] = ((letter_uni >> 6) & 0x1F) | 0xC0;
        enc.bytes[1] = ((letter_uni >> 0) & 0x3F) | 0x80;
    }
    else if(letter_uni < 0x010000) {
        enc.bytes[0] = ((letter_uni >> 12) & 0x0F) | 0xE0;
        enc.bytes[1] = ((letter_uni >> 6) & 0x3F) | 0x80;
        enc.bytes[2] = ((letter_uni >> 0) & 0x3F) | 0x80;
    }
    else if(letter_uni < 0x110000) {
        enc.bytes[0] = ((letter_uni >> 18) & 0x07) | 0xF0;
        enc.bytes[1] = ((letter_uni >> 12) & 0x3F) | 0x80;
        enc.bytes[2] = ((letter_uni >> 6) & 0x3F) | 0x80;
        enc.bytes[3] = ((letter_uni >> 0) & 0x3F) | 0x80;
    }
    else {
        return 0;   /*Out of the Unicode range, nothing to encode*/
    }

    return enc.word;
}
553
/**
 * Convert a wide character, e.g. 'Á' little endian to be UTF-8 compatible
 * @param c     a wide character or a Little endian number
 * @return `c` in big endian. Values whose lowest byte has no 0x80 bit set are returned unchanged,
 *         and so is everything when `LV_BIG_ENDIAN_SYSTEM` is not 0.
 */
static uint32_t lv_txt_utf8_conv_wc(uint32_t c)
{
#if LV_BIG_ENDIAN_SYSTEM == 0
    /*Swap the bytes (UTF-8 is big endian, but the MCUs are little endian)*/
    if((c & 0x80) != 0) {
        /*Look at `c` byte by byte, in memory order*/
        union {
            uint32_t word;
            uint8_t bytes[4];
        } src;
        src.word = c;

        /*Widen before shifting: shifting a promoted `int` into the sign bit is undefined*/
        uint32_t swapped = ((uint32_t)src.bytes[0] << 24) + ((uint32_t)src.bytes[1] << 16) +
                           ((uint32_t)src.bytes[2] << 8) + (uint32_t)src.bytes[3];

        uint8_t i;
        for(i = 0; i < 4; i++) {
            if((swapped & 0xFF) == 0)
                swapped = (swapped >> 8); /*Ignore leading zeros (they were in the end originally)*/
        }
        c = swapped;
    }
#endif
    return c;
}
578
/**
 * Decode the UTF-8 character found at a byte index of a string and step behind it.
 *
 * Unicode to UTF-8
 * 00000000 00000000 00000000 0xxxxxxx -> 0xxxxxxx
 * 00000000 00000000 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
 * 00000000 00000000 zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
 * 00000000 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
 *
 * @param txt   pointer to '\0' terminated string
 * @param i     byte index in 'txt' where the character starts. After the call it is the index of the
 *              next character; if a continuation byte is invalid it is left on that byte.
 *              NULL to decode at `txt[0]` without getting the new index.
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i)
{
    /*Dummy 'i' pointer is required*/
    uint32_t i_dummy = 0;
    if(i == NULL) i = &i_dummy;

    const char lead = txt[*i];

    /*Normal ASCII*/
    if(LV_IS_ASCII(lead)) {
        (*i)++;
        return lead;
    }

    /*Real UTF-8 decode: get the payload bits of the lead byte and the number of bytes to follow*/
    uint32_t code;
    uint8_t trail_cnt;
    if(LV_IS_2BYTES_UTF8_CODE(lead)) {
        code = (uint32_t)(lead & 0x1F);
        trail_cnt = 1;
    }
    else if(LV_IS_3BYTES_UTF8_CODE(lead)) {
        code = (uint32_t)(lead & 0x0F);
        trail_cnt = 2;
    }
    else if(LV_IS_4BYTES_UTF8_CODE(lead)) {
        code = (uint32_t)(lead & 0x07);
        trail_cnt = 3;
    }
    else {
        (*i)++; /*Not UTF-8 char. Go the next.*/
        return 0;
    }
    (*i)++;

    /*Every continuation byte adds 6 more bits*/
    for(; trail_cnt > 0; trail_cnt--) {
        if(LV_IS_INVALID_UTF8_CODE(txt[*i])) return 0;
        code = (code << 6) + (uint32_t)(txt[*i] & 0x3F);
        (*i)++;
    }

    return code;
}
654
/**
 * Get previous UTF-8 character from a string.
 * @param txt   pointer to '\0' terminated string
 * @param i     start byte index in 'txt' where to start. After the call it will point to the previous
 *              UTF-8 char in 'txt'. If it is 0 there is no previous character: it is left
 *              unchanged and 0 is returned.
 * @return the decoded Unicode character or 0 on invalid UTF-8 code
 */
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i)
{
    /*Nothing is before the first byte. Stepping back would wrap the index and read out of the string.*/
    if(*i == 0) return 0;

    uint8_t c_size;
    uint8_t cnt = 0;

    /*Try to find a !0 long UTF-8 char by stepping one character back*/
    (*i)--;
    do {
        if(cnt >= 4) return 0;      /*No UTF-8 char found before the initial*/

        c_size = _lv_txt_encoded_size(&txt[*i]);
        if(c_size == 0) {
            /*Not the first byte of a character, look one byte earlier*/
            if(*i != 0)
                (*i)--;
            else
                return 0;
        }
        cnt++;
    } while(c_size == 0);

    /*Decode with a copy of the index to leave `*i` on the start of the found character*/
    uint32_t i_tmp  = *i;
    uint32_t letter = _lv_txt_encoded_next(txt, &i_tmp); /*Character found, get it*/

    return letter;
}
687
/**
 * Convert a character index (in an UTF-8 text) to byte index.
 * E.g. in "AÁRT" index of 'R' is 2th char but start at byte 3 because 'Á' is 2 bytes long
 * @param txt       a '\0' terminated UTF-8 string
 * @param utf8_id   character index
 * @return byte index of the 'utf8_id'th letter, or the byte index of the terminator
 *         if the text has fewer letters
 */
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id)
{
    uint32_t i;
    uint32_t byte_cnt = 0;
    for(i = 0; i < utf8_id && txt[byte_cnt] != '\0'; i++) {
        uint8_t c_size = _lv_txt_encoded_size(&txt[byte_cnt]);
        /* If the char was invalid tell it's 1 byte long*/
        if(c_size == 0) c_size = 1;

        /*Step byte by byte: a multi-byte code cut by the end of the string must not
         *carry the index over the terminator*/
        do {
            byte_cnt++;
            c_size--;
        } while(c_size > 0 && txt[byte_cnt] != '\0');
    }

    return byte_cnt;
}
707
/**
 * Convert a byte index (in an UTF-8 text) to character index.
 * E.g. in "AÁRT" 'R' starts at byte 3 but it is the 2th character because 'Á' is 2 bytes long
 * @param txt       a '\0' terminated UTF-8 string
 * @param byte_id   byte index
 * @return number of characters decoded before reaching 'byte_id'
 */
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id)
{
    uint32_t char_cnt = 0;
    uint32_t ofs;

    /*Every decoded character moves `ofs` by the size of the character*/
    for(ofs = 0; ofs < byte_id; char_cnt++) {
        _lv_txt_encoded_next(txt, &ofs);
    }

    return char_cnt;
}
727
/**
 * Count the characters (and NOT the bytes) of a string by decoding it character by character.
 * E.g.: "ÁBC" is 3 characters (but 4 bytes)
 * @param txt   a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_utf8_get_length(const char * txt)
{
    uint32_t char_cnt = 0;
    uint32_t ofs;

    for(ofs = 0; txt[ofs] != '\0'; char_cnt++) {
        _lv_txt_encoded_next(txt, &ofs);
    }

    return char_cnt;
}
746
747 #elif LV_TXT_ENC == LV_TXT_ENC_ASCII
748 /*******************************
749 * ASCII ENCODER/DECODER
750 ******************************/
751
/**
 * Give the size of an ISO8859-1 coded character
 * @param str   pointer to a character in a string (not examined)
 * @return always 1: every character is one byte long in this encoding
 */
static uint8_t lv_txt_iso8859_1_size(const char * str)
{
    LV_UNUSED(str);     /*The size doesn't depend on the character*/

    return 1;
}
762
/**
 * Convert an Unicode letter to ISO8859-1.
 * @param letter_uni    an Unicode letter
 * @return `letter_uni` itself if it is below 256, else a space (' ') as replacement
 */
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni)
{
    /*Letters that don't fit in one byte can't be represented*/
    return (letter_uni < 256) ? letter_uni : (uint32_t)' ';
}
775
/**
 * Counterpart of the UTF-8 wide character conversion for the one byte encoding.
 * No conversion is done: the value is handed back as it is.
 * @param c     a character, e.g. 'A'
 * @return same as `c`
 */
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c)
{
    return c;
}
786
/**
 * Decode an ISO8859-1 character from a string.
 * @param txt   pointer to '\0' terminated string
 * @param i     start byte index in 'txt' where to start.
 *              After call it will point to the next char in 'txt'.
 *              NULL to use txt[0] as index
 * @return the character at the index as an unsigned value (0..255)
 */
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i)
{
    /*Without an index decode the first character, like the UTF-8 decoder does.
     *Go through `uint8_t` so bytes >= 0x80 are not sign extended where `char` is signed.*/
    if(i == NULL) return (uint8_t)txt[0];

    uint8_t letter = txt[*i];
    (*i)++;
    return letter;
}
803
/**
 * Get previous ISO8859-1 character from a string.
 * @param txt   pointer to '\0' terminated string
 * @param i     start byte index in 'txt' where to start. After the call it will point to the previous char in 'txt'.
 *              If it is 0 there is no previous character: it is left unchanged and 0 is returned.
 *              NULL to get the character right before `txt` (the caller has to make sure it exists).
 * @return the previous character as an unsigned value (0..255), or 0 if there is none
 */
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i)
{
    /*Go through `uint8_t` so bytes >= 0x80 are not sign extended where `char` is signed*/
    if(i == NULL) return (uint8_t) * (txt - 1); /*Get the prev. char*/

    /*Stepping back from 0 would wrap the index and read out of the string*/
    if(*i == 0) return 0;

    (*i)--;
    uint8_t letter = txt[*i];

    return letter;
}
819
/**
 * Convert a character index (in an ISO8859-1 text) to byte index.
 * Every character is one byte long in this encoding so the two indices are the same.
 * @param txt       a '\0' terminated string (not examined)
 * @param utf8_id   character index
 * @return `utf8_id` unchanged
 */
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id)
{
    LV_UNUSED(txt);     /*The text is not needed for the conversion*/

    return utf8_id;     /*In Non encoded no difference*/
}
832
/**
 * Convert a byte index (in an ISO8859-1 text) to character index.
 * Every character is one byte long in this encoding so the two indices are the same.
 * @param txt       a '\0' terminated string (not examined)
 * @param byte_id   byte index
 * @return `byte_id` unchanged
 */
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id)
{
    LV_UNUSED(txt);     /*The text is not needed for the conversion*/

    return byte_id;     /*In Non encoded no difference*/
}
845
/**
 * Get the number of characters in a string.
 * Every character is one byte long in this encoding so it is the same as the length in bytes.
 * @param txt   a '\0' terminated char string
 * @return number of characters
 */
static uint32_t lv_txt_iso8859_1_get_length(const char * txt)
{
    uint32_t char_cnt = strlen(txt);

    return char_cnt;
}
856 #else
857
858 #error "Invalid character encoding. See `LV_TXT_ENC` in `lv_conf.h`"
859
860 #endif
861