1 /**
2 * @file lv_text_ap.c
3 *
4 */
5
6 /*********************
7 * INCLUDES
8 *********************/
9 #include "lv_bidi.h"
10 #include "lv_text_private.h"
11 #include "lv_text_ap.h"
12 #include "lv_types.h"
13 #include "../stdlib/lv_mem.h"
14 #include "../draw/lv_draw.h"
15
16 /*********************
17 * DEFINES
18 *********************/
19
20 /**********************
21 * TYPEDEFS
22 **********************/
/** Joining capability flags of one shaping-table row. */
typedef struct {
    uint8_t conj_to_previous;   /**< Non-zero if the character can join with the character before it */
    uint8_t conj_to_next;       /**< Non-zero if the character can join with the character after it */
} ap_chars_conjunction_t;

/**
 * One row of the Arabic/Persian shaping table.
 *
 * The beginning, middle and isolated glyphs are stored as signed distances
 * from the end-form glyph instead of as full code points.
 */
typedef struct {
    uint8_t  char_offset;                   /**< Added to LV_AP_ALPHABET_BASE_CODE to get the unshaped code point */
    uint16_t char_end_form;                 /**< Code point of the end form; 0 terminates the table */
    int8_t   char_beginning_form_offset;    /**< char_end_form + this = beginning form */
    int8_t   char_middle_form_offset;       /**< char_end_form + this = middle form */
    int8_t   char_isolated_form_offset;     /**< char_end_form + this = isolated form */
    ap_chars_conjunction_t ap_chars_conjunction;
} ap_chars_map_t;
34
35 /**********************
36 * STATIC PROTOTYPES
37 **********************/
38 #if LV_USE_ARABIC_PERSIAN_CHARS == 1
/* True when `c` lies in the vowel-mark range 0x064B..0x0652. */
static bool lv_text_is_arabic_vowel(uint16_t c);

/* Row index of `c` in ap_chars_map, or LV_UNDEF_ARABIC_PERSIAN_CHARS when it is not listed. */
static uint32_t lv_ap_get_char_index(uint16_t c);

/* Lam-alef ligature code point for two ap_chars_map row indices, or 0 when they do not form one. */
static uint32_t lv_text_lam_alef(uint32_t ch_curr, uint32_t ch_next);
42
43 /**********************
44 * STATIC VARIABLES
45 **********************/
46
47 const ap_chars_map_t ap_chars_map[] = {
48 /*{Key Offset, End, Beginning, Middle, Isolated, {conjunction}}*/
49 {0, 0xFE81, 0, 0, 0, {0, 0}}, // أ
50 {1, 0xFE84, -1, 0, -1, {1, 0}}, // أ
51 {2, 0xFE86, -1, 0, -1, {1, 0}}, // ؤ
52 {3, 0xFE88, -1, 0, -1, {1, 0}}, // ﺇ
53 {4, 0xFE8A, 1, 2, -1, {1, 1}}, // ئ
54 {5, 0xFE8E, -1, 0, -1, {1, 0}}, // آ
55 {6, 0xFE90, 1, 2, -1, {1, 1}}, // ب
56 {92, 0xFB57, 1, 2, -1, {1, 1}}, // پ
57 {8, 0xFE96, 1, 2, -1, {1, 1}}, // ت
58 {9, 0xFE9A, 1, 2, -1, {1, 1}}, // ث
59 {10, 0xFE9E, 1, 2, -1, {1, 1}}, // ج
60 {100, 0xFB7B, 1, 2, -1, {1, 1}}, // چ
61 {11, 0xFEA2, 1, 2, -1, {1, 1}}, // ح
62 {12, 0xFEA6, 1, 2, -1, {1, 1}}, // خ
63 {13, 0xFEAA, -1, 0, -1, {1, 0}}, // د
64 {14, 0xFEAC, -1, 0, -1, {1, 0}}, // ذ
65 {15, 0xFEAE, -1, 0, -1, {1, 0}}, // ر
66 {16, 0xFEB0, -1, 0, -1, {1, 0}}, // ز
67 {118, 0xFB8B, -1, 0, -1, {1, 0}}, // ژ
68 {17, 0xFEB2, 1, 2, -1, {1, 1}}, // س
69 {18, 0xFEB6, 1, 2, -1, {1, 1}}, // ش
70 {19, 0xFEBA, 1, 2, -1, {1, 1}}, // ص
71 {20, 0xFEBE, 1, 2, -1, {1, 1}}, // ض
72 {21, 0xFEC2, 1, 2, -1, {1, 1}}, // ط
73 {22, 0xFEC6, 1, 2, -1, {1, 1}}, // ظ
74 {23, 0xFECA, 1, 2, -1, {1, 1}}, // ع
75 {24, 0xFECE, 1, 2, -1, {1, 1}}, // غ
76 {30, 0x0640, 0, 0, 0, {1, 1}}, // - (mad, hyphen)
77 {31, 0xFED2, 1, 2, -1, {1, 1}}, // ف
78 {32, 0xFED6, 1, 2, -1, {1, 1}}, // ق
79 {135, 0xFB8F, 1, 2, -1, {1, 1}}, // ک
80 {33, 0xFEDA, 1, 2, -1, {1, 1}}, // ﻙ
81 {141, 0xFB93, 1, 2, -1, {1, 1}}, // گ
82 {34, 0xFEDE, 1, 2, -1, {1, 1}}, // ل
83 {35, 0xFEE2, 1, 2, -1, {1, 1}}, // م
84 {36, 0xFEE6, 1, 2, -1, {1, 1}}, // ن
85 {38, 0xFEEE, -1, 0, -1, {1, 0}}, // و
86 {37, 0xFEEA, 1, 2, -1, {1, 1}}, // ه
87 {39, 0xFEF0, 0, 0, -1, {1, 0}}, // ى
88 {40, 0xFEF2, 1, 2, -1, {1, 1}}, // ي
89 {170, 0xFBFD, 1, 2, -1, {1, 1}}, // ی
90 {7, 0xFE94, -1, 2, -1, {1, 0}}, // ة
91 {206, 0x06F0, -1, 2, 0, {0, 0}}, // ۰
92 {207, 0x06F1, 0, 0, 0, {0, 0}}, // ۱
93 {208, 0x06F2, 0, 0, 0, {0, 0}}, // ۲
94 {209, 0x06F3, 0, 0, 0, {0, 0}}, // ۳
95 {210, 0x06F4, 0, 0, 0, {0, 0}}, // ۴
96 {211, 0x06F5, 0, 0, 0, {0, 0}}, // ۵
97 {212, 0x06F6, 0, 0, 0, {0, 0}}, // ۶
98 {213, 0x06F7, 0, 0, 0, {0, 0}}, // ۷
99 {214, 0x06F8, 0, 0, 0, {0, 0}}, // ۸
100 {215, 0x06F9, 0, 0, 0, {0, 0}}, // ۹
101 LV_AP_END_CHARS_LIST
102 };
103 /**********************
104 * MACROS
105 **********************/
106
107 /**********************
108 * GLOBAL FUNCTIONS
109 **********************/
lv_text_ap_calc_bytes_count(const char * txt)110 uint32_t lv_text_ap_calc_bytes_count(const char * txt)
111 {
112 uint32_t txt_length = 0;
113 uint32_t chars_cnt = 0;
114 uint32_t current_ap_idx = 0;
115 uint32_t i, j;
116 uint32_t ch_enc;
117
118 txt_length = lv_text_get_encoded_length(txt);
119
120 i = 0;
121 j = 0;
122 while(i < txt_length) {
123 ch_enc = lv_text_encoded_next(txt, &j);
124 current_ap_idx = lv_ap_get_char_index(ch_enc);
125
126 if(current_ap_idx != LV_UNDEF_ARABIC_PERSIAN_CHARS)
127 ch_enc = ap_chars_map[current_ap_idx].char_end_form;
128
129 if(ch_enc < 0x80)
130 chars_cnt++;
131 else if(ch_enc < 0x0800)
132 chars_cnt += 2;
133 else if(ch_enc < 0x010000)
134 chars_cnt += 3;
135 else
136 chars_cnt += 4;
137
138 i++;
139 }
140
141 return chars_cnt + 1;
142 }
143
lv_text_ap_proc(const char * txt,char * txt_out)144 void lv_text_ap_proc(const char * txt, char * txt_out)
145 {
146 uint32_t txt_length = 0;
147 uint32_t index_current, idx_next, idx_previous, i, j;
148 uint32_t * ch_enc;
149 uint32_t * ch_fin;
150 char * txt_out_temp;
151
152 txt_length = lv_text_get_encoded_length(txt);
153
154 ch_enc = (uint32_t *)lv_malloc(sizeof(uint32_t) * (txt_length + 1));
155 ch_fin = (uint32_t *)lv_malloc(sizeof(uint32_t) * (txt_length + 1));
156
157 i = 0;
158 j = 0;
159 while(j < txt_length)
160 ch_enc[j++] = lv_text_encoded_next(txt, &i);
161
162 ch_enc[j] = 0;
163
164 i = 0;
165 j = 0;
166 idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
167 while(i < txt_length) {
168 index_current = lv_ap_get_char_index(ch_enc[i]);
169 idx_next = lv_ap_get_char_index(ch_enc[i + 1]);
170
171 if(lv_text_is_arabic_vowel(ch_enc[i])) { // Current character is a vowel
172 ch_fin[j] = ch_enc[i];
173 i++;
174 j++;
175 continue; // Skip this character
176 }
177 else if(lv_text_is_arabic_vowel(ch_enc[i + 1])) { // Next character is a vowel
178 idx_next = lv_ap_get_char_index(ch_enc[i + 2]); // Skip the vowel character to join with the character after it
179 }
180
181 if(index_current == LV_UNDEF_ARABIC_PERSIAN_CHARS) {
182 ch_fin[j] = ch_enc[i];
183 j++;
184 i++;
185 idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
186 continue;
187 }
188
189 uint8_t conjunction_to_previous = (i == 0 ||
190 idx_previous == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_previous].ap_chars_conjunction.conj_to_next;
191 uint8_t conjunction_to_next = ((i == txt_length - 1) ||
192 idx_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_next].ap_chars_conjunction.conj_to_previous;
193
194 uint32_t lam_alef = lv_text_lam_alef(index_current, idx_next);
195 if(lam_alef) {
196 if(conjunction_to_previous) {
197 lam_alef ++;
198 }
199 ch_fin[j] = lam_alef;
200 idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
201 i += 2;
202 j++;
203 continue;
204 }
205
206 if(conjunction_to_previous && conjunction_to_next)
207 ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_middle_form_offset;
208 else if(!conjunction_to_previous && conjunction_to_next)
209 ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_beginning_form_offset;
210 else if(conjunction_to_previous && !conjunction_to_next)
211 ch_fin[j] = ap_chars_map[index_current].char_end_form;
212 else
213 ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_isolated_form_offset;
214 idx_previous = index_current;
215 i++;
216 j++;
217 }
218 ch_fin[j] = 0;
219 for(i = 0; i < txt_length; i++)
220 ch_enc[i] = 0;
221 for(i = 0; i < j; i++)
222 ch_enc[i] = ch_fin[i];
223 lv_free(ch_fin);
224
225 txt_out_temp = txt_out;
226 i = 0;
227
228 while(i < txt_length) {
229 if(ch_enc[i] < 0x80) {
230 *(txt_out_temp++) = ch_enc[i] & 0xFF;
231 }
232 else if(ch_enc[i] < 0x0800) {
233 *(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x1F) | 0xC0;
234 *(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
235 }
236 else if(ch_enc[i] < 0x010000) {
237 *(txt_out_temp++) = ((ch_enc[i] >> 12) & 0x0F) | 0xE0;
238 *(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x3F) | 0x80;
239 *(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
240 }
241 else if(ch_enc[i] < 0x110000) {
242 *(txt_out_temp++) = ((ch_enc[i] >> 18) & 0x07) | 0xF0;
243 *(txt_out_temp++) = ((ch_enc[i] >> 12) & 0x3F) | 0x80;
244 *(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x3F) | 0x80;
245 *(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
246 }
247
248 i++;
249 }
250 *(txt_out_temp) = '\0';
251 lv_free(ch_enc);
252 }
253 /**********************
254 * STATIC FUNCTIONS
255 **********************/
256
lv_ap_get_char_index(uint16_t c)257 static uint32_t lv_ap_get_char_index(uint16_t c)
258 {
259 for(uint8_t i = 0; ap_chars_map[i].char_end_form; i++) {
260 if(c == (ap_chars_map[i].char_offset + LV_AP_ALPHABET_BASE_CODE))
261 return i;
262 else if(c == ap_chars_map[i].char_end_form //is it an End form
263 || c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_beginning_form_offset) //is it a Beginning form
264 || c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_middle_form_offset) //is it a middle form
265 || c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_isolated_form_offset)) { //is it an isolated form
266 return i;
267 }
268 }
269 return LV_UNDEF_ARABIC_PERSIAN_CHARS;
270 }
271
lv_text_lam_alef(uint32_t ch_curr,uint32_t ch_next)272 static uint32_t lv_text_lam_alef(uint32_t ch_curr, uint32_t ch_next)
273 {
274 uint32_t ch_code = 0;
275 if(ap_chars_map[ch_curr].char_offset != 34) {
276 return 0;
277 }
278 if(ch_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) {
279 return 0;
280 }
281 ch_code = ap_chars_map[ch_next].char_offset + LV_AP_ALPHABET_BASE_CODE;
282 if(ch_code == 0x0622) {
283 return 0xFEF5; // (lam-alef) mad
284 }
285 if(ch_code == 0x0623) {
286 return 0xFEF7; // (lam-alef) top hamza
287 }
288 if(ch_code == 0x0625) {
289 return 0xFEF9; // (lam-alef) bot hamza
290 }
291 if(ch_code == 0x0627) {
292 return 0xFEFB; // (lam-alef) alef
293 }
294 return 0;
295 }
296
/**
 * Tell whether a code point is treated as an Arabic vowel mark,
 * i.e. lies in the inclusive range 0x064B..0x0652.
 *
 * @param c     the code point to test
 * @return      true if `c` is inside the range
 */
static bool lv_text_is_arabic_vowel(uint16_t c)
{
    if(c < 0x064B) {
        return false;
    }
    if(c > 0x0652) {
        return false;
    }
    return true;
}
301
302 #endif
303