1 /**
2  * @file lv_text_ap.c
3  *
4  */
5 
6 /*********************
7  *      INCLUDES
8  *********************/
9 #include "lv_bidi.h"
10 #include "lv_text_private.h"
11 #include "lv_text_ap.h"
12 #include "lv_types.h"
13 #include "../stdlib/lv_mem.h"
14 #include "../draw/lv_draw.h"
15 
16 /*********************
17  *      DEFINES
18  *********************/
19 
20 /**********************
21  *      TYPEDEFS
22  **********************/
/**
 * One row of the Arabic/Persian shaping table.
 * The code points of the non-end forms are obtained by adding the signed
 * offsets below to `char_end_form`. A row whose `char_end_form` is 0
 * terminates the table.
 */
typedef struct {
    uint8_t char_offset;                /**< Base letter, stored as an offset from LV_AP_ALPHABET_BASE_CODE*/
    uint16_t char_end_form;             /**< Code point of the end form (joined to the previous letter only)*/
    int8_t char_beginning_form_offset;  /**< Added to `char_end_form` when only the next letter joins*/
    int8_t char_middle_form_offset;     /**< Added to `char_end_form` when both neighbours join*/
    int8_t char_isolated_form_offset;   /**< Added to `char_end_form` when neither neighbour joins*/
    struct {
        uint8_t conj_to_previous;       /**< Non-zero: a preceding letter is shaped as joined to this one*/
        uint8_t conj_to_next;           /**< Non-zero: a following letter is shaped as joined to this one*/
    } ap_chars_conjunction;
} ap_chars_map_t;
34 
35 /**********************
36  *  STATIC PROTOTYPES
37  **********************/
38 #if LV_USE_ARABIC_PERSIAN_CHARS == 1
/* Find the ap_chars_map row of a base letter or one of its shaped forms;
 * returns LV_UNDEF_ARABIC_PERSIAN_CHARS when no row matches */
static uint32_t lv_ap_get_char_index(uint16_t c);
/* Return the isolated lam-alef ligature for two ap_chars_map row indices, or 0 if they do not form one */
static uint32_t lv_text_lam_alef(uint32_t ch_curr, uint32_t ch_next);
/* True for code points in the range 0x064B..0x0652 */
static bool lv_text_is_arabic_vowel(uint16_t c);
42 
43 /**********************
44  *  STATIC VARIABLES
45  **********************/
46 
47 const ap_chars_map_t ap_chars_map[] = {
48     /*{Key Offset, End, Beginning, Middle, Isolated, {conjunction}}*/
49     {0, 0xFE81, 0, 0, 0,  {0, 0}},      // أ
50     {1, 0xFE84, -1, 0, -1,  {1, 0}},    // أ
51     {2, 0xFE86, -1, 0, -1,  {1, 0}},    // ؤ
52     {3, 0xFE88, -1, 0, -1,  {1, 0}},    // ﺇ
53     {4, 0xFE8A, 1, 2, -1,  {1, 1}},    // ئ
54     {5, 0xFE8E, -1, 0, -1,  {1, 0}},    // آ
55     {6, 0xFE90, 1, 2, -1,  {1, 1}},    // ب
56     {92, 0xFB57, 1, 2, -1,  {1, 1}},   // پ
57     {8, 0xFE96, 1, 2, -1,  {1, 1}},    // ت
58     {9, 0xFE9A, 1, 2, -1,  {1, 1}},    // ث
59     {10, 0xFE9E, 1, 2, -1,  {1, 1}},   // ج
60     {100, 0xFB7B, 1, 2, -1,  {1, 1}},  // چ
61     {11, 0xFEA2, 1, 2, -1,  {1, 1}},   // ح
62     {12, 0xFEA6, 1, 2, -1,  {1, 1}},   // خ
63     {13, 0xFEAA, -1, 0, -1,  {1, 0}},   // د
64     {14, 0xFEAC, -1, 0, -1,  {1, 0}},   // ذ
65     {15, 0xFEAE, -1, 0, -1,  {1, 0}},   // ر
66     {16, 0xFEB0, -1, 0, -1,  {1, 0}},   // ز
67     {118, 0xFB8B, -1, 0, -1,  {1, 0}},  // ژ
68     {17, 0xFEB2, 1, 2, -1,  {1, 1}},   // س
69     {18, 0xFEB6, 1, 2, -1,  {1, 1}},   // ش
70     {19, 0xFEBA, 1, 2, -1,  {1, 1}},   // ص
71     {20, 0xFEBE, 1, 2, -1,  {1, 1}},   // ض
72     {21, 0xFEC2, 1, 2, -1,  {1, 1}},   // ط
73     {22, 0xFEC6, 1, 2, -1,  {1, 1}},   // ظ
74     {23, 0xFECA, 1, 2, -1,  {1, 1}},   // ع
75     {24, 0xFECE, 1, 2, -1,  {1, 1}},   // غ
76     {30, 0x0640, 0, 0, 0,  {1, 1}},   // - (mad, hyphen)
77     {31, 0xFED2, 1, 2, -1,  {1, 1}},   // ف
78     {32, 0xFED6, 1, 2, -1,  {1, 1}},   // ق
79     {135, 0xFB8F, 1, 2, -1,  {1, 1}},  // ک
80     {33, 0xFEDA, 1, 2, -1,  {1, 1}},  // ﻙ
81     {141, 0xFB93, 1, 2, -1,  {1, 1}},  // گ
82     {34, 0xFEDE, 1, 2, -1,  {1, 1}},   // ل
83     {35, 0xFEE2, 1, 2, -1,  {1, 1}},   // م
84     {36, 0xFEE6, 1, 2, -1,  {1, 1}},   // ن
85     {38, 0xFEEE, -1, 0, -1,  {1, 0}},   // و
86     {37, 0xFEEA, 1, 2, -1,  {1, 1}},   // ه
87     {39, 0xFEF0, 0, 0, -1, {1, 0}},   // ى
88     {40, 0xFEF2, 1, 2, -1,  {1, 1}},   // ي
89     {170, 0xFBFD, 1, 2, -1,  {1, 1}},   // ی
90     {7, 0xFE94, -1, 2, -1,  {1, 0}},   // ة
91     {206, 0x06F0, -1, 2, 0,  {0, 0}},  // ۰
92     {207, 0x06F1, 0, 0, 0,  {0, 0}},  // ۱
93     {208, 0x06F2, 0, 0, 0,  {0, 0}},  // ۲
94     {209, 0x06F3, 0, 0, 0,  {0, 0}},  // ۳
95     {210, 0x06F4, 0, 0, 0,  {0, 0}},  // ۴
96     {211, 0x06F5, 0, 0, 0,  {0, 0}},  // ۵
97     {212, 0x06F6, 0, 0, 0,  {0, 0}},  // ۶
98     {213, 0x06F7, 0, 0, 0,  {0, 0}},  // ۷
99     {214, 0x06F8, 0, 0, 0,  {0, 0}},  // ۸
100     {215, 0x06F9, 0, 0, 0,  {0, 0}},  // ۹
101     LV_AP_END_CHARS_LIST
102 };
103 /**********************
104 *      MACROS
105 **********************/
106 
107 /**********************
108 *   GLOBAL FUNCTIONS
109 **********************/
lv_text_ap_calc_bytes_count(const char * txt)110 uint32_t lv_text_ap_calc_bytes_count(const char * txt)
111 {
112     uint32_t txt_length = 0;
113     uint32_t chars_cnt = 0;
114     uint32_t current_ap_idx = 0;
115     uint32_t i, j;
116     uint32_t ch_enc;
117 
118     txt_length = lv_text_get_encoded_length(txt);
119 
120     i = 0;
121     j = 0;
122     while(i < txt_length) {
123         ch_enc = lv_text_encoded_next(txt, &j);
124         current_ap_idx = lv_ap_get_char_index(ch_enc);
125 
126         if(current_ap_idx != LV_UNDEF_ARABIC_PERSIAN_CHARS)
127             ch_enc = ap_chars_map[current_ap_idx].char_end_form;
128 
129         if(ch_enc < 0x80)
130             chars_cnt++;
131         else if(ch_enc < 0x0800)
132             chars_cnt += 2;
133         else if(ch_enc < 0x010000)
134             chars_cnt += 3;
135         else
136             chars_cnt += 4;
137 
138         i++;
139     }
140 
141     return chars_cnt + 1;
142 }
143 
lv_text_ap_proc(const char * txt,char * txt_out)144 void lv_text_ap_proc(const char * txt, char * txt_out)
145 {
146     uint32_t txt_length = 0;
147     uint32_t index_current, idx_next, idx_previous, i, j;
148     uint32_t * ch_enc;
149     uint32_t * ch_fin;
150     char * txt_out_temp;
151 
152     txt_length = lv_text_get_encoded_length(txt);
153 
154     ch_enc = (uint32_t *)lv_malloc(sizeof(uint32_t) * (txt_length + 1));
155     ch_fin = (uint32_t *)lv_malloc(sizeof(uint32_t) * (txt_length + 1));
156 
157     i = 0;
158     j = 0;
159     while(j < txt_length)
160         ch_enc[j++] = lv_text_encoded_next(txt, &i);
161 
162     ch_enc[j] = 0;
163 
164     i = 0;
165     j = 0;
166     idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
167     while(i < txt_length) {
168         index_current = lv_ap_get_char_index(ch_enc[i]);
169         idx_next = lv_ap_get_char_index(ch_enc[i + 1]);
170 
171         if(lv_text_is_arabic_vowel(ch_enc[i])) {  // Current character is a vowel
172             ch_fin[j] = ch_enc[i];
173             i++;
174             j++;
175             continue;   // Skip this character
176         }
177         else if(lv_text_is_arabic_vowel(ch_enc[i + 1])) {    // Next character is a vowel
178             idx_next = lv_ap_get_char_index(ch_enc[i + 2]); // Skip the vowel character to join with the character after it
179         }
180 
181         if(index_current == LV_UNDEF_ARABIC_PERSIAN_CHARS) {
182             ch_fin[j] = ch_enc[i];
183             j++;
184             i++;
185             idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
186             continue;
187         }
188 
189         uint8_t conjunction_to_previous = (i == 0 ||
190                                            idx_previous == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_previous].ap_chars_conjunction.conj_to_next;
191         uint8_t conjunction_to_next = ((i == txt_length - 1) ||
192                                        idx_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_next].ap_chars_conjunction.conj_to_previous;
193 
194         uint32_t lam_alef = lv_text_lam_alef(index_current, idx_next);
195         if(lam_alef) {
196             if(conjunction_to_previous) {
197                 lam_alef ++;
198             }
199             ch_fin[j] = lam_alef;
200             idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
201             i += 2;
202             j++;
203             continue;
204         }
205 
206         if(conjunction_to_previous && conjunction_to_next)
207             ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_middle_form_offset;
208         else if(!conjunction_to_previous && conjunction_to_next)
209             ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_beginning_form_offset;
210         else if(conjunction_to_previous && !conjunction_to_next)
211             ch_fin[j] = ap_chars_map[index_current].char_end_form;
212         else
213             ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_isolated_form_offset;
214         idx_previous = index_current;
215         i++;
216         j++;
217     }
218     ch_fin[j] = 0;
219     for(i = 0; i < txt_length; i++)
220         ch_enc[i] = 0;
221     for(i = 0; i < j; i++)
222         ch_enc[i] = ch_fin[i];
223     lv_free(ch_fin);
224 
225     txt_out_temp = txt_out;
226     i = 0;
227 
228     while(i < txt_length) {
229         if(ch_enc[i] < 0x80) {
230             *(txt_out_temp++) = ch_enc[i] & 0xFF;
231         }
232         else if(ch_enc[i] < 0x0800) {
233             *(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x1F) | 0xC0;
234             *(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
235         }
236         else if(ch_enc[i] < 0x010000) {
237             *(txt_out_temp++) = ((ch_enc[i] >> 12) & 0x0F) | 0xE0;
238             *(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x3F) | 0x80;
239             *(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
240         }
241         else if(ch_enc[i] < 0x110000) {
242             *(txt_out_temp++) = ((ch_enc[i] >> 18) & 0x07) | 0xF0;
243             *(txt_out_temp++) = ((ch_enc[i] >> 12) & 0x3F) | 0x80;
244             *(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x3F) | 0x80;
245             *(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
246         }
247 
248         i++;
249     }
250     *(txt_out_temp) = '\0';
251     lv_free(ch_enc);
252 }
253 /**********************
254 *   STATIC FUNCTIONS
255 **********************/
256 
lv_ap_get_char_index(uint16_t c)257 static uint32_t lv_ap_get_char_index(uint16_t c)
258 {
259     for(uint8_t i = 0; ap_chars_map[i].char_end_form; i++) {
260         if(c == (ap_chars_map[i].char_offset + LV_AP_ALPHABET_BASE_CODE))
261             return i;
262         else if(c == ap_chars_map[i].char_end_form                                                  //is it an End form
263                 || c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_beginning_form_offset)     //is it a Beginning form
264                 || c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_middle_form_offset)       //is it a middle form
265                 || c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_isolated_form_offset)) {  //is it an isolated form
266             return i;
267         }
268     }
269     return LV_UNDEF_ARABIC_PERSIAN_CHARS;
270 }
271 
/**
 * Get the ligature that replaces a lam followed by one of the alef variants.
 * @param ch_curr   ap_chars_map index of the current character (must be a valid index)
 * @param ch_next   ap_chars_map index of the next character or LV_UNDEF_ARABIC_PERSIAN_CHARS
 * @return          code point of the ligature, or 0 if the pair doesn't form one
 */
static uint32_t lv_text_lam_alef(uint32_t ch_curr, uint32_t ch_next)
{
    /*Offset 34 is the lam row of the table*/
    const bool curr_is_lam = (ap_chars_map[ch_curr].char_offset == 34);
    if(!curr_is_lam || ch_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) {
        return 0;
    }

    const uint32_t next_code = ap_chars_map[ch_next].char_offset + LV_AP_ALPHABET_BASE_CODE;
    switch(next_code) {
        case 0x0622:
            return 0xFEF5;    /*(lam-alef) mad*/
        case 0x0623:
            return 0xFEF7;    /*(lam-alef) top hamza*/
        case 0x0625:
            return 0xFEF9;    /*(lam-alef) bot hamza*/
        case 0x0627:
            return 0xFEFB;    /*(lam-alef) alef*/
        default:
            return 0;
    }
}
296 
/**
 * Tell whether a character lies in the vowel mark range 0x064B..0x0652 (both inclusive).
 * @param c     the character to test
 * @return      true if `c` is inside the range, false otherwise
 */
static bool lv_text_is_arabic_vowel(uint16_t c)
{
    if(c < 0x064B) {
        return false;
    }
    if(c > 0x0652) {
        return false;
    }
    return true;
}
301 
302 #endif
303