/*
 * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
 *
 * SPDX-License-Identifier: Apache-2.0
 */
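/*
 * Emulation of the 8-bit and 16-bit GCC __atomic_* library calls on RV32
 * targets whose A extension only provides word-sized (32-bit) atomics.
 * Each routine operates on the aligned 32-bit word that contains the
 * variable, using LR/SC loops or AMO instructions, and extracts the
 * relevant byte/half-word lane.
 *
 * Note (assumed, per the GCC __atomic library-call ABI): a0 holds the
 * object pointer, a1 the value operand (or the pointer to the expected
 * value for compare-exchange), a2 the desired value for compare-exchange;
 * the trailing memory-order argument(s) are ignored by these routines.
 */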
#if __riscv_atomic == 1

.macro ALIGNED_PTR_2 ptr, offset
    andi    \ptr, a0, -4         // aligned ptr
    sub     \offset, a0, \ptr
    slli    \offset, \offset, 3  // offset (in bits) between ptr and aligned ptr
    li      t6, 24
    bne     \offset, t6, 1f      // proceed with the atomic operation only if the var is not split between 2 words
    lr.w    t2, (a0)             // otherwise deliberately invoke a 'Load access fault!'
1:
.endm

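/*
 * Common pattern for the 16-bit routines below (as inferred from the code):
 * ALIGNED_PTR_2 leaves the word-aligned address in its first argument (t0)
 * and the bit offset of the half-word within that word in its second
 * argument (t1). The containing 32-bit word is then read-modify-written
 * with LR/SC or AMO instructions, and the 16-bit result is extracted by
 * shifting and masking (slli/srli by 16). A half-word starting at byte
 * offset 3 would straddle two words and is rejected by the macro via a
 * deliberate load access fault.
 */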
    .global __atomic_load_2
    .type   __atomic_load_2, @function
__atomic_load_2:
    ALIGNED_PTR_2 t0, t1
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_load_2, . - __atomic_load_2


    .global __atomic_store_2
    .type   __atomic_store_2, @function
__atomic_store_2:
    ALIGNED_PTR_2 t0, t1
    li      t6, 0xffff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
    sll     t5, a1, t1   // t5 - new value shifted into place within the aligned word
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    and     t3, t2, t6   // t3 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t5, t3   // t4 - desired half-word combined with the rest of the original word
    sc.w    t3, t4, (t0) // t3 - atomic write result (0 - success)
    bnez    t3, 1b
    ret
    .size   __atomic_store_2, . - __atomic_store_2


    .global __atomic_exchange_2
    .type   __atomic_exchange_2, @function
__atomic_exchange_2:
    ALIGNED_PTR_2 t0, t1
    li      t6, 0xffff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
    sll     t5, a1, t1   // t5 - new value shifted into place within the aligned word
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    and     t3, t2, t6   // t3 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t5, t3   // t4 - desired half-word combined with the rest of the original word
    sc.w    t3, t4, (t0) // t3 - atomic write result (0 - success)
    bnez    t3, 1b
    srl     t4, t2, t1
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_exchange_2, . - __atomic_exchange_2


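/*
 * Note (assumed, per the GCC __atomic library-call ABI):
 *   bool __atomic_compare_exchange_2(uint16_t *ptr, uint16_t *expected,
 *                                    uint16_t desired, bool weak,
 *                                    int success_order, int failure_order);
 * Here a0 - ptr, a1 - pointer to the expected value, a2 - desired value.
 * Returns 1 on success; returns 0 and writes the observed value back to
 * *expected on failure.
 */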
    .global __atomic_compare_exchange_2
    .type   __atomic_compare_exchange_2, @function
__atomic_compare_exchange_2:
    ALIGNED_PTR_2 t0, t1
    li      t6, 0xffff0000
    srl     t6, t6, t1   // t6 - bitwise mask (0xffff0000 or 0x0000ffff)
    lhu     t5, (a1)
    sll     t5, t5, t1   // t5 - expected value shifted for comparison with the aligned word
    sll     t4, a2, t1   // t4 - desired value shifted into place within the aligned word
1:                       // do not change registers (t0, t1, t4, t5) after this label
    not     t6, t6
    lr.w    t2, (t0)     // t2 - load atomic
    and     t3, t2, t6   // t3 - half-word from the aligned word, to be compared with expected (t5)
    bne     t3, t5, 2f   // goto fail
    not     t6, t6
    and     t2, t2, t6
    or      t3, t4, t2   // t3 - desired half-word combined with the rest of the original word
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b       // retry
    li      a0, 1
    ret
2:
    srl     t3, t3, t1
    sh      t3, (a1)     // store the observed value into the expected variable
    li      a0, 0
    ret
    .size   __atomic_compare_exchange_2, . - __atomic_compare_exchange_2


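/*
 * For OR and XOR a single amoor.w/amoxor.w on the containing word is
 * sufficient: the bits outside the target half-word are zero in the shifted
 * operand and are therefore left unchanged. For AND those bits must be set
 * to 1 instead (see __atomic_fetch_and_2 below).
 */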
    .global __atomic_fetch_or_2
    .type   __atomic_fetch_or_2, @function
__atomic_fetch_or_2:
    ALIGNED_PTR_2 t0, t1
    sll     t2, a1, t1   // t2 - value shifted to its half-word position
    amoor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    srl     t0, t0, t1
    slli    a0, t0, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_fetch_or_2, . - __atomic_fetch_or_2


    .global __atomic_or_fetch_2
    .type   __atomic_or_fetch_2, @function
__atomic_or_fetch_2:
    ALIGNED_PTR_2 t0, t1
    sll     t2, a1, t1   // t2 - value shifted to its half-word position
    amoor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    or      t2, t0, t2   // t2 - aligned word value after the atomic operation
    srl     t2, t2, t1
    slli    a0, t2, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_or_fetch_2, . - __atomic_or_fetch_2


    .global __atomic_fetch_xor_2
    .type   __atomic_fetch_xor_2, @function
__atomic_fetch_xor_2:
    ALIGNED_PTR_2 t0, t1
    sll     t2, a1, t1    // t2 - value shifted to its half-word position
    amoxor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    srl     t0, t0, t1
    slli    a0, t0, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_fetch_xor_2, . - __atomic_fetch_xor_2


    .global __atomic_xor_fetch_2
    .type   __atomic_xor_fetch_2, @function
__atomic_xor_fetch_2:
    ALIGNED_PTR_2 t0, t1
    sll     t2, a1, t1    // t2 - value shifted to its half-word position
    amoxor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    xor     t2, t0, t2    // t2 - aligned word value after the atomic operation
    srl     t2, t2, t1
    slli    a0, t2, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_xor_fetch_2, . - __atomic_xor_fetch_2


    .global __atomic_fetch_and_2
    .type   __atomic_fetch_and_2, @function
__atomic_fetch_and_2:
    ALIGNED_PTR_2 t0, t1
    li      t6, 0xffff0000  // t6 - bitwise mask
    srl     t6, t6, t1      // t6 - used to fill the non-atomic bytes of the aligned word with 0xff
    sll     t2, a1, t1      // t2 - value shifted to its half-word position
    or      t2, t2, t6      // t2 - 0xXXXXffff or 0xffffXXXX, where XXXX is the value half-word
    amoand.w   t0, t2, (t0) // t0 - aligned word value before the atomic operation
    srl     t0, t0, t1
    slli    a0, t0, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_fetch_and_2, . - __atomic_fetch_and_2


    .global __atomic_and_fetch_2
    .type   __atomic_and_fetch_2, @function
__atomic_and_fetch_2:
    ALIGNED_PTR_2 t0, t1
    li      t6, 0xffff0000  // t6 - bitwise mask
    srl     t6, t6, t1      // t6 - used to fill the non-atomic bytes of the aligned word with 0xff
    sll     t2, a1, t1      // t2 - value shifted to its half-word position
    or      t2, t2, t6      // t2 - 0xXXXXffff or 0xffffXXXX, where XXXX is the value half-word
    amoand.w   t0, t2, (t0) // t0 - aligned word value before the atomic operation
    and     t2, t0, t2      // t2 - aligned word value after the atomic operation
    srl     t2, t2, t1
    slli    a0, t2, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_and_fetch_2, . - __atomic_and_fetch_2


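/*
 * There is no AMO instruction for NAND, so the NAND variants use an LR/SC
 * loop: compute ~(old & value) for the target half-word, merge it back into
 * the containing word and retry until the store-conditional succeeds.
 * __atomic_fetch_nand_2 returns the previous value, __atomic_nand_fetch_2
 * the newly written one.
 */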
    .global __atomic_fetch_nand_2
    .type   __atomic_fetch_nand_2, @function
__atomic_fetch_nand_2:
    ALIGNED_PTR_2 t0, t1
    li      t5, 0xffff
    sll     t5, t5, t1   // t5 - bitwise mask selecting the atomic variable
    not     t6, t5       // t6 - bitwise mask selecting the rest of the word
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t3, t2, t1
    and     t3, t3, a1
    not     t3, t3       // t3 - atomic value to write
    sll     t3, t3, t1
    and     t3, t3, t5   // t3 - keep only the target half-word bits
    and     t4, t2, t6   // t4 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t4, t3   // t4 - desired half-word combined with the original aligned word
    sc.w    t4, t4, (t0) // t4 - atomic write result (0 - success)
    bnez    t4, 1b
    srl     t4, t2, t1
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_fetch_nand_2, . - __atomic_fetch_nand_2


    .global __atomic_nand_fetch_2
    .type   __atomic_nand_fetch_2, @function
__atomic_nand_fetch_2:
    ALIGNED_PTR_2 t0, t1
    li      t5, 0xffff
    sll     t5, t5, t1   // t5 - bitwise mask selecting the atomic variable
    not     t6, t5       // t6 - bitwise mask selecting the rest of the word
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t3, t2, t1
    and     t3, t3, a1
    not     t3, t3       // t3 - atomic value to write
    sll     t3, t3, t1
    and     t3, t3, t5   // t3 - keep only the target half-word bits
    and     t4, t2, t6   // t4 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t4, t3   // t4 - desired half-word combined with the original aligned word
    sc.w    t4, t4, (t0) // t4 - atomic write result (0 - success)
    bnez    t4, 1b
    srl     t4, t3, t1   // t4 - newly written half-word value
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_nand_fetch_2, . - __atomic_nand_fetch_2


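/*
 * ADD and SUB cannot use amoadd.w on the containing word, because a
 * carry/borrow out of the target half-word would corrupt the neighbouring
 * bytes. The result is therefore computed and masked to 16 bits inside an
 * LR/SC loop.
 */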
    .global __atomic_fetch_sub_2
    .type   __atomic_fetch_sub_2, @function
__atomic_fetch_sub_2:
    ALIGNED_PTR_2 t0, t1
    li      t5, 0xffff   // t5 - bitwise mask
    not     t6, t5
    srl     t6, t6, t1   // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     a0, t2, t1
    and     a0, a0, t5   // a0 - value in atomic before performing operation
    sub     t3, a0, a1
    and     t3, t3, t5   // t3 - value to be written to atomic
    sll     t3, t3, t1
    and     t2, t2, t6
    or      t3, t3, t2   // t3 - value to be written into aligned memory
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    ret
    .size   __atomic_fetch_sub_2, . - __atomic_fetch_sub_2


    .global __atomic_sub_fetch_2
    .type   __atomic_sub_fetch_2, @function
__atomic_sub_fetch_2:
    ALIGNED_PTR_2 t0, t1
    li      t5, 0xffff   // t5 - bitwise mask
    not     t6, t5
    srl     t6, t6, t1   // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    and     t4, t4, t5
    sub     t4, t4, a1
    and     t4, t4, t5   // t4 - value to be written to atomic
    sll     t4, t4, t1
    and     t2, t2, t6
    or      t4, t4, t2   // t4 - value to be written into aligned memory
    sc.w    t2, t4, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    srl     t4, t4, t1
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_sub_fetch_2, . - __atomic_sub_fetch_2


    .global __atomic_fetch_add_2
    .type   __atomic_fetch_add_2, @function
__atomic_fetch_add_2:
    ALIGNED_PTR_2 t0, t1
    li      t5, 0xffff   // t5 - bitwise mask
    not     t6, t5
    srl     t6, t6, t1   // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    and     t4, t4, t5   // t4 - half-word value in atomic before performing operation
    add     t3, t4, a1
    and     t3, t3, t5   // t3 - half-word value to be written to atomic
    sll     t3, t3, t1
    and     t2, t2, t6
    or      t3, t3, t2   // t3 - value to be written into aligned memory
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_fetch_add_2, . - __atomic_fetch_add_2


    .global __atomic_add_fetch_2
    .type   __atomic_add_fetch_2, @function
__atomic_add_fetch_2:
    ALIGNED_PTR_2 t0, t1
    li      t5, 0xffff   // t5 - bitwise mask
    not     t6, t5
    srl     t6, t6, t1   // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    and     t4, t4, t5
    add     t4, t4, a1
    and     t4, t4, t5   // t4 - value to be written to atomic
    sll     t4, t4, t1
    and     t2, t2, t6
    or      t4, t4, t2   // t4 - value to be written into aligned memory
    sc.w    t2, t4, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    srl     t4, t4, t1
    slli    a0, t4, 0x10
    srli    a0, a0, 0x10
    ret
    .size   __atomic_add_fetch_2, . - __atomic_add_fetch_2


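/*
 * The 8-bit variants below follow the same approach with 0xff masks. The
 * aligned pointer and bit offset are computed inline rather than via
 * ALIGNED_PTR_2, since a single byte can never straddle two aligned words.
 */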
    .global __atomic_load_1
    .type   __atomic_load_1, @function
__atomic_load_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    andi    a0, t4, 0xff
    ret
    .size   __atomic_load_1, . - __atomic_load_1


    .global __atomic_store_1
    .type   __atomic_store_1, @function
__atomic_store_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
    sll     t5, a1, t1   // t5 - new value shifted into place within the aligned word
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    and     t3, t2, t6   // t3 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t5, t3   // t4 - desired byte combined with the rest of the original word
    sc.w    t3, t4, (t0) // t3 - atomic write result (0 - success)
    bnez    t3, 1b
    ret
    .size   __atomic_store_1, . - __atomic_store_1


    .global __atomic_exchange_1
    .type   __atomic_exchange_1, @function
__atomic_exchange_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
    sll     t5, a1, t1   // t5 - new value shifted into place within the aligned word
1:                       // do not change registers (t0, t1, t5, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    and     t3, t2, t6   // t3 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t5, t3   // t4 - desired byte combined with the rest of the original word
    sc.w    t3, t4, (t0) // t3 - atomic write result (0 - success)
    bnez    t3, 1b
    srl     t4, t2, t1
    andi    a0, t4, 0xff
    ret
    .size   __atomic_exchange_1, . - __atomic_exchange_1


    .global __atomic_compare_exchange_1
    .type   __atomic_compare_exchange_1, @function
__atomic_compare_exchange_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
    lbu     t5, (a1)
    sll     t5, t5, t1   // t5 - expected value shifted for comparison with the aligned word
    sll     t4, a2, t1   // t4 - desired value shifted into place within the aligned word
1:                       // do not change registers (t0, t1, t4, t5) after this label
    not     t6, t6
    lr.w    t2, (t0)     // t2 - load atomic
    and     t3, t2, t6   // t3 - byte from the aligned word, to be compared with expected (t5)
    bne     t3, t5, 2f   // goto fail
    not     t6, t6
    and     t2, t2, t6
    or      t3, t4, t2   // t3 - desired byte combined with the rest of the original word
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b       // retry
    li      a0, 1
    ret
2:
    srl     t3, t3, t1
    sb      t3, (a1)     // store the observed value into the expected variable
    li      a0, 0
    ret
    .size   __atomic_compare_exchange_1, . - __atomic_compare_exchange_1


    .global __atomic_fetch_or_1
    .type   __atomic_fetch_or_1, @function
__atomic_fetch_or_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    sll     t2, a1, t1   // t2 - value shifted to its byte position
    amoor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    srl     t0, t0, t1
    andi    a0, t0, 0xff
    ret
    .size   __atomic_fetch_or_1, . - __atomic_fetch_or_1


    .global __atomic_or_fetch_1
    .type   __atomic_or_fetch_1, @function
__atomic_or_fetch_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    sll     t2, a1, t1   // t2 - value shifted to its byte position
    amoor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    or      t2, t0, t2   // t2 - aligned word value after the atomic operation
    srl     t2, t2, t1
    andi    a0, t2, 0xff
    ret
    .size   __atomic_or_fetch_1, . - __atomic_or_fetch_1


    .global __atomic_fetch_xor_1
    .type   __atomic_fetch_xor_1, @function
__atomic_fetch_xor_1:
    andi    t0, a0, -4    // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3     // t1 - offset (in bits) between ptr and aligned ptr
    sll     t2, a1, t1    // t2 - value shifted to its byte position
    amoxor.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    srl     t0, t0, t1
    andi    a0, t0, 0xff
    ret
    .size   __atomic_fetch_xor_1, . - __atomic_fetch_xor_1


    .global __atomic_xor_fetch_1
    .type   __atomic_xor_fetch_1, @function
__atomic_xor_fetch_1:
    andi    t0, a0, -4     // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3      // t1 - offset (in bits) between ptr and aligned ptr
    sll     t2, a1, t1     // t2 - value shifted to its byte position
    amoxor.w t0, t2, (t0)  // t0 - aligned word value before the atomic operation
    xor     t2, t0, t2     // t2 - aligned word value after the atomic operation
    srl     t2, t2, t1
    andi    a0, t2, 0xff
    ret
    .size   __atomic_xor_fetch_1, . - __atomic_xor_fetch_1


    .global __atomic_fetch_and_1
    .type   __atomic_fetch_and_1, @function
__atomic_fetch_and_1:
    andi    t0, a0, -4    // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3
    li      t6, 0xff      // t6 - bitwise mask
    sll     t6, t6, t1    // t6 - used to fill the non-atomic bytes of the aligned word with 0xff
    not     t6, t6
    sll     t2, a1, t1    // t2 - value shifted to its byte position
    or      t2, t2, t6    // t2 - (0xXXffffff or 0xffXXffff ...), where XX is the new value to write
    amoand.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    srl     t0, t0, t1
    andi    a0, t0, 0xff
    ret
    .size   __atomic_fetch_and_1, . - __atomic_fetch_and_1


    .global __atomic_and_fetch_1
    .type   __atomic_and_fetch_1, @function
__atomic_and_fetch_1:
    andi    t0, a0, -4    // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3
    li      t6, 0xff      // t6 - bitwise mask
    sll     t6, t6, t1    // t6 - used to fill the non-atomic bytes of the aligned word with 0xff
    not     t6, t6
    sll     t2, a1, t1    // t2 - value shifted to its byte position
    or      t2, t2, t6    // t2 - (0xXXffffff or 0xffXXffff ...), where XX is the new value to write
    amoand.w t0, t2, (t0) // t0 - aligned word value before the atomic operation
    and     t2, t0, t2    // t2 - aligned word value after the atomic operation
    srl     t2, t2, t1
    andi    a0, t2, 0xff
    ret
    .size   __atomic_and_fetch_1, . - __atomic_and_fetch_1


    .global __atomic_nand_fetch_1
    .type   __atomic_nand_fetch_1, @function
__atomic_nand_fetch_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t3, t2, t1
    and     t3, t3, a1
    not     t3, t3
    andi    t3, t3, 0xff // t3 - atomic value to write
    sll     t3, t3, t1
    and     t4, t2, t6   // t4 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t4, t3   // t4 - desired byte combined with the rest of the original word
    sc.w    t3, t4, (t0) // t3 - atomic write result (0 - success)
    bnez    t3, 1b
    srl     t4, t4, t1
    andi    a0, t4, 0xff
    ret
    .size   __atomic_nand_fetch_1, . - __atomic_nand_fetch_1


    .global __atomic_fetch_nand_1
    .type   __atomic_fetch_nand_1, @function
__atomic_fetch_nand_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t3, t2, t1
    and     t3, t3, a1
    not     t3, t3
    andi    t3, t3, 0xff // t3 - atomic value to write
    sll     t3, t3, t1
    and     t4, t2, t6   // t4 - masked aligned word; the atomic variable's part is zeroed here
    or      t4, t4, t3   // t4 - desired byte combined with the rest of the original word
    sc.w    t3, t4, (t0) // t3 - atomic write result (0 - success)
    bnez    t3, 1b
    srl     t4, t2, t1
    andi    a0, t4, 0xff
    ret
    .size   __atomic_fetch_nand_1, . - __atomic_fetch_nand_1


    .global __atomic_fetch_sub_1
    .type   __atomic_fetch_sub_1, @function
__atomic_fetch_sub_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    andi    t4, t4, 0xff // t4 - value in atomic before performing operation
    sub     t3, t4, a1
    andi    t3, t3, 0xff // t3 - value to be written to atomic
    sll     t3, t3, t1
    and     t2, t2, t6
    or      t3, t3, t2   // t3 - value to be written into aligned memory
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    andi    a0, t4, 0xff
    ret
    .size   __atomic_fetch_sub_1, . - __atomic_fetch_sub_1


    .global __atomic_sub_fetch_1
    .type   __atomic_sub_fetch_1, @function
__atomic_sub_fetch_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t3, t2, t1
    andi    t3, t3, 0xff // t3 - value in atomic before performing operation
    sub     t3, t3, a1
    andi    t3, t3, 0xff // t3 - value to be written to atomic
    sll     t3, t3, t1
    and     t2, t2, t6
    or      t3, t3, t2   // t3 - value to be written into aligned memory
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    srl     t3, t3, t1
    andi    a0, t3, 0xff
    ret
    .size   __atomic_sub_fetch_1, . - __atomic_sub_fetch_1


    .global __atomic_fetch_add_1
    .type   __atomic_fetch_add_1, @function
__atomic_fetch_add_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t4, t2, t1
    andi    t4, t4, 0xff // t4 - value in atomic before performing operation
    add     t3, t4, a1
    andi    t3, t3, 0xff // t3 - value to be written to atomic
    sll     t3, t3, t1
    and     t2, t2, t6
    or      t3, t3, t2   // t3 - value to be written into aligned memory
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    andi    a0, t4, 0xff
    ret
    .size   __atomic_fetch_add_1, . - __atomic_fetch_add_1


    .global __atomic_add_fetch_1
    .type   __atomic_add_fetch_1, @function
__atomic_add_fetch_1:
    andi    t0, a0, -4   // t0 - aligned ptr
    sub     t1, a0, t0
    slli    t1, t1, 3    // t1 - offset (in bits) between ptr and aligned ptr
    li      t6, 0xff
    sll     t6, t6, t1
    not     t6, t6       // t6 - bitwise mask
1:                       // do not change registers (t0, t1, t6) after this label
    lr.w    t2, (t0)     // t2 - load atomic
    srl     t3, t2, t1
    andi    t3, t3, 0xff // t3 - value in atomic before performing operation
    add     t3, t3, a1
    andi    t3, t3, 0xff // t3 - value to be written to atomic
    sll     t3, t3, t1
    and     t2, t2, t6
    or      t3, t3, t2   // t3 - value to be written into aligned memory
    sc.w    t2, t3, (t0) // t2 - atomic write result (0 - success)
    bnez    t2, 1b
    srl     t3, t3, t1
    andi    a0, t3, 0xff
    ret
    .size   __atomic_add_fetch_1, . - __atomic_add_fetch_1

#endif // if __riscv_atomic == 1