Lines Matching +full:4 +full:f

29  * This function assumes 2- or 4-byte alignment.  Other alignments will fail!
35 bgeu sum, val, 99f ; \
44 * is aligned on either a 2-byte or 4-byte boundary.
48 bnez a5, 8f /* branch if 2-byte aligned */
49 /* Fall-through on common case, 4-byte alignment */
53 loopgtz a5, 2f
55 beqz a5, 2f
61 l32i a7, a2, 4
76 addi a2, a2, 4*8
81 extui a5, a3, 2, 3 /* remaining 4-byte chunks */
83 loopgtz a5, 3f
85 beqz a5, 3f
87 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
92 addi a2, a2, 4
97 _bbci.l a3, 1, 5f /* remaining 2-byte chunk */
102 _bbci.l a3, 0, 7f /* remaining 1-byte chunk */
118 bnez a5, 8f /* branch if 1-byte aligned */
124 j 1b /* now buf is 4-byte aligned */
131 srli a5, a3, 2 /* 4-byte chunks */
133 loopgtz a5, 2f
135 beqz a5, 2f
137 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
152 addi a2, a2, 4
157 _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
187 This function is optimized for 4-byte aligned addresses. Other
197 /* We optimize the following alignment tests for the 4-byte
205 beqz a9, 1f /* branch if both are 4-byte aligned */
206 bbsi.l a10, 0, 5f /* branch if one address is odd */
207 j 3f /* one address is 2-byte aligned */
209 /* _bbsi.l a10, 0, 5f */ /* branch if odd address */
210 /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */
213 /* src and dst are both 4-byte aligned */
216 loopgtz a10, 2f
218 beqz a10, 2f
223 EX(10f) l32i a9, a2, 0
224 EX(10f) l32i a8, a2, 4
225 EX(10f) s32i a9, a3, 0
226 EX(10f) s32i a8, a3, 4
229 EX(10f) l32i a9, a2, 8
230 EX(10f) l32i a8, a2, 12
231 EX(10f) s32i a9, a3, 8
232 EX(10f) s32i a8, a3, 12
235 EX(10f) l32i a9, a2, 16
236 EX(10f) l32i a8, a2, 20
237 EX(10f) s32i a9, a3, 16
238 EX(10f) s32i a8, a3, 20
241 EX(10f) l32i a9, a2, 24
242 EX(10f) l32i a8, a2, 28
243 EX(10f) s32i a9, a3, 24
244 EX(10f) s32i a8, a3, 28
253 extui a10, a4, 2, 3 /* remaining 4-byte chunks */
256 loopgtz a10, 3f
258 beqz a10, 3f
260 add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
263 EX(10f) l32i a9, a2, 0
264 EX(10f) s32i a9, a3, 0
266 addi a2, a2, 4
267 addi a3, a3, 4
274 to here from the 4-byte alignment case to process, at most,
278 inefficient, so align your addresses to 4-byte boundaries.
287 loopgtz a10, 4f
289 beqz a10, 4f
294 EX(10f) l16ui a9, a2, 0
295 EX(10f) s16i a9, a3, 0
302 4:
304 _bbci.l a4, 0, 8f /* 1-byte chunk */
305 EX(10f) l8ui a9, a2, 0
306 EX(10f) s8i a9, a3, 0
322 loopgtz a10, 6f
324 beqz a10, 6f
329 EX(10f) l8ui a9, a2, 0
330 EX(10f) l8ui a8, a2, 1
331 EX(10f) s8i a9, a3, 0
332 EX(10f) s8i a8, a3, 1
346 j 4b /* process the possible trailing odd byte */