Lines Matching +full:5 +full:- +full:byte
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
29 * This function assumes 2- or 4-byte alignment. Other alignments will fail!
32 /* ONES_ADD converts twos-complement math to ones-complement. */
44 * is aligned on either a 2-byte or 4-byte boundary.
48 bnez a5, 8f /* branch if 2-byte aligned */
49 /* Fall-through on common case, 4-byte alignment */
51 srli a5, a3, 5 /* 32-byte chunks */
56 slli a5, a5, 5
57 add a5, a5, a2 /* a5 = end of last 32-byte chunk */
81 extui a5, a3, 2, 3 /* remaining 4-byte chunks */
87 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
97 _bbci.l a3, 1, 5f /* remaining 2-byte chunk */
101 5:
102 _bbci.l a3, 0, 7f /* remaining 1-byte chunk */
105 slli a6, a6, 8 /* load byte into bits 8..15 */
112 /* uncommon case, buf is 2-byte aligned */
118 bnez a5, 8f /* branch if 1-byte aligned */
123 addi a3, a3, -2 /* adjust len */
124 j 1b /* now buf is 4-byte aligned */
126 /* case: odd-byte aligned, len > 1
131 srli a5, a3, 2 /* 4-byte chunks */
137 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
157 _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
169 j 5b /* branch to handle the remaining byte */
187 This function is optimized for 4-byte aligned addresses. Other
194 movi a5, -1
197 /* We optimize the following alignment tests for the 4-byte
199 (commented out below). However, both labels 5: and 3: are out
205 beqz a9, 1f /* branch if both are 4-byte aligned */
206 bbsi.l a10, 0, 5f /* branch if one address is odd */
207 j 3f /* one address is 2-byte aligned */
209 /* _bbsi.l a10, 0, 5f */ /* branch if odd address */
210 /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */
213 /* src and dst are both 4-byte aligned */
214 srli a10, a4, 5 /* 32-byte chunks */
219 slli a10, a10, 5
220 add a10, a10, a2 /* a10 = end of last 32-byte src chunk */
253 extui a10, a4, 2, 3 /* remaining 4-byte chunks */
254 extui a4, a4, 0, 2 /* reset len for general-case, 2-byte chunks */
260 add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
274 to here from the 4-byte alignment case to process, at most,
275 one 2-byte chunk. (2) It branches to here from above if
276 either src or dst is 2-byte aligned, and we process all bytes
277 here, except for perhaps a trailing odd byte. It's
278 inefficient, so align your addresses to 4-byte boundaries.
285 srli a10, a4, 1 /* 2-byte chunks */
291 add a10, a10, a2 /* a10 = end of last 2-byte src chunk */
303 /* This section processes a possible trailing odd byte. */
304 _bbci.l a4, 0, 8f /* 1-byte chunk */
308 slli a9, a9, 8 /* shift byte to bits 8..15 */
315 5:
317 process all bytes using 8-bit accesses. Grossly inefficient,
320 srli a10, a4, 1 /* handle in pairs for 16-bit csum */
326 add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
334 slli a9, a9, 8 /* combine into a single 16-bit value */
346 j 4b /* process the possible trailing odd byte */