/* memcmp - compare memory

   Copyright (c) 2018 Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See memcmp-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

/* memcmp
 *
 * In:       x0 = src1   first buffer
 *           x1 = src2   second buffer
 *           x2 = limit  number of bytes to compare
 * Out:      w0 = 0 when every compared byte matches; negative when the
 *           first differing byte of src1 is lower (unsigned) than the one
 *           in src2; positive when it is higher.  The word and doubleword
 *           paths produce exactly -1 or 1, the byte loop produces the
 *           difference of the two bytes.
 * Clobbers: x0-x7 and the condition flags.  The stack is not touched.
 *
 * Strategy by length:
 *    0..7    optional 4-byte compare followed by a byte loop
 *    8..16   first 8 bytes, then the (possibly overlapping) last 8 bytes
 *   17..32   first 16 bytes, then the (possibly overlapping) last 16 bytes
 *   33..     16 bytes per loop iteration, then the last 16 bytes; above
 *            128 bytes src1 is first rounded down to a 16-byte boundary
 */

#define L(l) .L ## l

/* Arguments and return value.  */
#define src1    x0
#define src2    x1
#define limit   x2
#define result  w0

/* Scratch registers.  */
#define lhs     x3      /* data loaded from src1 */
#define lhs_w   w3
#define lhs_hi  x4      /* second doubleword of an ldp from src1 */
#define rhs     x5      /* data loaded from src2 */
#define rhs_w   w5
#define rhs_hi  x6      /* second doubleword of an ldp from src2 */
#define skew    x7      /* src1 modulo 16 */

        .text
        .p2align 6
        .global memcmp
        .type   memcmp, %function
memcmp:
        /* limit = n - 8; fewer than 8 bytes go to the small-size path.  */
        subs    limit, limit, #8
        b.lo    L(tiny)

        /* Bytes 0..7.  */
        ldr     lhs, [src1], #8
        ldr     rhs, [src2], #8
        cmp     lhs, rhs
        b.ne    L(diff)

        /* limit = n - 16 (signed test): positive means more than 16 bytes.  */
        subs    limit, limit, #8
        b.gt    L(over16)

        /* 8..16 bytes: limit is in [-8..0], so src + limit addresses the
           final 8 bytes, overlapping what was already checked.  */
        ldr     lhs, [src1, limit]
        ldr     rhs, [src2, limit]
        b       L(diff)

L(over16):
        /* Bytes 8..15.  */
        ldr     lhs, [src1], #8
        ldr     rhs, [src2], #8
        cmp     lhs, rhs
        b.ne    L(diff)

        /* limit = n - 32.  With 32 bytes or fewer only the final 16 bytes
           remain to be checked.  */
        subs    limit, limit, #16
        b.ls    L(tail16)

        /* Aligning src1 re-reads bytes on either side of it, so it is only
           done for buffers longer than 128 bytes.  */
        cmp     limit, #96
        b.ls    L(chunk_loop)

        /* Round src1 down to a 16-byte boundary and move src2 and limit by
           the same amount so the skipped-back bytes are compared again.  */
        and     skew, src1, #15
        add     limit, limit, skew
        sub     src1, src1, skew
        sub     src2, src2, skew

        /* Main loop: 16 bytes per iteration.  limit is kept 16 ahead of the
           true remaining count and is greater than zero on entry.  The
           ccmp chain forces "not equal" as soon as 16 or fewer bytes are
           left, so the loop ends either then or on the first mismatch.  */
        .p2align 4
L(chunk_loop):
        ldp     lhs, lhs_hi, [src1], #16
        ldp     rhs, rhs_hi, [src2], #16
        subs    limit, limit, #16
        ccmp    lhs, rhs, #0, hi
        ccmp    lhs_hi, rhs_hi, #0, eq
        b.eq    L(chunk_loop)

        /* Work out which half of the last pair differs, if either does.  */
        cmp     lhs, rhs
        b.ne    L(diff)
        mov     lhs, lhs_hi
        mov     rhs, rhs_hi
        cmp     lhs, rhs
        b.ne    L(diff)

        /* Final 1..16 bytes, read as the last 16 bytes of each buffer with
           unaligned loads (limit is zero or negative here).  */
L(tail16):
        add     src1, src1, limit
        add     src2, src2, limit
        ldp     lhs, lhs_hi, [src1]
        ldp     rhs, rhs_hi, [src2]
        cmp     lhs, rhs
        b.ne    L(diff)
        mov     lhs, lhs_hi
        mov     rhs, rhs_hi
        cmp     lhs, rhs

        /* lhs/rhs hold the data to rank.  On little-endian targets the
           bytes are reversed first so that the byte at the lowest address
           is the most significant one in the unsigned compare.  */
L(diff):
#ifndef __AARCH64EB__
        rev     lhs, lhs
        rev     rhs, rhs
#endif
        cmp     lhs, rhs
        /* Turn the flags into 0 (equal), -1 (lower) or 1 (higher).  */
L(set_result):
        cset    result, ne
        cneg    result, result, lo
        ret

        .p2align 4
        /* Fewer than 8 bytes.  limit is in [-8..-1] on entry.  */
L(tiny):
        adds    limit, limit, #4
        b.lo    L(under4)
        /* At least 4 bytes: compare one word, then fall into the byte loop
           for whatever is left.  */
        ldr     lhs_w, [src1], #4
        ldr     rhs_w, [src2], #4
        cmp     lhs_w, rhs_w
        b.ne    L(diff)
        sub     limit, limit, #4
L(under4):
        /* limit becomes the number of bytes still to compare (0..3).  */
        adds    limit, limit, #4
        b.eq    L(set_result)
L(bytewise):
        ldrb    lhs_w, [src1], #1
        ldrb    rhs_w, [src2], #1
        subs    limit, limit, #1
        /* When the count reaches zero the ccmp sets NZCV = 0b0000, which
           reads as "not equal" and ends the loop.  */
        ccmp    lhs_w, rhs_w, #0, ne
        b.eq    L(bytewise)
        sub     result, lhs_w, rhs_w
        ret

        .size   memcmp, . - memcmp
#endif