/*
 * Copyright (c) 2014
 *      Imagination Technologies Limited.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef ANDROID_CHANGES
# include "machine/asm.h"
# include "machine/regdef.h"
#elif _LIBC
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NUL string terminator but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.

   DELAY_READ fills the branch delay slot that follows each STRCMP32 block in
   the word loop.  When it expands to nothing (ENABLE_READAHEAD), the first
   load of the next STRCMP32 block lands in that delay slot instead, which is
   what makes the next word get read even after a terminator was seen.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# ifdef USE_DOUBLE
#  define PTR_ADDIU       daddiu
# else
#  define PTR_ADDIU       addiu
# endif
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif

/* STRCMP_NAME: compare two NUL-terminated byte strings.

   In:    a0 = pointer to the first string
          a1 = pointer to the second string
   Out:   v0 = 0 if the strings are equal up to and including the
               terminator, otherwise (byte from a0) - (byte from a1) at the
               first position where they differ, both bytes zero-extended.
   Writes: a0, a1 (advanced through the strings), v0, v1, t0, t1, t8, t9.
           No stack is used and no other routine is called.

   The whole body is assembled with ".set noreorder": the instruction that
   textually follows every branch or jump is its delay slot and executes
   whether or not the branch is taken.  Instruction order is therefore
   significant throughout.  */
#ifdef ANDROID_CHANGES
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	/* If either pointer has one of its two low bits set, fall back to the
	   byte-at-a-time loop.  The "lui t8" below sits in the delay slot of
	   this branch and so also runs when the branch is taken; that is
	   harmless because the byte loop loads t8 before it reads it.  */
	or	t0, a0, a1
	andi	t0,0x3
	bne	t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  */

	/* t8 = 0x01010101 and t9 = 0x7f7f7f7f, the constants used by the
	   zero-byte test in STRCMP32.  */
	lui	t8, 0x0101
	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, 0x7f7f

/* STRCMP32(OFFSET): load one word from each string at OFFSET.  If the words
   differ, branch to L(worddiff) with v0/v1 holding the two words.  Otherwise
   compute (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f), which is non-zero only if
   v0 contains a zero byte; since the words are equal at that point, the
   strings are equal and we branch to L(returnzero).
   The "nor" is the delay slot of the first branch.  The delay slot of the
   second branch is whatever instruction follows the macro invocation.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

/* Word loop, unrolled eight times (32 bytes per iteration).  */
L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32		/* delay slot of the last STRCMP32 branch */
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32		/* delay slot of the loop branch */

/* Equal words containing a terminator: the strings are equal.  */
L(returnzero):
	j	ra
	move	v0, zero

/* v0 and v1 hold the first pair of words that differ.  Find the first byte
   (in memory order) at which they differ or at which v0 has a terminator,
   and return the difference of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
	/* Build an EXACT zero-byte mask for v0 in t1: 0x80 in every byte
	   position where v0 is zero and 0 everywhere else.
	     t1 = ~(((v0 & 0x7f7f7f7f) + 0x7f7f7f7f) | v0 | 0x7f7f7f7f)
	   No carry can cross a byte boundary in this form.  The cheaper test
	   used in the word loop, (v0 - 0x01010101) & ~v0 & 0x80808080, may
	   also flag a 0x01 byte that sits just above a zero byte because of
	   the borrow.  On big endian that spurious flag is EARLIER in memory
	   order than the real terminator, so clz would select the wrong byte
	   (e.g. bytes 01 00 .. versus 01 02 .. would compare as equal).  */
	and	t0, v0, t9
	addu	t0, t0, t9
	or	t0, t0, v0
	nor	t1, t0, t9
	/* t0 = bits that differ between the words, plus the terminator flags.  */
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Byte-swap the word so that the first byte in memory becomes the
	   most significant one, which is what clz scans first.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	/* t1 = 8 * (index in memory order of the first interesting byte).  */
	clz	t1, t0
	and	t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* On big endian the byte at memory index k lives at bit 24 - 8k.  */
	neg	t1
	addu	t1, 24
# endif
	/* Rotate the selected byte of each word into bits 7..0 and subtract.  */
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1
#else /* USE_CLZ */
	/* Examine the four bytes one at a time in memory order, alternating
	   between the register pairs t0/t1 and t8/t9.  Each "beq" exits when
	   the byte from v0 is the terminator and each "bne" exits when the
	   two bytes differ.  The instruction after each branch is its delay
	   slot and already starts extracting the next byte.  After the fourth
	   extraction the code falls through into L(wexit89).  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

/* Return the difference of the byte pair held in t8/t9.  */
L(wexit89):
	j	ra
	subu	v0, t8, t9
/* Return the difference of the byte pair held in t0/t1.  */
L(wexit01):
	j	ra
	subu	v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */

/* BYTECMP01/BYTECMP89(OFFSET): load one byte from each string at OFFSET
   (into v0/v1 or t8/t9 respectively), exit if the byte from a0 is the
   terminator or if the two bytes differ.  The delay slot of the trailing
   "bne" is whatever instruction follows the macro invocation; using two
   register pairs lets that instruction be the next macro's first load
   without overwriting the pair the exit code is about to subtract.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop; \
	bne	t8, t9, L(bexit89)

/* Byte loop, unrolled eight times; used when either pointer is not
   4 byte aligned.  */
L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8		/* delay slot of the last BYTECMP89 branch */
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8		/* delay slot of the loop branch */

/* Return the difference of the byte pair held in v0/v1.  */
L(bexit01):
	j	ra
	subu	v0, v0, v1
/* Return the difference of the byte pair held in t8/t9.  */
L(bexit89):
	j	ra
	subu	v0, t8, t9

	.set	at
	.set	reorder

END(STRCMP_NAME)