/*
 * Copyright (c) 2014
 *      Imagination Technologies Limited.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <picolibc.h>

#ifdef ANDROID_CHANGES
# include "machine/asm.h"
# include "machine/regdef.h"
#elif _LIBC
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bytes beyond
   the NUL string terminator but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.

   DELAY_READ is what fills the delay slot of the final branch of each
   STRCMP32 step: a nop by default, or nothing when ENABLE_READAHEAD is set,
   in which case the first load of the following step lands in that delay
   slot.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ	nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.
   Pointers must be advanced with a 64-bit add whenever they are 64 bits
   wide, so fall back on the compiler-provided pointer size as well as the
   legacy USE_DOUBLE switch (which nothing in this file defines).  */
#ifndef PTR_ADDIU
# if defined(USE_DOUBLE) || (defined(_MIPS_SZPTR) && _MIPS_SZPTR == 64)
#  define PTR_ADDIU	daddiu
# else
#  define PTR_ADDIU	addiu
# endif
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif

/* int STRCMP_NAME (const char *s1, const char *s2)

   In:       a0 = s1, a1 = s2
   Out:      v0 = 0 when the strings compare equal, otherwise the byte of s1
             minus the byte of s2 at the first position where they differ,
             both bytes taken as unsigned values.
   Clobbers: a0, a1, v1, t0, t1, t8, t9
   Stack:    none (leaf routine)

   If both pointers are 4-byte aligned the strings are compared a word at a
   time (unrolled 8x); otherwise they are compared a byte at a time
   (unrolled 8x).  The code runs under ".set noreorder", so the instruction
   following every branch or jump is its delay slot and always executes.  */
#ifdef ANDROID_CHANGES
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	or	t0, a0, a1		/* low bits set if either pointer is unaligned */
	andi	t0, t0, 0x3
	bne	t0, zero, L(byteloop)
	lui	t8, 0x0101		/* delay slot; harmless on the byte path */

/* Both strings are 4 byte aligned at this point.
   t8 = 0x01010101 and t9 = 0x7f7f7f7f are the constants of the
   "word contains a zero byte" test used by STRCMP32.  */

	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, t9, 0x7f7f

/* Compare one word of each string.
     v0/v1 = words loaded from a0/a1 at OFFSET.
     If they differ, go to worddiff (v0/v1 hold the two words).
     Otherwise t0 = (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) is nonzero when
     v0 contains a zero byte; the words are equal, so the strings are equal
     and we return 0.
   The last bne has no delay slot of its own: whatever follows the macro
   (DELAY_READ, or the pointer increment) fills it.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32		/* delay slot of the last bne */
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32		/* delay slot */

L(returnzero):
	j	ra
	move	v0, zero		/* delay slot */

/* v0 and v1 hold two differing words.  Find the first byte (in string
   order) that either differs or is the NUL terminator of the first string,
   and return the difference of that byte pair.  */
L(worddiff):
#ifdef USE_CLZ
	/* Build an exact mask with 0x80 in every byte of v0 that is zero.
	   The cheaper (w - 0x01010101) & ~w & 0x80808080 test used in the
	   loop can, through borrow propagation, also flag a 0x01 byte that
	   sits just above a zero byte.  On big endian that byte comes
	   earlier in the string and would be selected by clz below, giving
	   a wrong result (e.g. "\x01" vs "\x01\x05" compared equal).  The
	   form below has no carries between bytes.  */
	and	t0, v0, t9		/* low 7 bits of each byte */
	addu	t0, t0, t9		/* bit 7 set where those bits are nonzero */
	or	t0, t0, v0		/* bit 7 set where the byte is nonzero */
	nor	t1, t0, t9		/* 0x80 exactly where the byte is zero */
	xor	t0, v0, v1		/* nonzero in every differing byte */
	or	t0, t0, t1		/* bytes that differ or terminate s1 */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Reverse the bytes so that the first string byte is the most
	   significant one, as clz scans from the top.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	clz	t1, t0
	and	t1, 0xf8		/* round down to a whole byte: 0, 8, 16 or 24 */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Convert "bits from the top" into the right-rotate amount that
	   brings the selected byte into bits 7..0.  */
	neg	t1
	addu	t1, 24
# endif
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1		/* delay slot */
#else /* USE_CLZ */
	/* Check the four byte pairs one at a time in string order,
	   alternating between the register pairs t0/t1 and t8/t9.  Each
	   pair is tested for a NUL in the first string, then for a
	   difference; the instruction after each branch is its delay slot
	   and prepares the next operand.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 8		/* delay slot */
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff		/* delay slot */
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 16		/* delay slot */
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 24		/* delay slot */
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24		/* delay slot */
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 16		/* delay slot */
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff		/* delay slot */
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 8		/* delay slot */
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)

	andi	t8, v0, 0xff		/* delay slot */
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

	/* The words differ, so if the first three byte pairs matched the
	   last pair (in t8/t9) decides; fall through.  */
L(wexit89):
	j	ra
	subu	v0, t8, t9		/* delay slot */
L(wexit01):
	j	ra
	subu	v0, t0, t1		/* delay slot */
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).

   BYTECMP01 and BYTECMP89 compare one byte pair using v0/v1 and t8/t9
   respectively.  As with STRCMP32, the final bne has no delay slot of its
   own; the first load of the next step (or the pointer increment) fills it.
   That load is only reached past a NUL-free byte of the first string.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop; \
	bne	t8, t9, L(bexit89)

L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8		/* delay slot of the last bne */
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8		/* delay slot */

L(bexit01):
	j	ra
	subu	v0, v0, v1		/* delay slot */
L(bexit89):
	j	ra
	subu	v0, t8, t9		/* delay slot */

	.set	at
	.set	reorder

END(STRCMP_NAME)