/*******************************************************************************
 * 
 * Copyright (c) 1993 Intel Corporation
 * 
 * Intel hereby grants you permission to copy, modify, and distribute this
 * software and its documentation.  Intel grants this permission provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation.  In addition, Intel grants this permission provided that
 * you prominently mark as "not part of the original" any modifications
 * made to this software or documentation, and that the name of Intel
 * Corporation not be used in advertising or publicity pertaining to
 * distribution of the software or the documentation without specific,
 * written prior permission.
 * 
 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
 * representations regarding the use of, or the results of the use of,
 * the software and documentation in terms of correctness, accuracy,
 * reliability, currentness, or otherwise; and you rely on the software,
 * documentation and results solely at your own risk.
 *
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
 * 
 ******************************************************************************/

        .file "memcm_ca.s"
#ifdef __PIC
        .pic
#endif
#ifdef __PID
        .pid
#endif
/*
 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
 *
 * NOTE: the explanatory comments in this file have been revised and are
 * not part of the original; the instructions themselves are unchanged.
 */

/*
        procedure memcmp  (optimized assembler version for the CA)

        result = memcmp (src1_addr, src2_addr, max_bytes)

        Compare the byte array pointed to by src1_addr to the byte array
        pointed to by src2_addr.  Return 0 iff the arrays are equal, -1 if
        src1_addr is lexically less than src2_addr, and 1 if it is lexically
        greater.  Do not compare more than max_bytes bytes.

        Undefined behavior will occur if the end of either source array
        is in the last word of the program's allocated memory space.  This
        is so because, in several cases, memcmp will fetch ahead one word.
        Disallowing the fetch ahead would impose a severe performance penalty.

        NOTE(review): in the big endian build the first src1 fetch is taken
        from the word containing byte (src1_addr - 1), so a word-aligned src1
        also causes a read of the word just below the array.  Confirm that
        this is acceptable for the intended memory map.

        NOTE(review): max_bytes is tested with integer (cmpi/cmpibge)
        compares, so presumably a count with the top bit set is treated as
        non-positive and reported as "equal" -- confirm against the i960
        instruction reference.

        This program handles five cases:

                1) both arguments start on a word boundary
                2) neither are word aligned, but they are offset by the same amount
                3) source1 is word aligned, source2 is not
                4) source2 is word aligned, source1 is not
                5) neither is word aligned, and they are offset by differing amounts

        How the cases are reduced: src1 is always read through a two-word
        window and a shift, so its alignment never matters.  If src2 is word
        aligned the word loop runs directly.  If src2 is not word aligned,
        the first four bytes are compared once, both pointers are advanced
        to the next src2 word boundary, max_bytes is reduced by the same
        amount, and the routine restarts with src2 now aligned.

        At the time of this writing, only g0 thru g7 and g14 are available
        for use in this leafproc; other registers would have to be saved and
        restored.  These nine registers are sufficient to implement the
        comparison itself; in addition g13 is used to hold the return address
        (it is loaded from g14 at Lrestart and used by every bx exit).
        The registers are used as follows:

                g0      original src1 ptr;  extracted word;  return result
                g1      src2 ptr;  byte extraction mask
                g2      maximum number of bytes to compare
                g3      src2 word ptr
        Little endian
                g4      lsw of src1
                g5      msw of src1
                g6      src2 word
                g7      src1 word ptr
        Big endian
                g4      msw of src1
                g5      lsw of src1
                g6      src1 word ptr
                g7      src2 word
                g13     return address
                g14     shift count

        Entry points: _memcmp loads the address of Lrett (a plain ret) into
        g14 and falls through into __memcmp; __memcmp expects g14 to hold
        the return address already.  g14 is cleared again before returning.
*/

/*
 * Symbolic names for the endian-dependent registers.  g4/g5 always hold the
 * two-word src1 window and g6/g7 always hold SRC1/SRC2; only the roles within
 * each pair swap.  The eshro instructions below name g4 and g6 literally;
 * presumably eshro operates on the register pair starting at the named
 * register -- confirm against the i960 instruction reference.
 */
#if __i960_BIG_ENDIAN__
#define MSW g4
#define LSW g5
#define SRC1 g6
#define SRC2 g7
#else
#define LSW g4
#define MSW g5
#define SRC2 g6
#define SRC1 g7
#endif

        .globl  _memcmp
        .globl  __memcmp
        .leafproc       _memcmp, __memcmp
        .align  2
_memcmp:
#ifndef __PIC
        lda     Lrett,g14
#else
        lda     Lrett-(.+8)(ip),g14
#endif
__memcmp:
Lrestart:
/*
 * Common start, also re-entered from Lsrc2_unaligned with updated g0, g1, g2
 * and with g14 reloaded from g13.
 */
#if __i960_BIG_ENDIAN__
        subo    1,g0,SRC1
        notand  SRC1,3,SRC1     # word addr containing byte (src1 - 1)
#else
        notand  g0,3,SRC1       # extract word addr of start of src1
#endif
        lda     (g14),g13       # preserve return address (g14 is reused below)
        cmpibge.f 0,g2,Lequal_exit      # report equality if byte count <= 0
        notand  g1,3,g3         # extract word addr of start of src2
        ld      (SRC1),LSW      # fetch word with at least first byte of src1
        cmpo    g3,g1           # check alignment of src2 (tested by bne.f below)
        ld      4(SRC1),MSW     # fetch second word of src1
        shlo    3,g0,g14        # compute shift count for src1 (byte addr * 8)
#if __i960_BIG_ENDIAN__
        subo    g14,0,g14       # adjust shift count for big endian.
#endif
        ld      (g3),SRC2       # fetch word with at least first byte of src2
        eshro   g14,g4,LSW      # extract first four bytes of src1 from g4/g5 window
        lda     8(SRC1),SRC1    # advance src1 word addr past both fetched words
        bne.f   Lsrc2_unaligned # branch if src2 is NOT word aligned

        mov     LSW,g0          # at least src2 is word aligned; g0 = src1 word

        lda     0xff,g1         # byte extraction mask, used only if words differ

Lwloop:                         # word comparing loop
        cmpo    SRC2,g0         # compare src1 and src2 words
        lda     4(g3),g3        # pre-increment src2 addr
        mov     MSW,LSW         # move msw of src1 to lsw
        ld      (SRC1),MSW      # pre-fetch next msw of src1
        subi    4,g2,g2         # decrement maximum byte count
        bne.f   Lcloop          # branch if src1 and src2 unequal
        cmpi    0,g2            # any bytes left to compare?
        ld      (g3),SRC2       # pre-fetch next word of src2
        eshro   g14,g4,g0       # extract next word of src1
        lda     4(SRC1),SRC1    # post-increment src1 addr
        bl.t    Lwloop          # loop while 0 < remaining byte count

        b       Lequal_exit     # arrays were equal up through max_bytes

Lcloop_setup:                   # setup for coming from Lsrc2_unaligned
        mov     LSW,g0          # restore extracted src1 word
        subo    4,g2,g2         # pre-subtract the 4 that .diff adds back
        lda     0xff,g1         # byte extraction mask

/*
 * Byte loop.  It is entered only after a word compare found SRC2 != g0, so a
 * differing byte exists within the word and the loop needs no end test.
 * g2 is decremented once per equal byte so that .diff can tell whether the
 * differing byte lies inside max_bytes.
 */
Lcloop:                         # character comparing loop
#if __i960_BIG_ENDIAN__
        rotate  24,g1,g1        # shift mask for next byte
#endif
        and     SRC2,g1,g3      # extract next char of src2
        and     g0,g1,LSW       # extract next char of src1
        cmpobne.f LSW,g3,.diff  # leave loop at the first differing byte
#if ! __i960_BIG_ENDIAN__
        shlo    8,g1,g1         # shift mask for next byte
#endif
        subi    1,g2,g2         # decrement character counter
        b       Lcloop          # bytes matched; try the next byte of the word


Lequal_exit:                    # arrays are equal within max_bytes
        mov     0,g14           # conform to register conventions
        lda     0,g0            # return zero, indicating equality
        bx      (g13)           # return
Lrett:
        ret

/*
 * A differing byte was found: LSW holds the src1 byte and g3 the src2 byte.
 * The bl below has no compare of its own, so presumably it tests the
 * condition code left by the cmpobne in Lcloop (addo and lda are assumed
 * not to alter it -- confirm against the i960 instruction reference).
 */
.diff:
        addo    4,g2,g2         # to make up for extra decrement in loop
        lda     0,g14           # conform to register conventions
        bl      Lless_than_exit # src1 byte below src2 byte
Lgreater_than_exit:
        cmpibge.f 0,g2,Lequal_exit      # branch if difference is beyond max_bytes
        mov     1,g0
        bx      (g13)           # g0 =  1 (src1 > src2)
Lless_than_exit:
        cmpibge.f 0,g2,Lequal_exit      # branch if difference is beyond max_bytes
        subi    1,0,g0
        bx      (g13)           # g0 = -1 (src1 < src2)

/*
 * src2 is not word aligned.  Compare the first four bytes once using a
 * two-word src2 window, then realign.  Registers are reused here:
 * SRC1 temporarily holds the second src2 word, MSW holds the src2 shift
 * count, and g14 holds the pointer/count adjustment.
 * g14 presumably ends up as -(4 - (g1 & 3)), i.e. minus the number of bytes
 * up to the next src2 word boundary -- confirm the notor operand order
 * against the i960 instruction reference.
 */
Lsrc2_unaligned:
        notor   g1,3,g14        # first step in computing new src1 ptr
        ld      4(g3),SRC1      # fetch second word of src2
        shlo    3,g1,MSW        # compute shift count for src2
#if __i960_BIG_ENDIAN__
        subo    MSW,0,MSW
#endif
        eshro   MSW,g6,SRC2     # extract first four bytes of src2 from g6/g7 window
        cmpo    LSW,SRC2        # compare src1 and src2 words
        lda     4(g3),g1        # set new src2 ptr (next word boundary)
        bne.f   Lcloop_setup    # first four bytes differ
        subo    g14,g0,g0       # second (final) step in computing new src1 ptr
        addi    g14,g2,g2       # compute new max_bytes too
        lda     (g13),g14       # prepare return pointer for Lrestart
        b       Lrestart        # continue with both string fetches shifted