/*******************************************************************************
 *
 * Copyright (c) 1993 Intel Corporation
 *
 * Intel hereby grants you permission to copy, modify, and distribute this
 * software and its documentation. Intel grants this permission provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation. In addition, Intel grants this permission provided that
 * you prominently mark as "not part of the original" any modifications
 * made to this software or documentation, and that the name of Intel
 * Corporation not be used in advertising or publicity pertaining to
 * distribution of the software or the documentation without specific,
 * written prior permission.
 *
 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
 * OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or
 * representations regarding the use of, or the results of the use of,
 * the software and documentation in terms of correctness, accuracy,
 * reliability, currentness, or otherwise; and you rely on the software,
 * documentation and results solely at your own risk.
 *
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
 * OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
 *
 ******************************************************************************/

#include <picolibc.h>

	.file "memcm_ca.s"
#ifdef __PIC
	.pic
#endif
#ifdef __PID
	.pid
#endif
/*
 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
 */

/*
	procedure memcmp  (optimized assembler version for the CA)

	result = memcmp (src1_addr, src2_addr, max_bytes)

	compare the byte array pointed to by src1_addr to the byte array
	pointed to by src2_addr.  Return 0 iff the arrays are equal, -1 if
	src1_addr is lexically less than src2_addr, and 1 if it is lexically
	greater.  Do not compare more than max_bytes bytes.  A max_bytes
	value of zero (or less, as a signed integer) returns 0.

	Undefined behavior will occur if the end of either source array
	is in the last word of the program's allocated memory space.  This
	is so because, in several cases, memcmp will fetch ahead one word.
	Disallowing the fetch ahead would impose a severe performance penalty.

	This program handles five cases:

	1) both arguments start on a word boundary
	2) neither are word aligned, but they are offset by the same amount
	3) source1 is word aligned, source2 is not
	4) source2 is word aligned, source1 is not
	5) neither is word aligned, and they are offset by differing amounts

	Strategy, as implemented below: src1 is always read as a pair of
	words and the wanted word is extracted with a 64-bit shift (eshro),
	so any src1 alignment is handled by the word loop.  If src2 is not
	word aligned, the first four bytes are compared once using the same
	extraction trick on src2; if they match, both pointers and max_bytes
	are adjusted so that src2 lands on the next word boundary and the
	routine restarts at Lrestart.

	At the time of this writing, only g0 thru g7 and g14 are available
	for use in this leafproc; other registers would have to be saved and
	restored.  These nine registers are sufficient to implement the routine.
	NOTE(review): the code below also uses g13 (to hold the return
	address), which is not in the list above -- confirm against the
	leafproc register convention in use.
	The registers are used as follows:

	g0	original src1 ptr; extracted word; return result
	g1	src2 ptr; byte extraction mask
	g2	maximum number of bytes to compare
	g3	src2 word ptr
	Little endian
		g4	lsw of src1
		g5	msw of src1
		g6	src2 word
		g7	src1 word ptr
	Big endian
		g4	msw of src1
		g5	lsw of src1
		g6	src1 word ptr
		g7	src2 word
	g13	return address
	g14	shift count
*/

/*
 * Register aliases.  MSW/LSW and SRC1/SRC2 swap between g4/g5 and g6/g7
 * depending on endianness; the eshro instructions below name g4 and g6
 * directly, i.e. the even register of each pair.
 */
#if __i960_BIG_ENDIAN__
#define MSW g4
#define LSW g5
#define SRC1 g6
#define SRC2 g7
#else
#define LSW g4
#define MSW g5
#define SRC2 g6
#define SRC1 g7
#endif

	.globl _memcmp
	.globl __memcmp
	.leafproc _memcmp, __memcmp
	.align 2
/*
 * _memcmp entry: load the address of Lrett (a ret instruction) into g14 so
 * that the shared exit sequence "bx (g13)" ends up executing ret.
 */
_memcmp:
#ifndef __PIC
	lda Lrett,g14
#else
	lda Lrett-(.+8)(ip),g14
#endif
/*
 * __memcmp entry: the return address is expected to be in g14 already.
 * Lrestart is also the re-entry point used by Lsrc2_unaligned once src2
 * has been advanced to a word boundary.
 */
__memcmp:
Lrestart:
#if __i960_BIG_ENDIAN__
	subo 1,g0,SRC1
	notand SRC1,3,SRC1 # extract word addr of start of src1
#else
	notand g0,3,SRC1 # extract word addr of start of src1
#endif
	lda (g14),g13 # preserve return address (g14 is reused as shift count)
	cmpibge.f 0,g2,Lequal_exit # return equality if byte count is <= 0
	notand g1,3,g3 # extract word addr of start of src2
	ld (SRC1),LSW # fetch word with at least first byte of src1
	cmpo g3,g1 # check alignment of src2 (result consumed by bne.f below)
	ld 4(SRC1),MSW # fetch second word of src1
	shlo 3,g0,g14 # compute shift count for src1 (byte offset * 8)
#if __i960_BIG_ENDIAN__
	subo g14,0,g14 # adjust shift count for big endian.
#endif
	ld (g3),SRC2 # fetch word with at least first byte of src2
	eshro g14,g4,LSW # extract word of src1 from the g4:g5 pair
	lda 8(SRC1),SRC1 # advance src1 word addr past the two words fetched
	bne.f Lsrc2_unaligned # branch if src2 is NOT word aligned

	mov LSW,g0 # at least src2 is word aligned

	lda 0xff,g1 # byte extraction mask, in case Lcloop is entered

Lwloop: # word comparing loop
	cmpo SRC2,g0 # compare src1 and src2 words
	lda 4(g3),g3 # pre-increment src2 addr
	mov MSW,LSW # move msw of src1 to lsw
	ld (SRC1),MSW # pre-fetch next msw of src1
	subi 4,g2,g2 # decrement maximum byte count
	bne.f Lcloop # branch if src1 and src2 unequal
	cmpi 0,g2 # any bytes left to compare?
	ld (g3),SRC2 # pre-fetch next word of src2
	eshro g14,g4,g0 # extract word of src1
	lda 4(SRC1),SRC1 # post-increment src1 addr
	bl.t Lwloop # branch if max_bytes not reached yet

	b Lequal_exit # arrays were equal up through max_bytes

Lcloop_setup: # setup for coming from Lsrc2_unaligned
	mov LSW,g0 # restore extracted src1 word
	subo 4,g2,g2 # make up for later re-incrementing
	lda 0xff,g1 # byte extraction mask

/*
 * Byte loop.  Entered only when the words in g0 (src1) and SRC2 differ, so
 * a mismatching byte is found within four iterations; there is no other
 * loop exit.  g2 has already been decremented by 4 for this word and .diff
 * adds the 4 back.
 */
Lcloop: # character comparing loop
#if __i960_BIG_ENDIAN__
	rotate 24,g1,g1 # shift mask for next byte
#endif
	and SRC2,g1,g3 # extract next char of src2
	and g0,g1,LSW # extract next char of src1
	cmpobne.f LSW,g3,.diff # leave the loop at the first differing byte
#if ! __i960_BIG_ENDIAN__
	shlo 8,g1,g1 # shift mask for next byte
#endif
	subi 1,g2,g2 # decrement character counter
	b Lcloop # bytes matched; go on to the next byte of the word


Lequal_exit: # arrays are equal through max_bytes
	mov 0,g14 # conform to register conventions
	lda 0,g0 # return zero, indicating equality
	bx (g13) # return
Lrett:
	ret

/*
 * A differing byte was found.  Decide between -1 and 1 using the condition
 * code left by the byte compare in Lcloop (no compare intervenes), and
 * report equality instead if the differing byte lies beyond max_bytes.
 */
.diff:
	addo 4,g2,g2 # to make up for extra decrement in loop
	lda 0,g14 # clear g14 before returning, as Lequal_exit does
	bl Lless_than_exit # src1 byte < src2 byte
Lgreater_than_exit:
	cmpibge.f 0,g2,Lequal_exit # branch if difference is beyond max_bytes
	mov 1,g0
	bx (g13) # g0 = 1 (src1 > src2)
Lless_than_exit:
	cmpibge.f 0,g2,Lequal_exit # branch if difference is beyond max_bytes
	subi 1,0,g0
	bx (g13) # g0 = -1 (src1 < src2)

/*
 * src2 is not word aligned.  Compare the first four bytes using a two-word
 * fetch and eshro on src2 as well; the SRC1 and MSW registers are reused as
 * scratch here, which is safe because Lrestart reloads them.  On a match,
 * move src2 to the next word boundary, move src1 and max_bytes by the same
 * number of bytes, and start over.
 */
Lsrc2_unaligned:
	notor g1,3,g14 # first step in computing new src1 ptr
	ld 4(g3),SRC1 # fetch second word of src2
	shlo 3,g1,MSW # compute shift count for src2
#if __i960_BIG_ENDIAN__
	subo MSW,0,MSW
#endif
	eshro MSW,g6,SRC2 # extract word of src2 from the g6:g7 pair
	cmpo LSW,SRC2 # compare src1 and src2 words
	lda 4(g3),g1 # set new src2 ptr (next word boundary)
	bne.f Lcloop_setup # first four bytes differ
	subo g14,g0,g0 # second (final) step in computing new src1 ptr
	addi g14,g2,g2 # compute new max_bytes too
	lda (g13),g14 # prepare return pointer for Lrestart
	b Lrestart # continue with both string fetches shifted