1/******************************************************************************* 2 * 3 * Copyright (c) 1993 Intel Corporation 4 * 5 * Intel hereby grants you permission to copy, modify, and distribute this 6 * software and its documentation. Intel grants this permission provided 7 * that the above copyright notice appears in all copies and that both the 8 * copyright notice and this permission notice appear in supporting 9 * documentation. In addition, Intel grants this permission provided that 10 * you prominently mark as "not part of the original" any modifications 11 * made to this software or documentation, and that the name of Intel 12 * Corporation not be used in advertising or publicity pertaining to 13 * distribution of the software or the documentation without specific, 14 * written prior permission. 15 * 16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR 17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY 18 * OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or 19 * representations regarding the use of, or the results of the use of, 20 * the software and documentation in terms of correctness, accuracy, 21 * reliability, currentness, or otherwise; and you rely on the software, 22 * documentation and results solely at your own risk. 23 * 24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS, 25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES 26 * OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM 27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER. 
28 * 29 ******************************************************************************/ 30 31 .file "memcmp.s" 32#ifdef __PIC 33 .pic 34#endif 35#ifdef __PID 36 .pid 37#endif 38/* 39 * (c) copyright 1988,1993 Intel Corp., all rights reserved 40 */ 41/* 42 procedure memcmp (optimized assembler version for the 80960K series) 43 44 result = memcmp (src1_addr, src2_addr, max_bytes) 45 46 compare the byte array pointed to by src1_addr to the byte array 47 pointed to by src2_addr. Return 0 iff the arrays are equal, -1 iff 48 src1_addr is lexicographically less than src2_addr, and 1 iff it is 49 lexicographically greater. Do not compare more than max_bytes bytes. 50 51 Undefined behavior will occur if the end of either source array 52 is in the last two words of the program's allocated memory space. 53 This is so because memcmp fetches ahead. Disallowing the fetch ahead 54 would impose a severe performance penalty. 55 56 Strategy: 57 58 Fetch the source strings by words and compare the words until either 59 a differing word is found or max_bytes is exhausted. In the former 60 case, move through the words to find the differing byte and return 61 plus or minus one, appropriately. In the latter case, return zero 62 (equality). 63 64 Tactics: 65 66 1) Do NOT try to fetch the words in a word aligned manner because, 67 in my judgement, the performance degradation experienced due to 68 non-aligned accesses does NOT outweigh the time and complexity added 69 by the preamble that would be necessary to assure alignment. This 70 is supported by the intuition that most source arrays (even more 71 true of most big source arrays) will be word aligned to begin with. 72 73 2) Rather than decrementing max_bytes to zero, I calculate the 74 address of the byte after the last byte of the source_1 array, and 75 quit when the source byte pointer passes that. 
*/

	.globl	_memcmp
	.globl	__memcmp
	.leafproc	_memcmp,__memcmp
	.align	2

/*
 * int memcmp (src1_addr, src2_addr, max_bytes)
 *
 * Two entry points are declared through .leafproc: _memcmp, which first
 * loads g14 with the address of a "ret" stub, and __memcmp, which expects
 * the return address to be in g14 already.
 *
 * Register usage:
 *   g0   in:  src1 byte pointer        out: result (0, 1 or -1)
 *   g1   in:  src2 byte pointer
 *   g2   in:  max_bytes; then the address one past the last src1 byte
 *   g5   word most recently loaded from src1
 *   g3   word most recently loaded from src2
 *   g4   single-byte mask, starts at 0xff and moves up 8 bits per byte
 *   g7   masked byte of the src1 word
 *   g6   masked byte of the src2 word
 *   g13  copy of the return address; every exit is "bx (g13)"
 *   g14  return address on entry to __memcmp; zeroed before any exit
 */
_memcmp:
#ifndef __PIC
	lda	.Lcall_ret,g14		# return through the ret stub below
#else
	lda	.Lcall_ret-(.+8)(ip),g14	# same, computed ip-relative
#endif
__memcmp:
	mov	g14,g13			# keep the return address in g13
	mov	0,g14			# g14 must not stay live as a return address
	cmpibge	0,g2,Lzero		# signed test: max_bytes <= 0 means equal
	addo	g0,g2,g2		# g2 = src1 + max_bytes (end address)

/* Word loop: compare four bytes per iteration until the words differ
   or the src1 pointer reaches the end address. */
.Lwords:
	cmpo	g0,g2			# unsigned: has src1 reached the end?
	ld	(g0),g5			# src1 word, loaded before the branch below
	bge	Lzero			# yes: nothing differed inside max_bytes
	ld	(g1),g3			# src2 word
	addo	4,g1,g1			# advance src2 by one word
	addo	4,g0,g0			# advance src1 by one word
	cmpobe	g5,g3,.Lwords		# same word: keep scanning

/* The words differ.  Step src1 back to the start of this word and check
   it one byte at a time, lowest-order byte first. */
	subo	4,g0,g0			# g0 = address of the mismatching word
	ldconst	0xff,g4			# select the low-order byte first

.Lbytes:
	and	g4,g3,g6		# g6 = selected byte of the src2 word
	and	g4,g5,g7		# g7 = selected byte of the src1 word
	cmpobne	g7,g6,.differ		# unequal: condition code feeds .differ
	addo	1,g0,g0			# one more src1 byte accounted for
	shlo	8,g4,g4			# move the mask to the next byte
	cmpobl	g0,g2,.Lbytes		# continue while still below the end

/* Reached when max_bytes <= 0, or when the end address is hit before a
   differing byte is found. */
Lzero:
	mov	0,g0			# result: equal
	bx	(g13)
.Lcall_ret:
	ret				# target loaded into g14 at _memcmp

/* Reached only from the cmpobne above, so the condition code still holds
   the unsigned comparison of the src1 byte (g7) with the src2 byte (g6). */
.differ:
	bl	.less			# src1 byte is the smaller one
	mov	1,g0			# result: src1 > src2
	bx	(g13)
.less:
	subi	1,0,g0			# result: 0 - 1 = -1, src1 < src2
	bx	(g13)

/* end of memcmp */