/*******************************************************************************
 *
 * Copyright (c) 1993 Intel Corporation
 *
 * Intel hereby grants you permission to copy, modify, and distribute this
 * software and its documentation.  Intel grants this permission provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation.  In addition, Intel grants this permission provided that
 * you prominently mark as "not part of the original" any modifications
 * made to this software or documentation, and that the name of Intel
 * Corporation not be used in advertising or publicity pertaining to
 * distribution of the software or the documentation without specific,
 * written prior permission.
 *
 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
 * representations regarding the use of, or the results of the use of,
 * the software and documentation in terms of correctness, accuracy,
 * reliability, currentness, or otherwise; and you rely on the software,
 * documentation and results solely at your own risk.
 *
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
 *
 ******************************************************************************/

#include <picolibc.h>

	.file "sncat_ca.s"
#ifdef	__PIC
	.pic
#endif
#ifdef	__PID
	.pid
#endif
/*
 * (c) copyright 1988,1993 Intel Corp., all rights reserved
 */

/*
	procedure strncat  (optimized assembler version for the CA)

	dest_addr = strncat (dest_addr, src_addr, max_bytes)

	append the null terminated string pointed to by src_addr to the null
	terminated string pointed to by dest_addr.  Return the original
	dest_addr.  If the source string is longer than max_bytes, then
	append only max_bytes bytes, and tack on a null byte on the end

	This routine will fail if the source and destination string
	overlap (in particular, if the end of the source is overlapped
	by the beginning of the destination).  The behavior is undefined.
	This is acceptable according to the draft C standard.

	Undefined behavior will also occur if the end of the source string
	(i.e. the terminating null byte) is in the last word of the program's
	allocated memory space.  This is so because, in several cases, strncat
	will fetch ahead one word.  Disallowing the fetch ahead would impose
	a severe performance penalty.

	This program handles five cases:

	1) both arguments start on a word boundary
	2) neither are word aligned, but they are offset by the same amount
	3) source is word aligned, destination is not
	4) destination is word aligned, source is not
	5) neither is word aligned, and they are offset by differing amounts

	Here "destination" means the place where appending starts, i.e. the
	address of the terminating null of the existing dest string, not
	dest_addr itself.

	At the time of this writing, only g0 thru g7 and g13 are available
	for use in this leafproc;  other registers would have to be saved and
	restored.  These nine registers, plus tricky use of g14 are sufficient
	to implement the routine.  The registers are used as follows:

	g0	original dest ptr;  not modified, so that it may be returned.
	g1	src ptr;  shift count;  later the four source bytes currently
		being tested/copied;  also scratch for a word address
	g2	max_bytes (remaining count; decremented as bytes are copied)
	g3	src ptr (word aligned)
	g4	dest ptr (word aligned while scanning and in the word loops,
		byte granular in the character loops)
	g5	0xff  --  byte extraction mask (also briefly used as scratch
		for extracted bytes, then reloaded with 0xff)
	Little endian:
		g6	lsw of double word for extraction of 4 bytes
		g7	msw of double word for extraction of 4 bytes
	Big endian:
		g6	msw of double word for extraction of 4 bytes
		g7	lsw of double word for extraction of 4 bytes
	g13	return address
	g14	byte extracted;  shift count for the double-word extraction.
		Holds the return address on entry and is zero on every exit.

	Review notes (the notes below and the expanded explanatory comments
	in the code are not part of the original Intel source):

	NOTE(review): max_bytes is tested with signed integer compares
	(cmpibge, cmpi), so a count with the top bit set is treated as
	<= 0 and nothing is appended -- confirm callers never rely on
	such counts.

	NOTE(review): the search for the end of dest also loads one word
	beyond the word that contains the terminating null (the load of the
	next word is issued before the scanbyte result is branched on).
*/

#if __i960_BIG_ENDIAN__
#define MSW g6
#define LSW g7
#else
#define LSW g6
#define MSW g7
#endif

	.globl	_strncat
	.globl	__strncat
	.leafproc	_strncat, __strncat
	.align	2
/* First entry point: g14 is loaded with the address of Lrett (a "ret"),
   so that the final "bx (g13)" returns through it.  */
_strncat:
#ifndef __PIC
	lda	Lrett,g14
#else
	lda	Lrett-(.+8)(ip),g14
#endif
/* Second entry point: the return address is already in g14.  */
__strncat:
	notand	g0,3,g4		# extract word addr of start of dest
	lda	(g14),g13	# preserve return address
	cmpibge.f 0,g2,Lexit_code # exit if max_bytes <= 0 (signed compare)
	and	g0,3,LSW	# extract byte offset of dest
	ld	(g4),MSW	# fetch word containing at least first byte
	shlo	3,LSW,g14	# get shift count for making mask for first word
	subi	1,0,LSW		# mask initially all ones
#if __i960_BIG_ENDIAN__
	shro	g14,LSW,LSW	# get mask for bytes needed from first word
#else
	shlo	g14,LSW,LSW	# get mask for bytes needed from first word
#endif
	notor	MSW,LSW,MSW	# set unneeded bytes to all ones
	lda	0xff,g5		# byte extraction mask

/* Scan dest one word at a time for the word that holds its terminating
   null.  Bytes of the first word that lie before dest were forced to 0xff
   above, so they cannot be mistaken for the null.  On exit from the loop
   LSW holds the word containing the null and g4 points one word past it.  */
Lsearch_for_word_with_null:
	scanbyte 0,MSW		# check for null byte
	lda	4(g4),g4	# post-increment dest word pointer
	mov	MSW,LSW		# keep a copy of current word
	ld	(g4),MSW	# fetch next word of dest
	bno.t	Lsearch_for_word_with_null	# branch if null not found yet
#if __i960_BIG_ENDIAN__
	shro	24,LSW,g14	# extract byte
#else
	and	g5,LSW,g14	# extract byte
#endif
	cmpo	0,g14		# branch if null is first byte of word
	subo	4,g4,g4		# move dest word ptr to word with null
	notand	g1,3,g3		# extract word addr of start of src
	bne.t	Lsearch_for_null

/* The null is the first byte of a dest word, so appending starts on a
   word boundary (cases 1 and 4).  */
Lcase_14:
	cmpo	g1,g3		# check alignment of source
	ld	(g3),LSW	# fetch first word of source
	shlo	3,g1,g14	# compute shift count
	lda	4(g3),g3	# post-increment src addr
	bne.f	Lcase_4		# branch if source is unaligned
/* Case 1: source and append position are both word aligned.  Copy whole
   words while at least four bytes may still be moved and the source word
   holds no null byte.  */
Lcase_1:
Lcase_1_wloop:			# word copying loop
	cmpi	g2,4		# check for fewer than four bytes to move
	lda	(LSW),g1	# keep a copy of the src word
	bl.f	Lcase_1_cloop	# branch if fewer than four bytes to copy
	scanbyte 0,g1		# check for null byte in src word
	ld	(g3),LSW	# pre-fetch next word of src
	addo	4,g3,g3		# post-increment src addr
	bo.f	Lcase_1_cloop	# branch if word contains null byte
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	st	g1,(g4)		# store word in dest string
	addo	4,g4,g4		# post-increment dest addr
	b	Lcase_1_wloop

/* Byte-at-a-time tail shared by all cases.  g1 holds the source bytes
   still to be copied and g5 is 0xff.  The loop ends either when max_bytes
   runs out (a null is then stored by Lstore_null) or when the source
   null has itself been stored.  */
Lcase_3_cloop:
Lcase_1_cloop:			# character copying loop (max_bytes <= 3)
	cmpdeci	0,g2,g2		# is max_bytes exhausted?
#if __i960_BIG_ENDIAN__
	rotate	8,g1,g1		# move next byte into position for extraction
#endif
	and	g5,g1,g14	# extract next char
	be.f	Lstore_null	# if max_bytes is exhausted, store null and quit
	cmpo	0,g14		# check for null byte
	stob	g14,(g4)	# store the byte in dest
#if ! __i960_BIG_ENDIAN__
	shro	8,g1,g1		# move next byte into position for extraction
#endif
	lda	1(g4),g4	# post-increment dest byte addr
	bne.t	Lcase_1_cloop	# branch if null not reached
	bx	(g13)		# return; g14 == 0 here (the null just stored)

/* max_bytes was used up before the source null was reached: terminate
   the destination string explicitly.  */
Lstore_null:
	mov	0,g14		# store null, and set g14 to zero
	stob	g14,(g4)
	bx	(g13)


/* The null is not the first byte of its word.  Step through the word
   (still held in LSW) one byte at a time; g4 advances with each byte and
   ends up as the byte address of the null.  */
Lsearch_for_null:
#if __i960_BIG_ENDIAN__
	shlo	8,LSW,LSW	# check next byte
	shro	24,LSW,g14
#else
	shlo	8,g5,g5		# move mask up to next byte
	and	g5,LSW,g14	# extract byte
#endif
	lda	1(g4),g4	# move dest byte ptr to next byte
	cmpobne.t 0,g14,Lsearch_for_null	# branch if null is not yet found

/* Appending starts in the middle of a dest word (cases 2, 3 and 5).  */
Lcase_235:
	cmpo	g1,g3		# check alignment of src
	ld	(g3),LSW	# pre-fetch word with start of src
	and	3,g1,g1		# compute shift count
	lda	0xff,g5		# load mask for byte extraction
	shlo	3,g1,g14
	lda	4(g3),g3	# post-increment src word counter
	be.t	Lcase_3		# branch if src is word aligned
	and	g4,3,MSW	# extract byte offset for dest string
	cmpo	MSW,g1		# < indicates first word of dest has more bytes
				/* than first word of source. */
	ld	(g3),MSW	# fetch second word of src
#if __i960_BIG_ENDIAN__
	subo	g14,0,g14	# adjust shift count for big endian
#endif
	eshro	g14,g6,g5	# extract four bytes
/* The branch below consumes the condition code set by "cmpo MSW,g1"
   above.  When it is not taken, the source word pair is advanced so that
   LSW holds the second source word before the word loop is entered.  */
#if __i960_BIG_ENDIAN__
	bge.f	1f
#else
	bg.f	1f
#endif
	mov	MSW,LSW
	lda	4(g3),g3	# move src word addr to second word boundary
1:
	mov	g5,MSW		# MSW = the four bytes extracted above
	lda	0xff,g5		# restore byte extraction mask
	b	Lcase_25

Lcase_3:			# src is word aligned; dest is not
	mov	LSW,MSW		# make copy of first word of src
	lda	32,g14		# initialize shift count to zero (mod 32)
Lcase_25:

/* Copy single bytes out of MSW until the dest pointer reaches a word
   boundary.  g14 is stepped by 8 bits per byte so that it holds the shift
   count needed by the word loop that follows.  */
Lcase_3_cloop_at_start:		# character copying loop for start of dest str
	cmpdeci	0,g2,g2		# is max_bytes exhausted?
#if __i960_BIG_ENDIAN__
	shro	24,MSW,g5	# extract next char
#else
	and	g5,MSW,g5	# extract next char
#endif
	be.f	Lstore_null	# store null and quit if max_bytes is exhausted
	cmpo	0,g5		# check for null byte
	stob	g5,(g4)		# store the byte in dest
	addo	1,g4,g4		# post-increment dest ptr
	lda	0xff,g5		# re-initialize byte extraction mask
	notand	g4,3,g1		# extract word address
	be.t	Lexit_code	# exit if the byte just stored was the null
	cmpo	g1,g4		# have we reached word boundary in dest yet?
#if __i960_BIG_ENDIAN__
	lda	-8(g14),g14	# augment the shift counter
	rotate	8,MSW,MSW	# move next byte into position for extraction
#else
	lda	8(g14),g14	# augment the shift counter
	shro	8,MSW,MSW	# move next byte into position for extraction
#endif
	bne.t	Lcase_3_cloop_at_start	# loop until dest reaches a word boundary

#if __i960_BIG_ENDIAN__
	cmpo	0,g14
	ld	(g3),MSW	# fetch msw of operand for double shift
	bne	Lcase_3_wloop	# branch if src is still unaligned.

/* Big endian only: word loop used when the shift count is zero, so the
   source word in LSW is copied as is, without the double-word shift.  */
Lcase_3_wloop2:
	cmpi	g2,4		# less than four bytes to move?
	mov	LSW,g1		# extract 4 bytes of src
	lda	4(g3),g3	# post-increment src word addr
	bl.f	Lcase_3_cloop	# branch if < four bytes left to move
	scanbyte 0,g1		# check for null byte
	mov	MSW,LSW		# move msw to lsw
	ld	(g3),MSW	# pre-fetch msw of operand for double shift
	bo.f	Lcase_3_cloop	# branch if word contains null byte
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	st	g1,(g4)		# store 4 bytes to dest
	addo	4,g4,g4		# post-increment dest ptr
	b	Lcase_3_wloop2
Lcase_4:
	subo	g14,0,g14	# adjust shift count for big endian
#else
Lcase_4:
#endif

	ld	(g3),MSW	# fetch msw of operand for double shift

/* Word loop for a source that is misaligned relative to the append
   position: each pass extracts four source bytes from the g6/g7 register
   pair with a double-word shift of g14 bits, then slides the pair along
   by one word.  */
Lcase_3_wloop:
	cmpi	g2,4		# less than four bytes to move?
	eshro	g14,g6,g1	# extract 4 bytes of src
	lda	4(g3),g3	# post-increment src word addr
	bl.f	Lcase_3_cloop	# branch if < four bytes left to move
	scanbyte 0,g1		# check for null byte
	mov	MSW,LSW		# move msw to lsw
	ld	(g3),MSW	# pre-fetch msw of operand for double shift
	bo.f	Lcase_3_cloop	# branch if word contains null byte
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	st	g1,(g4)		# store 4 bytes to dest
	addo	4,g4,g4		# post-increment dest ptr
	b	Lcase_3_wloop


/* Common exit for paths on which g14 is not already zero.  */
Lexit_code:
	mov	0,g14		# conform to register conventions
	bx	(g13)		# g0 = addr of dest; g14 = 0
Lrett:
	ret

/* end of strncat */
