1/*
2 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
3 *
4 *  To anyone who acknowledges that this file is provided "AS IS"
5 *  without any express or implied warranty:
6 *      permission to use, copy, modify, and distribute this file
7 *  for any purpose is hereby granted without fee, provided that
8 *  the above copyright notice and this notice appears in all
9 *  copies, and that the name of Hewlett-Packard Company not be
10 *  used in advertising or publicity pertaining to distribution
11 *  of the software without specific, written prior permission.
12 *  Hewlett-Packard Company makes no representations about the
13 *  suitability of this software for any purpose.
14 */
15
16/*HPUX_ID:	@(#) $Revision$	*/
17/* strncat(s1,s2,n) : concatenate at most n characters from s2 onto s1 */
18
19#include <picolibc.h>
20
21#include "DEFS.h"
22
/*
 * Register aliases used by strncat below.
 *
 *   d_addr  destination pointer (s1); first advanced to the end of s1,
 *           then used as the store pointer while appending.
 *   s_addr  source pointer (s2).
 *   count   n, the maximum number of characters to append; kept biased
 *           by -4 while the word-copy loops run.
 *   tmp1-3  data words loaded from the source / shifted for storing.
 *   tmp4    low two bits of the source address, later the shift count
 *           (in bits) for the unaligned path.
 *   tmp5    low two bits of the destination address; zeroed once the
 *           first (partial) word has been stored.
 *   tmp6    copy of the original s_addr (aligned path) or the
 *           dest-minus-source byte offset difference (unaligned path).
 *   save    scratch for building byte masks and for the result of the
 *           uxor,nbz "any zero byte?" test.
 *   tmp7    original s_addr, valid until the first word store; zero
 *           afterwards.
 */
23#define	d_addr  r26
24#define	s_addr  r25
25#define	count   r24
26#define	tmp1    r19
27#define	tmp2    r20
28#define	tmp3    r21
29#define	tmp4    r22
30#define	tmp5	arg3
31#define tmp6	r31
32#define	save	r1
33#define tmp7	ret1	/* source offset-- reset to orig source addr if not aligned */
34
35
/*
 * strncat(s1, s2, n)
 *
 * Appends at most n characters from s2 (s_addr) to the end of s1 (d_addr)
 * and stores a terminating null byte after the appended characters.
 *
 * Returns: ret0 = the original value of d_addr.
 * If s_addr is 0 the routine branches straight to quit without touching s1.
 *
 * Outline:
 *   1. Scan s1 a word at a time (uxor,nbz) to find its terminating null and
 *      leave d_addr pointing at that null.
 *   2. If count < 4, copy byte by byte (byteloop).
 *   3. Otherwise copy a word at a time.  When source and destination share
 *      the same byte offset within a word the "aligned" path is used; when
 *      they differ the "not_aligned" path merges adjacent source words with
 *      vshd.  Whenever a word containing a null byte is seen, s_addr is
 *      backed up and the remainder is finished in byteloop.
 *   4. done: store the terminating null at d_addr and return through r2.
 *
 * Scratch registers written: tmp1-tmp7, save and control register 11.
 *
 * NOTE: many instructions below sit in branch delay slots or are
 * conditionally nullified by the preceding instruction; statement order
 * is significant.
 */
36ENTRY(strncat)
37
38	comb,=		r0,s_addr,quit	/* quit if s2=NULL */
39        copy      d_addr,ret0          /* The return value is the value of d_addr. DELAY SLOT*/
40
41/* First look for end of s1 (d_addr) */
42
43        extru       d_addr,31,2,tmp1   /* Extract the low two bits of the dest address. */
44	combt,=		tmp1,r0,dont_mask
45	dep		0,31,2,d_addr	/*set word alignment */
	/* s1 does not start on a word boundary: load the containing word and */
	/* OR a mask into it before testing for a null byte. */
46	ldwm		4(d_addr),tmp2
47	sh3add		tmp1,r0,save	/* build mask based on tmp1 */
48	mtctl		save,11
49	zvdepi		-2,32,save
50	or		save,tmp2,tmp2
51	uxor,nbz	tmp2,r0,save
52search:
53	b,n		found_end	/* nullified under uxor conditions above and below */
54dont_mask:
	/* word-at-a-time scan: keep loading until a word contains a zero byte */
55	ldwm		4(d_addr),tmp2
56	comib,tr	r0,r0,search
57	uxor,nbz	tmp2,r0,save
58
	/* save holds the word containing the null; each extru,<> nullifies the */
	/* following adjustment when the byte it extracts is non-zero. */
59found_end:				/* at this point d_addr points to word */
60	extru,<>	save,7,8,r0	/* following word with null */
61	addib,tr,n	-4,d_addr,begin_copy	/*set d_addr to end of s1 */
62	extru,<>	save,15,8,r0
63	addib,tr,n	-3,d_addr,begin_copy
64	extru,<>	save,23,8,r0
65	addi		-1,d_addr,d_addr
66	addi		-1,d_addr,d_addr
67
68
	/* d_addr now addresses the terminating null of s1. */
	/* From here on count is biased by -4 until byteloop/back_porch add it back. */
69begin_copy:
70        addibt,<,n  -4,count,byteloop     /* If count is < 4 don't get fancy.*/
71
72        extru       s_addr,31,2,tmp4   /* Extract the low two bits of the source address.*/
73        extru       d_addr,31,2,tmp5   /* Extract the low two bits of the destination address.*/
74        add         count,tmp5,count   /* pre increment the count by the dest byte offset so that the count is measured from the word boundary below d_addr */
75	copy		s_addr,tmp6	/* save original s_addr in case we find null in first word */
76	copy	     s_addr, tmp7	/* save s_addr in case we find null before first store */
77        comb,<>       tmp5,tmp4,not_aligned /* branch if tmp5<>tmp4. */
78        dep         0,31,2,s_addr      /* Compute the word address of the source.  DELAY SLOT.*/
79/* aligned*/
	/* source and destination have the same byte offset within a word */
80	combt,=		tmp5,r0,skip_mask
81        ldwm        4(0,s_addr),tmp1   /* tmp1 = *s_addr   s_addr += 4 (DELAY SLOT)*/
82	sh3add		tmp5,r0,save	/* compute mask in save*/
83	mtctl		save,11
84	zvdepi		-2,32,save
85	or		save,tmp1,tmp1  /* or mask with data*/
86	uxor,nbz	tmp1,r0,save 	/* check for null*/
87	b,n		null1
88	addibt,<	-4,count,back_porch
89        stbys,b,m   tmp1,4(0,d_addr)   /* store word (delay slot)*/
90
	/* aligned main loop: load a word, bail out to the byte loop if it */
	/* contains a null, otherwise store it and continue while count >= 0 */
91chunks:
92	ldwm		4(0,s_addr),tmp1 /* get a word*/
93
94skip_mask:
95	uxor,nbz	tmp1,r0,save 	/* check for null*/
96	b,n		align_null1
97	addibf,<	-4,count,chunks
98        stbys,b,m   tmp1,4(0,d_addr)   /* store word (delay slot)*/
99
100back_porch:				   /* last word to store*/
101         addibt,=,n  4,count,done       /* if count = 0 we're, of course, done !*/
102         ldws        0(s_addr),tmp1     /* load up the back_porch*/
103	 sh3add		count,r0, save	/* setup right mask based on count*/
104	 mtctl		save,r11
105	 zvdepi		-2,32,save	/*save now has left-hand mask*/
106	 uaddcm		r0,save,save	/*form right hand mask */
107	 or		tmp1,save,tmp1	/*and insert data*/
108	 uxor,nbz	tmp1,r0,save	/* check for null*/
109	 b,n 		null2
110         add         d_addr,count,d_addr/* final store address  is +1 too high !*/
111	 b		done
112	 stbys,e	tmp1,0(d_addr)	/* done */
113
114/* Begin non_aligned code. */
	/* tmp6 = tmp5 - tmp4 (dest offset minus source offset).  The sub,>= */
	/* nullifies the first ldwm when that difference is not negative. */
115not_aligned:
116        sub,>=       tmp5,tmp4,tmp6     /* compute the shift amt.and skip load if tmp5 > tmp4.*/
117        ldwm         4(0,s_addr),tmp1   /* load up the first word from the source. tmp1 = *s_addr++*/
118        zdep         tmp6,28,29,tmp4    /* compute the number of bits to shift */
119        mtctl        tmp4,11            /* load the shift count into cr11 = shift count register.*/
120        addibt,<,n   -4,count,chkchnk2 /* first step in pre adjustment of count for looping.*/
121
122        ldwm        4(0,s_addr),tmp2    /* get either first or second word from source. */
123	combt,=		tmp5,r0,skip_mask4 /* don't mask if whole word is valid*/
124        vshd        tmp1,tmp2,tmp3      /* position data !  (delay slot)*/
	/* building the mask overwrites cr11, so the shift count is reloaded after */
125	sh3add		tmp5,r0,save	/* setup r1*/
126	mtctl		save,r11	/* setup mask in save*/
127	zvdepi		-2,32,save
128	or		save, tmp3, tmp3
129        mtctl           tmp4,11            /* re-load the shift count into cr11 */
130
131skip_mask4:
132	uxor,nbz	tmp3, r0, save
133	b,n		null4		/* special case for first word */
134	copy 		r0, tmp5	/* zero out tmp5 so we don't try to mask again*/
135	copy		r0, tmp7	/* zero out tmp7 so we don't try to use original s_addr anymore */
136	b		continue
137        stbys,b,m   tmp3,4(0,d_addr)    /* store ! */
138
	/* unaligned main loop, unrolled twice: tmp1 and tmp2 alternate as the */
	/* older and newer source word fed to vshd */
139chunk2:
140        ldwm        4(0,s_addr),tmp2
141        vshd        tmp1,tmp2,tmp3
142
143skip_mask2:
144	uxor,nbz	tmp3, r0, save
145	b,n		null3
146        stbys,b,m   tmp3,4(0,d_addr)    /* store ! */
147
148continue:
149        ldwm        4(0,s_addr),tmp1    /* get 2nd word ! */
150        vshd        tmp2,tmp1,tmp3      /* position data ! */
151	uxor,nbz	tmp3, r0, save
152	b,n		null3
153
154        addibf,<    -8,count,chunk2    /* If count is still >= 8 do another loop.*/
155        stbys,b,m   tmp3,4(0,d_addr)    /* store !*/
156
157chkchnk2:
158        addibt,<,n  4,count,bp_0       /* if we don't have 4 bytes left then do the back porch (bp_0)*/
159
160subchnk2: /* we have less than 8 chars to copy*/
161
162        ldwm        4(0,s_addr),tmp2    /* get next word !*/
163	combt,=		tmp5,r0,skip_mask3
164        vshd        tmp1,tmp2,tmp3      /* position data !*/
165	sh3add		tmp5,r0,save	/* setup r1*/
166	mtctl		save,r11	/* setup mask in save*/
167	zvdepi		-2,32,save
168	or		save, tmp3, tmp3
169	mtctl		tmp4,11		/* restore shift value again */
170skip_mask3:
171	uxor,nbz	tmp3,r0,save
172	b,n		null3
173	copy		r0,tmp5   /* zero out tmp5 so null3 does correct alignment */
174	copy		r0,tmp7	  /* zero out tmp7 so we don't use original s_addr since no longer valid */
175	b		bp_1 /* we now have less than 4 bytes to move*/
176        stbys,b,m   tmp3,4(0,d_addr)    /* store !*/
177
178bp_0:
179	copy		tmp1,tmp2	/* switch registers for shift process */
180	addibt,<=,n  4,count,done        /* if count = -4 this implies that count = 0 -> done */
181
182bp_1:
183        ldwm        4(0,s_addr),tmp1    /* get final word !        */
184        vshd        tmp2,tmp1,tmp3      /* position data !*/
185	uxor,nbz	tmp3,r0,save	/* if no-byte-zero */
186	b,n		bp_null		/* don't goto no_null-find which null instead */
187no_null:
188	add	    d_addr,count,d_addr	/* set up d_addr for stbys,e */
189	b		done		/* we're done*/
190        stbys,e     tmp3,0(0,d_addr)    /* store the data !*/
191
192/* here we do ye old byte-at-a-time moves.*/
	/* The entry points below differ only in how s_addr and count are fixed */
	/* up before the shared byte-copy loop runs. */
193align_null1:
194	b	byteloop
195	addi	-4,s_addr,s_addr
196null1:
197	copy		tmp6,s_addr		/* restore orig s_addr (aligned only) */
198byteloop:
	/* add back the 4 subtracted from count earlier; nothing to copy if zero. */
	/* The ldbs in the delay slot executes whether or not the branch is taken. */
199	addibt,=     4,count,done
200null2:
201        ldbs,ma     1(s_addr),tmp1
	/* copy one byte per iteration; stop on a null byte or when count hits 0 */
202encore:
203	combt,=,n	tmp1,r0, done
204        stbs,ma     tmp1,1(d_addr)
205        addibf,=,n  -1,count,encore
206        ldbs,ma     1(s_addr),tmp1
207	b,n		done
208
209bp_null:
210	addi	-4,count,count		/* fudge count 'cause byteloop will re-increment */
211
212null3:	/* not_aligned case reset s_addr and finish byte-wise */
213	combt,=,n  r0,tmp7,null3a	/* if tmp7 is not valid address then branch below */
214	b 	byteloop		  	/* otherwise reset s_addr to tmp7 and finish */
215	copy 	tmp7, s_addr
216
217null3a: /* right shift target */
218	addibt,<,n  0,tmp6,null3b	/* if left shifting */
219	sub	r0,tmp6,tmp6		/* do null3b code */
220	addi	-4,tmp6,tmp6
221	b	byteloop
222	add	tmp6,s_addr,s_addr	/* reset s_addr by 4 + shift_amt */
223
224null3b:
225	subi	-8,tmp6,tmp6
226	add	tmp5,tmp6,tmp6  /* adjust by the dest offset if this is our first store */
227	b	byteloop
228	add	tmp6,s_addr,s_addr	/* adjust s_addr by (8-shift_amt-dest_off) */
229
230null4:
231	add,>	tmp6,r0,tmp6		/* if left shift */
232	b,n	null3			/* then do null3 */
233	b	byteloop
234	addi	-4,s_addr,s_addr	/* adj source only by 4 */
235
	/* common exit: return through r2; the delay slot stores the terminating */
	/* null byte at d_addr */
236done:
237	bv		0(r2)
238	stbs		r0,0(d_addr)
	/* quit is reached directly when s_addr is 0.  EXIT is a macro from DEFS.h, */
	/* which is not shown here; presumably it emits the return -- TODO confirm. */
239quit:
240EXIT(strncat)
241