1/*
2 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
3 *
4 *  To anyone who acknowledges that this file is provided "AS IS"
5 *  without any express or implied warranty:
6 *      permission to use, copy, modify, and distribute this file
7 *  for any purpose is hereby granted without fee, provided that
8 *  the above copyright notice and this notice appears in all
9 *  copies, and that the name of Hewlett-Packard Company not be
10 *  used in advertising or publicity pertaining to distribution
11 *  of the software without specific, written prior permission.
12 *  Hewlett-Packard Company makes no representations about the
13 *  suitability of this software for any purpose.
14 */
15
16/* HPUX_ID:	@(#) $Revision$	*/
17/*
18 * strcat(s1, s2)
19 *
20 * Concatenate s2 on the end of s1.  S1's space must be large enough.
21 * Return s1.
22 */
23#include "DEFS.h"
24
25#define	d_addr  r26	/* destination pointer (s1); moved to the end of s1, then advanced by the copy */
26#define	s_addr  r25	/* source pointer (s2); word aligned and advanced during the copy */
27#define	tmp6    r24	/* low two bits of d_addr once the end of s1 has been found */
28#define	tmp1    r19	/* scratch: low address bits, then words loaded from memory */
29#define	tmp2    r20	/* scratch: words loaded from memory */
30#define	tmp3    r21	/* scratch: offset difference, then the shifted word to be stored */
31#define	tmp4    r22	/* scratch: shift count in bits; single bytes in the tail code */
32#define	tmp5	arg3	/* not referenced anywhere in the code below */
33#define	save	r1	/* scratch: mask while it is built, then the word tested for a null byte */
34
35
36ENTRY(strcat)
/*
 * Append the string at s_addr (s2) to the end of the string at d_addr (s1).
 * ret0 receives the original d_addr before anything else happens.
 *
 * Outline:
 *   1. scan s1 one word at a time until a word containing a null byte is
 *      found, then back d_addr up so it points at that null;
 *   2. copy s2 one word at a time; when the low two address bits of source
 *      and destination differ, the not_aligned path shifts the data;
 *   3. store the final partial word up to and including the null.
 *
 * Control flow leans heavily on nullification: "uxor,nbz" and "extru,<>"
 * cancel the instruction that executes next (which may be a branch target
 * when they sit in a delay slot), and ",n" on a branch can cancel its
 * delay slot.
 */
37
38	comb,=		r0,s_addr,done	/* s2 == NULL: nothing to append, go to done */
39        copy      d_addr,ret0          /* DELAY SLOT (no ,n, so it runs on both paths): return value = original d_addr */
40
41/* First look for the end of s1 (d_addr), one word at a time. */
42
43        extru       d_addr,31,2,tmp1   /* tmp1 = low two bits of the dest address (byte offset in its word). */
44	combt,=		tmp1,r0,dont_mask	/* offset 0: whole first word belongs to s1, no mask needed */
45	dep		0,31,2,d_addr	/* DELAY SLOT: clear the low two bits, d_addr is now word aligned */
46	ldwm		4(d_addr),tmp2	/* tmp2 = word containing the start of s1; d_addr += 4 */
47	sh3add		tmp1,r0,save	/* save = tmp1 * 8: bit count of the bytes in front of s1 */
48	mtctl		save,11	/* cr11 (shift count register) = save */
49	zvdepi		-2,32,save	/* save = mask positioned by cr11; intended to cover the bytes in front of s1 (TODO confirm exact bit pattern) */
50	or		save,tmp2,tmp2	/* force the masked bytes non-zero so they cannot be taken for the null */
51	uxor,nbz	tmp2,r0,save	/* save = tmp2; nullify the next instruction if no byte of it is zero */
52search:
53	b,n		found_end	/* runs only when the preceding uxor (above, or in the comib delay slot below) saw a zero byte */
54dont_mask:
55	ldwm		4(d_addr),tmp2	/* tmp2 = next word of s1; d_addr += 4 */
56	comib,tr	r0,r0,search	/* "tr" condition: always loop back to search */
57	uxor,nbz	tmp2,r0,save	/* DELAY SLOT: save = tmp2; nullifies the branch at search if no byte is zero */
58
59found_end:				/* save = word of s1 holding the null; d_addr points at the word after it */
60	extru,<>	save,7,8,r0	/* first byte non-zero? then skip the next instruction */
61	addib,tr,n	-4,d_addr,begin_copy	/* null is byte 0: d_addr -= 4, go copy */
62	extru,<>	save,15,8,r0	/* second byte non-zero? then skip the next instruction */
63	addib,tr,n	-3,d_addr,begin_copy	/* null is byte 1: d_addr -= 3, go copy */
64	extru,<>	save,23,8,r0	/* third byte non-zero? then skip the first addi */
65	addi		-1,d_addr,d_addr	/* runs only when byte 2 is the null (-2 in total) */
66	addi		-1,d_addr,d_addr	/* null is byte 2 or 3: d_addr now points at the null of s1 */
67
68
69begin_copy:
70
71        extru       s_addr,31,2,tmp1   /* Extract the low two bits of the source address. */
72        extru       d_addr,31,2,tmp6   /* Extract the low two bits of the destination address. */
73        sub,=       tmp6,tmp1,tmp3     /* tmp3 = tmp6 - tmp1; if the offsets are equal the branch below is nullified. */
74        b           not_aligned        /* Offsets differ: shifts will be needed. */
75        dep         0,31,2,s_addr      /* Word address of the source. Runs on both paths: as DELAY SLOT when the branch is taken, as the next instruction when it is nullified. */
76/* aligned: source and destination have the same byte offset (tmp6) */
77
78	combt,=		tmp6,r0,skip_mask	/* offset 0: first word is fully valid, no mask */
79        ldwm        	4(0,s_addr),tmp1   /* tmp1 = *s_addr   s_addr += 4 (DELAY SLOT, runs on both paths) */
80	sh3add		tmp6,r0,save	/* save = tmp6 * 8: bit count of the bytes in front of s2 */
81	mtctl		save,r11	/* shift count register = save */
82	zvdepi		-2,32,save	/* save = mask for the bytes in front of s2 (same construction as in the s1 scan) */
83	or		save,tmp1,tmp1	/* force those bytes non-zero */
84	uxor,nbz	tmp1,r0,save	/* save = tmp1; nullify the next instruction if no byte is zero */
85	b,n		first_null	/* special case: null in first word */
86	b,n		skip_mask2	/* no null: go store the first (partial) word */
87
88chunks:
89	b,n		null_found	/* nullified by the uxor in the comib delay slot below unless the word holds a zero byte */
90
91skip_mask2:
92	stbys,b,m	tmp1,4(d_addr)	/* store tmp1 from d_addr's byte offset to the end of its word and update d_addr (,m) */
93	ldwm		4(s_addr),tmp1	/* tmp1 = next source word; s_addr += 4 */
94skip_mask:
95	comib,tr	0,0,chunks	/* always loop back to chunks */
96	uxor,nbz	tmp1,r0,save	/* DELAY SLOT: save = tmp1; nullifies the branch at chunks if no byte is zero */
97
98/* Begin non_aligned code: source and destination byte offsets differ. */
99
100not_aligned:
101        sh3add,>=       tmp3,r0,tmp4        /* tmp4 = tmp3 * 8 (shift amount); skip the next load if tmp6 > tmp1. */
102        ldwm         	4(0,s_addr),tmp1    /* load up the first word from the source. tmp1 = *s_addr++ (skipped when tmp6 > tmp1) */
103        ldwm        	4(0,s_addr),tmp2    /* get either first or second word from source.  */
104	combt,=		tmp6,r0,chunk2      /* don't mask if whole word is valid */
105        mtctl        	tmp4,11             /* DELAY SLOT of the combt above: load the shift count into cr11 = shift count register. */
106        vshd        	tmp1,tmp2,tmp3      /* position data: tmp3 = tmp1:tmp2 shifted by cr11 (fall-through only; chunk2 repeats it) */
107	sh3add		tmp6,r0,save  	    /* save = tmp6 * 8, used to position the mask */
108	mtctl		save,r11	    /* set-up cr11 for mask */
109	zvdepi		-2,32,save	/* save = mask for the bytes in front of the destination offset */
110	or		save, tmp3, tmp3	/* force those bytes non-zero */
111	uxor,nbz	tmp3,r0,save	/* save = tmp3; nullify the next instruction if no byte is zero */
112	b,n		first_null2	/* null already in the first (masked) word */
113	b		did_mask	/* no null: go store the first partial word */
114        mtctl        	tmp4,11            /* DELAY SLOT: re-load the shift count into cr11 */
115
116chunk2:
117	vshd		tmp1,tmp2,tmp3	/* tmp3 = next destination word assembled from tmp1:tmp2 */
118	uxor,nbz	tmp3, r0, save	/* save = tmp3; nullify the next instruction if no byte is zero */
119	b,n		null_found	/* word holds the terminating null */
120did_mask:
121        stbys,b,m   	tmp3,4(0,d_addr)    /* store from d_addr's byte offset to the end of its word and update d_addr (,m) */
122
123        ldwm        	4(0,s_addr),tmp1    /* get next word: tmp1 = *s_addr, s_addr += 4 */
124        vshd        	tmp2,tmp1,tmp3      /* position data: this time tmp2 is the older word */
125	uxor,nbz	tmp3, r0, save	/* save = tmp3; nullify the next instruction if no byte is zero */
126	b,n		null_found	/* word holds the terminating null */
127	stwm		tmp3,4(d_addr)	/* store the whole word; d_addr += 4 */
128	comib,tr	0,0,chunk2	/* always loop back to chunk2 */
129	ldwm		4(s_addr),tmp2	/* DELAY SLOT: tmp2 = next source word; s_addr += 4 */
130
131
132null_found:				/* save = final word (contains the null); adjust d_addr and store it */
133
134	extru,<>	save,7,8,r0	/* first byte non-zero? then skip the next instruction */
135	addib,tr,n	1,d_addr,store_final	/* null is byte 0: d_addr += 1, store one byte */
136	extru,<>	save,15,8,r0	/* second byte non-zero? then skip the next instruction */
137	addib,tr,n	2,d_addr,store_final	/* null is byte 1: d_addr += 2, store two bytes */
138	extru,<> 	save,23,8,r0	/* third byte non-zero? then skip the next instruction */
139	addib,tr	3,d_addr,store_final2	/* null is byte 2: d_addr += 3. No ,n, so the bv below is its delay slot. NOTE(review): appears to rely on the store at store_final2 running once before the bv takes effect; confirm against the PA-RISC rules for a branch in a delay slot */
140	bv		0(r2)	/* branch to the address in r2 (return) */
141	stw		save,0(d_addr)	/* DELAY SLOT: null is byte 3, store the whole final word */
142
143store_final:
144	bv		0(r2)	/* branch to the address in r2 (return) */
145store_final2:
146	stbys,e		save,0(d_addr) 	/* delay slot: store the leading bytes of save that lie before d_addr's byte offset, i.e. up to and including the null */
147
148first_null:			/* null found in first word of aligned (wrt d_addr) copy; finish byte by byte */
149	addi		-4,s_addr,s_addr	/* undo the post-increment of the first ldwm */
150	ldbx		tmp6(s_addr),tmp4	/* tmp4 = first byte of s2 (at s_addr + tmp6) */
151	add		tmp6,s_addr,s_addr	/* s_addr now points at that first byte */
152	comib,=		0,tmp4,done	/* first byte is the null: finished once the delay slot has stored it */
153	stbs,ma		tmp4,1(d_addr)	/* DELAY SLOT: store the byte, d_addr += 1 */
154	ldbs		1(s_addr),tmp4	/* tmp4 = second byte of s2 */
155	comib,=		0,tmp4,done	/* second byte is the null: finished once the delay slot has stored it */
156	stbs,ma		tmp4,1(d_addr)	/* DELAY SLOT: store the byte, d_addr += 1 */
157	bv		0(r2)		/* done: the third byte must be the null */
158	stbs		0,0(d_addr)	/* DELAY SLOT: store the terminating null */
159
160first_null2:	/* null found in first word of non-aligned (wrt d_addr) copy; save = masked word, tmp6 = dest offset (1..3) */
161	addibt,=	-1,tmp6,check3	/* offset was 1: check last 3 bytes of word */
162	extru   	save,15,8,tmp4	/* DELAY SLOT: tmp4 = second byte of save */
163	addibt,=,n	-1,tmp6,check2	/* offset was 2: check last 2 bytes */
164	bv		0(r2)	/* offset was 3: only the last byte is valid, so it is the null; return */
165	stbys,b		save, 0(d_addr)	/* DELAY SLOT: store from d_addr's byte offset to the end of the word */
166
167check3:
168	combt,=		tmp4,r0,done	/* second byte is the null: finished once the delay slot has stored it */
169	stbs,ma		tmp4,1(d_addr)	/* DELAY SLOT: store the byte, d_addr += 1 */
170check2:
171	extru,<>	save,23,8,tmp4	/* tmp4 = third byte; if it is non-zero skip the bv below */
172	bv		0(r2)	/* third byte is the null: return, the delay slot stores it */
173	stbs,ma		tmp4,1(d_addr)	/* store the third byte, d_addr += 1 (delay slot of the bv above, or plain instruction when the bv was nullified) */
174	bv		0(r2)	/* third byte was not the null, so the fourth is: return */
175	stbs		r0,0(d_addr)	/* DELAY SLOT: store the terminating null */
176
177done:
178EXIT(strcat)	/* EXIT comes from DEFS.h (not visible here); presumably emits the return sequence, verify */
179