1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 M68000 Hi-Performance Microprocessor Division
4 M68060 Software Package
5 Production Release P1.00 -- October 10, 1994
6 
7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
8 
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 To the maximum extent permitted by applicable law,
11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15 
16 To the maximum extent permitted by applicable law,
17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22 
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
26 No licenses are granted by implication, estoppel or otherwise under any patents
27 or trademarks of Motorola, Inc.
28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 #
30 # freal.s:
31 #	This file is appended to the top of the 060FPSP package
32 # and contains the entry points into the package. The user, in
33 # effect, branches to one of the branch table entries located
34 # after _060FPSP_TABLE.
35 #	Also, subroutine stubs exist in this file (_fpsp_done for
36 # example) that are referenced by the FPSP package itself in order
37 # to call a given routine. The stub routine actually performs the
38 # callout. The FPSP code does a "bsr" to the stub routine. This
39 # extra layer of hierarchy adds a slight performance penalty but
40 # it makes the FPSP code easier to read and more maintainable.
41 #
42 
43 set	_off_bsun,	0x00		# slot offsets are relative to _060FPSP_TABLE-0x80; read by _real_bsun
44 set	_off_snan,	0x04		# slot read by _real_snan
45 set	_off_operr,	0x08		# slot read by _real_operr
46 set	_off_ovfl,	0x0c		# slot read by _real_ovfl
47 set	_off_unfl,	0x10		# slot read by _real_unfl
48 set	_off_dz,	0x14		# slot read by _real_dz
49 set	_off_inex,	0x18		# slot read by _real_inex
50 set	_off_fline,	0x1c		# slot read by _real_fline
51 set	_off_fpu_dis,	0x20		# slot read by _real_fpu_disabled
52 set	_off_trap,	0x24		# slot read by _real_trap
53 set	_off_trace,	0x28		# slot read by _real_trace
54 set	_off_access,	0x2c		# slot read by _real_access
55 set	_off_done,	0x30		# slot read by _fpsp_done
56 # slots read by the memory-access stubs (_imem_*/_dmem_*); 0x34-0x3c have no name here
57 set	_off_imr,	0x40		# slot read by _imem_read
58 set	_off_dmr,	0x44		# slot read by _dmem_read
59 set	_off_dmw,	0x48		# slot read by _dmem_write
60 set	_off_irw,	0x4c		# slot read by _imem_read_word
61 set	_off_irl,	0x50		# slot read by _imem_read_long
62 set	_off_drb,	0x54		# slot read by _dmem_read_byte
63 set	_off_drw,	0x58		# slot read by _dmem_read_word
64 set	_off_drl,	0x5c		# slot read by _dmem_read_long
65 set	_off_dwb,	0x60		# slot read by _dmem_write_byte
66 set	_off_dww,	0x64		# slot read by _dmem_write_word
67 set	_off_dwl,	0x68		# slot read by _dmem_write_long
68 
69 _060FPSP_TABLE:
70 # NOTE: the callout stubs below address their slots as _060FPSP_TABLE-0x80+_off_*
71 ###############################################################
72 # each entry is a bra.l (6 bytes) followed by a zero word, i.e. 8 bytes per entry
73 # Here's the table of ENTRY POINTS for those linking the package.
74 	bra.l		_fpsp_snan
75 	short		0x0000		# pad word
76 	bra.l		_fpsp_operr
77 	short		0x0000		# pad word
78 	bra.l		_fpsp_ovfl	# FP Overflow entry point (defined in this file)
79 	short		0x0000		# pad word
80 	bra.l		_fpsp_unfl	# FP Underflow entry point (defined in this file)
81 	short		0x0000		# pad word
82 	bra.l		_fpsp_dz
83 	short		0x0000		# pad word
84 	bra.l		_fpsp_inex
85 	short		0x0000		# pad word
86 	bra.l		_fpsp_fline
87 	short		0x0000		# pad word
88 	bra.l		_fpsp_unsupp
89 	short		0x0000		# pad word
90 	bra.l		_fpsp_effadd
91 	short		0x0000		# pad word
92 # 9 entries * 8 bytes + 56 = 0x80 bytes, assuming `space` reserves bytes -- TODO confirm
93 	space		56
94 
95 ###############################################################
96 	global		_fpsp_done	# callout stub for 060FPSP exit: jumps through the _off_done slot
97 _fpsp_done:
98 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
99 	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0	# d0 = offset stored in the _off_done slot
100 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
101 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
102 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
103 
104 	global		_real_ovfl	# callout stub for enabled Overflow: jumps through the _off_ovfl slot
105 _real_ovfl:
106 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
107 	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0	# d0 = offset stored in the _off_ovfl slot
108 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
109 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
110 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
111 
112 	global		_real_unfl	# callout stub: jumps through the _off_unfl slot
113 _real_unfl:
114 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
115 	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0	# d0 = offset stored in the _off_unfl slot
116 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
117 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
118 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
119 
120 	global		_real_inex	# callout stub for enabled Inexact: jumps through the _off_inex slot
121 _real_inex:
122 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
123 	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0	# d0 = offset stored in the _off_inex slot
124 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
125 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
126 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
127 
128 	global		_real_bsun	# callout stub: jumps through the _off_bsun slot
129 _real_bsun:
130 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
131 	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0	# d0 = offset stored in the _off_bsun slot
132 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
133 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
134 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
135 
136 	global		_real_operr	# callout stub: jumps through the _off_operr slot
137 _real_operr:
138 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
139 	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0	# d0 = offset stored in the _off_operr slot
140 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
141 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
142 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
143 
144 	global		_real_snan	# callout stub: jumps through the _off_snan slot
145 _real_snan:
146 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
147 	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0	# d0 = offset stored in the _off_snan slot
148 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
149 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
150 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
151 
152 	global		_real_dz	# callout stub: jumps through the _off_dz slot
153 _real_dz:
154 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
155 	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0	# d0 = offset stored in the _off_dz slot
156 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
157 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
158 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
159 
160 	global		_real_fline	# callout stub: jumps through the _off_fline slot
161 _real_fline:
162 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
163 	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0	# d0 = offset stored in the _off_fline slot
164 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
165 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
166 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
167 
168 	global		_real_fpu_disabled	# callout stub: jumps through the _off_fpu_dis slot
169 _real_fpu_disabled:
170 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
171 	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0	# d0 = offset stored in the _off_fpu_dis slot
172 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
173 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
174 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
175 
176 	global		_real_trap	# callout stub: jumps through the _off_trap slot
177 _real_trap:
178 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
179 	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0	# d0 = offset stored in the _off_trap slot
180 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
181 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
182 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
183 
184 	global		_real_trace	# callout stub for Trace exception code: jumps through the _off_trace slot
185 _real_trace:
186 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
187 	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0	# d0 = offset stored in the _off_trace slot
188 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
189 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
190 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
191 
192 	global		_real_access	# callout stub: jumps through the _off_access slot
193 _real_access:
194 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
195 	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0	# d0 = offset stored in the _off_access slot
196 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
197 	mov.l		0x4(%sp),%d0	# restore d0 from its save slot
198 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
199 
200 #######################################
201 
202 	global		_imem_read	# callout stub: jumps through the _off_imr slot
203 _imem_read:
204 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
205 	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0	# d0 = offset stored in the _off_imr slot
206 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
207 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
208 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
209 
210 	global		_dmem_read	# callout stub: jumps through the _off_dmr slot
211 _dmem_read:
212 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
213 	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0	# d0 = offset stored in the _off_dmr slot
214 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
215 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
216 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
217 
218 	global		_dmem_write	# callout stub: jumps through the _off_dmw slot
219 _dmem_write:
220 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
221 	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0	# d0 = offset stored in the _off_dmw slot
222 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
223 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
224 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
225 
226 	global		_imem_read_word	# callout stub: jumps through the _off_irw slot
227 _imem_read_word:
228 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
229 	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0	# d0 = offset stored in the _off_irw slot
230 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
231 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
232 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
233 
234 	global		_imem_read_long	# callout stub (read instruction longword): jumps through the _off_irl slot
235 _imem_read_long:
236 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
237 	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0	# d0 = offset stored in the _off_irl slot
238 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
239 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
240 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
241 
242 	global		_dmem_read_byte	# callout stub: jumps through the _off_drb slot
243 _dmem_read_byte:
244 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
245 	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0	# d0 = offset stored in the _off_drb slot
246 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
247 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
248 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
249 
250 	global		_dmem_read_word	# callout stub: jumps through the _off_drw slot
251 _dmem_read_word:
252 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
253 	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0	# d0 = offset stored in the _off_drw slot
254 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
255 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
256 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
257 
258 	global		_dmem_read_long	# callout stub: jumps through the _off_drl slot
259 _dmem_read_long:
260 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
261 	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0	# d0 = offset stored in the _off_drl slot
262 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
263 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
264 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
265 
266 	global		_dmem_write_byte	# callout stub: jumps through the _off_dwb slot
267 _dmem_write_byte:
268 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
269 	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0	# d0 = offset stored in the _off_dwb slot
270 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
271 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
272 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
273 
274 	global		_dmem_write_word	# callout stub: jumps through the _off_dww slot
275 _dmem_write_word:
276 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
277 	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0	# d0 = offset stored in the _off_dww slot
278 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
279 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
280 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
281 
282 	global		_dmem_write_long	# callout stub: jumps through the _off_dwl slot
283 _dmem_write_long:
284 	mov.l		%d0,-(%sp)	# save d0 (used as scratch below)
285 	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0	# d0 = offset stored in the _off_dwl slot
286 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target = (_060FPSP_TABLE-0x80) + d0
287 	mov.l		0x4(%sp),%d0	# restore d0, so the target sees the caller's d0
288 	rtd		&0x4		# jump to the pushed target and drop the d0 save slot
289 
290 #
291 # This file contains a set of define statements for constants
292 # in order to promote readability within the corecode itself.
293 #
294 
295 set LOCAL_SIZE,		192			# stack frame size(bytes)
296 set LV,			-LOCAL_SIZE		# offset of the frame's lowest byte from a6
297 # positive offsets from a6 reach the exception stack frame above the link
298 set EXC_SR,		0x4			# stack status register
299 set EXC_PC,		0x6			# stack pc
300 set EXC_VOFF,		0xa			# stacked vector offset
301 set EXC_EA,		0xc			# stacked <ea>
302 
303 set EXC_FP,		0x0			# frame pointer
304 # negative offsets from a6 address the local frame (LV = -192 up to -1)
305 set EXC_AREGS,		-68			# offset of all address regs
306 set EXC_DREGS,		-100			# offset of all data regs
307 set EXC_FPREGS,		-36			# offset of all fp regs
308 
309 set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
310 set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7 (same offset as EXC_A6)
311 set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
312 set EXC_A5,		EXC_AREGS+(5*4)
313 set EXC_A4,		EXC_AREGS+(4*4)
314 set EXC_A3,		EXC_AREGS+(3*4)
315 set EXC_A2,		EXC_AREGS+(2*4)
316 set EXC_A1,		EXC_AREGS+(1*4)
317 set EXC_A0,		EXC_AREGS+(0*4)
318 set EXC_D7,		EXC_DREGS+(7*4)
319 set EXC_D6,		EXC_DREGS+(6*4)
320 set EXC_D5,		EXC_DREGS+(5*4)
321 set EXC_D4,		EXC_DREGS+(4*4)
322 set EXC_D3,		EXC_DREGS+(3*4)
323 set EXC_D2,		EXC_DREGS+(2*4)
324 set EXC_D1,		EXC_DREGS+(1*4)
325 set EXC_D0,		EXC_DREGS+(0*4)
326 
327 set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
328 set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
329 set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
330 
331 set FP_SCR1,		LV+80			# fp scratch 1
332 set FP_SCR1_EX,		FP_SCR1+0
333 set FP_SCR1_SGN,	FP_SCR1+2
334 set FP_SCR1_HI,		FP_SCR1+4
335 set FP_SCR1_LO,		FP_SCR1+8
336 
337 set FP_SCR0,		LV+68			# fp scratch 0
338 set FP_SCR0_EX,		FP_SCR0+0
339 set FP_SCR0_SGN,	FP_SCR0+2
340 set FP_SCR0_HI,		FP_SCR0+4
341 set FP_SCR0_LO,		FP_SCR0+8
342 
343 set FP_DST,		LV+56			# fp destination operand
344 set FP_DST_EX,		FP_DST+0
345 set FP_DST_SGN,		FP_DST+2
346 set FP_DST_HI,		FP_DST+4
347 set FP_DST_LO,		FP_DST+8
348 
349 set FP_SRC,		LV+44			# fp source operand
350 set FP_SRC_EX,		FP_SRC+0
351 set FP_SRC_SGN,		FP_SRC+2
352 set FP_SRC_HI,		FP_SRC+4
353 set FP_SRC_LO,		FP_SRC+8
354 
355 set USER_FPIAR,		LV+40			# FP instr address register
356 
357 set USER_FPSR,		LV+36			# FP status register
358 set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
359 set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
360 set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
361 set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
362 
363 set USER_FPCR,		LV+32			# FP control register
364 set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
365 set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
366 
367 set L_SCR3,		LV+28			# integer scratch 3
368 set L_SCR2,		LV+24			# integer scratch 2
369 set L_SCR1,		LV+20			# integer scratch 1
370 
371 set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
372 
373 set EXC_TEMP2,		LV+24			# temporary space (same offset as L_SCR2)
374 set EXC_TEMP,		LV+16			# temporary space
375 
376 set DTAG,		LV+15			# destination operand type
377 set STAG,		LV+14			# source operand type
378 
379 set SPCOND_FLG,		LV+10			# flag: special case (see below)
380 
381 set EXC_CC,		LV+8			# saved condition codes
382 set EXC_EXTWPTR,	LV+4			# saved current PC (active)
383 set EXC_EXTWORD,	LV+2			# saved extension word
384 set EXC_CMDREG,		LV+2			# saved extension word (same offset as EXC_EXTWORD)
385 set EXC_OPWORD,		LV+0			# saved operation word
386 
387 ################################
388 
389 # Helpful macros
390 
391 set FTEMP,		0			# offsets within an
392 set FTEMP_EX,		0			# extended precision
393 set FTEMP_SGN,		2			# value saved in memory.
394 set FTEMP_HI,		4
395 set FTEMP_LO,		8
396 set FTEMP_GRS,		12			# presumably guard/round/sticky bits -- TODO confirm
397 
398 set LOCAL,		0			# offsets within an
399 set LOCAL_EX,		0			# extended precision
400 set LOCAL_SGN,		2			# value saved in memory.
401 set LOCAL_HI,		4
402 set LOCAL_LO,		8
403 set LOCAL_GRS,		12			# same offset as FTEMP_GRS
404 
405 set DST,		0			# offsets within an
406 set DST_EX,		0			# extended precision
407 set DST_HI,		4			# value saved in memory.
408 set DST_LO,		8
409 
410 set SRC,		0			# offsets within an
411 set SRC_EX,		0			# extended precision
412 set SRC_HI,		4			# value saved in memory.
413 set SRC_LO,		8
414 # the limits below equal EXT_BIAS plus/minus the offsets noted on each line
415 set SGL_LO,		0x3f81			# min sgl prec exponent (EXT_BIAS-126)
416 set SGL_HI,		0x407e			# max sgl prec exponent (EXT_BIAS+127)
417 set DBL_LO,		0x3c01			# min dbl prec exponent (EXT_BIAS-1022)
418 set DBL_HI,		0x43fe			# max dbl prec exponent (EXT_BIAS+1023)
419 set EXT_LO,		0x0			# min ext prec exponent
420 set EXT_HI,		0x7ffe			# max ext prec exponent
421 
422 set EXT_BIAS,		0x3fff			# extended precision bias
423 set SGL_BIAS,		0x007f			# single precision bias
424 set DBL_BIAS,		0x03ff			# double precision bias
425 
426 set NORM,		0x00			# operand type for STAG/DTAG
427 set ZERO,		0x01			# operand type for STAG/DTAG
428 set INF,		0x02			# operand type for STAG/DTAG
429 set QNAN,		0x03			# operand type for STAG/DTAG
430 set DENORM,		0x04			# operand type for STAG/DTAG
431 set SNAN,		0x05			# operand type for STAG/DTAG
432 set UNNORM,		0x06			# operand type for STAG/DTAG
433 
434 ##################
435 # FPSR/FPCR bits #
436 ##################
437 set neg_bit,		0x3			# negative result
438 set z_bit,		0x2			# zero result
439 set inf_bit,		0x1			# infinite result
440 set nan_bit,		0x0			# NAN result
441 # bit numbers in this section are relative to the byte that holds the field
442 set q_sn_bit,		0x7			# sign bit of quotient byte
443 # exception status / exception enable byte
444 set bsun_bit,		7			# branch on unordered
445 set snan_bit,		6			# signalling NAN
446 set operr_bit,		5			# operand error
447 set ovfl_bit,		4			# overflow
448 set unfl_bit,		3			# underflow
449 set dz_bit,		2			# divide by zero
450 set inex2_bit,		1			# inexact result 2
451 set inex1_bit,		0			# inexact result 1
452 # accrued exception byte
453 set aiop_bit,		7			# accrued invalid operation bit
454 set aovfl_bit,		6			# accrued overflow bit
455 set aunfl_bit,		5			# accrued underflow bit
456 set adz_bit,		4			# accrued dz bit
457 set ainex_bit,		3			# accrued inexact bit
458 
459 #############################
460 # FPSR individual bit masks #
461 #############################
462 set neg_mask,		0x08000000		# negative bit mask (lw)
463 set inf_mask,		0x02000000		# infinity bit mask (lw)
464 set z_mask,		0x04000000		# zero bit mask (lw)
465 set nan_mask,		0x01000000		# nan bit mask (lw)
466 
467 set neg_bmask,		0x08			# negative bit mask (byte)
468 set inf_bmask,		0x02			# infinity bit mask (byte)
469 set z_bmask,		0x04			# zero bit mask (byte)
470 set nan_bmask,		0x01			# nan bit mask (byte)
471 
472 set bsun_mask,		0x00008000		# bsun exception mask
473 set snan_mask,		0x00004000		# snan exception mask
474 set operr_mask,		0x00002000		# operr exception mask
475 set ovfl_mask,		0x00001000		# overflow exception mask
476 set unfl_mask,		0x00000800		# underflow exception mask
477 set dz_mask,		0x00000400		# dz exception mask
478 set inex2_mask,		0x00000200		# inex2 exception mask
479 set inex1_mask,		0x00000100		# inex1 exception mask
480 
481 set aiop_mask,		0x00000080		# accrued invalid operation
482 set aovfl_mask,		0x00000040		# accrued overflow
483 set aunfl_mask,		0x00000020		# accrued underflow
484 set adz_mask,		0x00000010		# accrued divide by zero
485 set ainex_mask,		0x00000008		# accrued inexact
486 
487 ######################################
488 # FPSR combinations used in the FPSP #
489 ######################################
490 set dzinf_mask,		inf_mask+dz_mask+adz_mask
491 set opnan_mask,		nan_mask+operr_mask+aiop_mask
492 set nzi_mask,		0x01ffffff		# AND mask: clears N, Z, and I; keeps NAN and all lower bits
493 set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
494 set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask	# unfinx_mask without aunfl_mask
495 set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
496 set inx1a_mask,		inex1_mask+ainex_mask
497 set inx2a_mask,		inex2_mask+ainex_mask
498 set snaniop_mask,	nan_mask+snan_mask+aiop_mask
499 set snaniop2_mask,	snan_mask+aiop_mask	# snaniop_mask without nan_mask
500 set naniop_mask,	nan_mask+aiop_mask
501 set neginf_mask,	neg_mask+inf_mask
502 set infaiop_mask,	inf_mask+aiop_mask
503 set negz_mask,		neg_mask+z_mask
504 set opaop_mask,		operr_mask+aiop_mask
505 set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask	# unfinx_mask without inex2_mask
506 set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask	# ovfinx_mask without inex2_mask
507 
508 #########
509 # misc. #
510 #########
511 set rnd_stky_bit,	29			# stky bit pos in longword
512 
513 set sign_bit,		0x7			# sign bit
514 set signan_bit,		0x6			# signalling nan bit
515 
516 set sgl_thresh,		0x3f81			# minimum sgl exponent (same value as SGL_LO)
517 set dbl_thresh,		0x3c01			# minimum dbl exponent (same value as DBL_LO)
518 
519 set x_mode,		0x0			# extended precision
520 set s_mode,		0x4			# single precision
521 set d_mode,		0x8			# double precision
522 
523 set rn_mode,		0x0			# round-to-nearest
524 set rz_mode,		0x1			# round-to-zero
525 set rm_mode,		0x2			# round-to-minus-infinity
526 set rp_mode,		0x3			# round-to-plus-infinity
527 
528 set mantissalen,	64			# length of mantissa in bits
529 
530 set BYTE,		1			# len(byte) == 1 byte
531 set WORD,		2			# len(word) == 2 bytes
532 set LONG,		4			# len(longword) == 4 bytes
533 
534 set BSUN_VEC,		0xc0			# bsun    vector offset
535 set INEX_VEC,		0xc4			# inexact vector offset
536 set DZ_VEC,		0xc8			# dz      vector offset
537 set UNFL_VEC,		0xcc			# unfl    vector offset
538 set OPERR_VEC,		0xd0			# operr   vector offset
539 set OVFL_VEC,		0xd4			# ovfl    vector offset
540 set SNAN_VEC,		0xd8			# snan    vector offset
541 
542 ###########################
543 # SPecial CONDition FLaGs #
544 ###########################
545 set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
546 set fbsun_flg,		0x02			# flag bit: bsun exception
547 set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
548 set mda7_flg,		0x08			# flag bit: -(a7) <ea>
549 set fmovm_flg,		0x40			# flag bit: fmovm instruction (no *_bit twin below)
550 set immed_flg,		0x80			# flag bit: &<data> <ea>
551 # bit numbers matching the *_flg masks above (mask == 1 << bit)
552 set ftrapcc_bit,	0x0
553 set fbsun_bit,		0x1
554 set mia7_bit,		0x2
555 set mda7_bit,		0x3
556 set immed_bit,		0x7
557 
558 ##################################
559 # TRANSCENDENTAL "LAST-OP" FLAGS #
560 ##################################
561 set FMUL_OP,		0x0			# fmul performed last
562 set FDIV_OP,		0x1			# fdiv performed last
563 set FADD_OP,		0x2			# fadd performed last
564 set FMOV_OP,		0x3			# fmov performed last
565 
566 #############
567 # CONSTANTS #
568 #############
569 T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD (64-bit double-format image)
570 T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL (64-bit double-format image)
571 
572 PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000	# pi: exponent word 0x4000, 64-bit mantissa, then a zero longword
573 PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000	# pi/2: same mantissa as PI, exponent word 0x3FFF
574 
575 TWOBYPI:
576 	long		0x3FE45F30,0x6DC9C883	# 2/pi as a 64-bit double-format image
577 
578 #########################################################################
579 # XDEF ****************************************************************	#
580 #	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
581 #									#
582 #	This handler should be the first code executed upon taking the	#
583 #	FP Overflow exception in an operating system.			#
584 #									#
585 # XREF ****************************************************************	#
586 #	_imem_read_long() - read instruction longword			#
587 #	fix_skewed_ops() - adjust src operand in fsave frame		#
588 #	set_tag_x() - determine optype of src/dst operands		#
589 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
590 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
591 #	load_fpn2() - load dst operand from FP regfile			#
592 #	fout() - emulate an opclass 3 instruction			#
593 #	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
594 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
595 #	_real_ovfl() - "callout" for Overflow exception enabled code	#
596 #	_real_inex() - "callout" for Inexact exception enabled code	#
597 #	_real_trace() - "callout" for Trace exception code		#
598 #									#
599 # INPUT ***************************************************************	#
600 #	- The system stack contains the FP Ovfl exception stack frame	#
601 #	- The fsave frame contains the source operand			#
602 #									#
603 # OUTPUT **************************************************************	#
604 #	Overflow Exception enabled:					#
605 #	- The system stack is unchanged					#
606 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
607 #	Overflow Exception disabled:					#
608 #	- The system stack is unchanged					#
609 #	- The "exception present" flag in the fsave frame is cleared	#
610 #									#
611 # ALGORITHM ***********************************************************	#
612 #	On the 060, if an FP overflow is present as the result of any	#
613 # instruction, the 060 will take an overflow exception whether the	#
614 # exception is enabled or disabled in the FPCR. For the disabled case,	#
615 # This handler emulates the instruction to determine what the correct	#
616 # default result should be for the operation. This default result is	#
617 # then stored in either the FP regfile, data regfile, or memory.	#
618 # Finally, the handler exits through the "callout" _fpsp_done()		#
619 # denoting that no exceptional conditions exist within the machine.	#
620 #	If the exception is enabled, then this handler must create the	#
621 # exceptional operand and place it in the fsave state frame, and store	#
622 # the default result (only if the instruction is opclass 3). For	#
623 # exceptions enabled, this handler must exit through the "callout"	#
624 # _real_ovfl() so that the operating system enabled overflow handler	#
625 # can handle this case.							#
626 #	Two other conditions exist. First, if overflow was disabled	#
627 # but the inexact exception was enabled, this handler must exit		#
628 # through the "callout" _real_inex() regardless of whether the result	#
629 # was inexact.								#
630 #	Also, in the case of an opclass three instruction where		#
631 # overflow was disabled and the trace exception was enabled, this	#
632 # handler must exit through the "callout" _real_trace().		#
633 #									#
634 #########################################################################
635 
636 	global		_fpsp_ovfl
637 _fpsp_ovfl:
638 # main path handles opclass 0,2 instructions; an fmove out branches to fovfl_out
639 #$#	sub.l		&24,%sp			# make room for src/dst
640 
641 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
642 
643 	fsave		FP_SRC(%a6)		# grab the "busy" frame
644 
645 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
646 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
647 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
648 
649 # the FPIAR holds the "current PC" of the faulting instruction
650 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
651 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
652 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
653 	bsr.l		_imem_read_long		# fetch the instruction words
654 	mov.l		%d0,EXC_OPWORD(%a6)	# store both words: EXC_OPWORD and EXC_EXTWORD/EXC_CMDREG
655 
656 ##############################################################################
657 
658 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
659 	bne.w		fovfl_out		# yes; handled by fout()
660 
661 
662 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
663 	bsr.l		fix_skewed_ops		# fix src op
664 
665 # since, I believe, only NORMs and DENORMs can come through here,
666 # maybe we can avoid the subroutine call.
667 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
668 	bsr.l		set_tag_x		# tag the operand type
669 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
670 
671 # bit five of the fp extension word separates the monadic and dyadic operations
672 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
673 # will never take this exception.
674 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
675 	beq.b		fovfl_extract		# monadic
676 
677 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
678 	bsr.l		load_fpn2		# load dst into FP_DST
679 
680 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
681 	bsr.l		set_tag_x		# tag the operand type
682 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
683 	bne.b		fovfl_op2_done		# no
684 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
685 fovfl_op2_done:
686 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
687 
688 fovfl_extract:
689 
690 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
691 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
692 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
693 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
694 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
695 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
696 
697 	clr.l		%d0
698 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
699 
700 	mov.b		1+EXC_CMDREG(%a6),%d1	# d1 = low byte of the extension word
701 	andi.w		&0x007f,%d1		# extract extension
702 
703 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # keep quotient byte, inex1 and accrued byte; zero the rest
704 
705 	fmov.l		&0x0,%fpcr		# zero current control regs
706 	fmov.l		&0x0,%fpsr
707 
708 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
709 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
710 
711 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
712 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset from tbl_unsupp[extension]
713 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call routine at tbl_unsupp + offset
714 
715 # the operation has been emulated. the result is in fp0.
716 # the EXOP, if an exception occurred, is in fp1.
717 # we must save the default result regardless of whether
718 # traps are enabled or disabled.
719 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# d0 = dst reg field of the extension word
720 	bsr.l		store_fpreg		# store result to FP regfile
721 
722 # the exceptional possibilities we have left ourselves with are ONLY overflow
723 # and inexact. and, the inexact is such that overflow occurred and was disabled
724 # but inexact was enabled.
725 	btst		&ovfl_bit,FPCR_ENABLE(%a6)	# is overflow enabled?
726 	bne.b		fovfl_ovfl_on		# yes
727 
728 	btst		&inex2_bit,FPCR_ENABLE(%a6)	# is inexact (inex2) enabled?
729 	bne.b		fovfl_inex_on		# yes
730 
731 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
732 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
733 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
734 
735 	unlk		%a6
736 #$#	add.l		&24,%sp
737 	bra.l		_fpsp_done		# neither enabled: exit, all work done
738 
739 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
740 # in fp1. now, simply jump to _real_ovfl()!
741 fovfl_ovfl_on:
742 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
743 # NOTE(review): offset 2 is presumably the status word of the fsave frame -- confirm
744 	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
745 
746 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
747 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
748 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
749 
750 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
751 
752 	unlk		%a6
753 
754 	bra.l		_real_ovfl		# exit through the enabled-overflow callout
755 
756 # overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
757 # we must jump to real_inex().
758 fovfl_inex_on:
759 
760 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
761 
762 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (same value as INEX_VEC)
763 	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
764 
765 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
766 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
767 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
768 
769 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
770 
771 	unlk		%a6
772 
773 	bra.l		_real_inex		# exit through the enabled-inexact callout
774 
775 ########################################################################
776 fovfl_out:
777 # fmove out (opclass 3) path: the instruction is emulated by fout()
778 
779 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
780 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
781 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
782 
783 # the src operand is definitely a NORM(!), so tag it as such
784 	mov.b		&NORM,STAG(%a6)		# set src optype tag
785 
786 	clr.l		%d0
787 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
788 
789 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero the exception status byte; keep cc, quotient, accrued
790 
791 	fmov.l		&0x0,%fpcr		# zero current control regs
792 	fmov.l		&0x0,%fpsr
793 
794 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
795 
796 	bsr.l		fout			# emulate the fmove out
797 
798 	btst		&ovfl_bit,FPCR_ENABLE(%a6)	# is overflow enabled?
799 	bne.w		fovfl_ovfl_on		# yes
800 
801 	btst		&inex2_bit,FPCR_ENABLE(%a6)	# is inexact (inex2) enabled?
802 	bne.w		fovfl_inex_on		# yes
803 
804 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
805 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
806 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
807 
808 	unlk		%a6
809 #$#	add.l		&24,%sp
810 # after unlk, (%sp) is the stacked SR (EXC_SR) of the exception frame
811 	btst		&0x7,(%sp)		# is trace on?
812 	beq.l		_fpsp_done		# no
813 
814 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
815 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
816 	bra.l		_real_trace		# exit through the trace callout
817 
818 #########################################################################
819 # XDEF ****************************************************************	#
820 #	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
821 #									#
822 #	This handler should be the first code executed upon taking the	#
823 #	FP Underflow exception in an operating system.			#
824 #									#
825 # XREF ****************************************************************	#
826 #	_imem_read_long() - read instruction longword			#
827 #	fix_skewed_ops() - adjust src operand in fsave frame		#
828 #	set_tag_x() - determine optype of src/dst operands		#
829 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
830 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
831 #	load_fpn2() - load dst operand from FP regfile			#
832 #	fout() - emulate an opclass 3 instruction			#
833 #	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
834 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
835 #	_real_unfl() - "callout" for Underflow exception enabled code	#
836 #	_real_inex() - "callout" for Inexact exception enabled code	#
837 #	_real_trace() - "callout" for Trace exception code		#
838 #									#
839 # INPUT ***************************************************************	#
840 #	- The system stack contains the FP Unfl exception stack frame	#
841 #	- The fsave frame contains the source operand			#
842 #									#
843 # OUTPUT **************************************************************	#
844 #	Underflow Exception enabled:					#
845 #	- The system stack is unchanged					#
846 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
847 #	Underflow Exception disabled:					#
848 #	- The system stack is unchanged					#
849 #	- The "exception present" flag in the fsave frame is cleared	#
850 #									#
851 # ALGORITHM ***********************************************************	#
852 #	On the 060, if an FP underflow is present as the result of any	#
853 # instruction, the 060 will take an underflow exception whether the	#
854 # exception is enabled or disabled in the FPCR. For the disabled case,	#
855 # This handler emulates the instruction to determine what the correct	#
856 # default result should be for the operation. This default result is	#
857 # then stored in either the FP regfile, data regfile, or memory.	#
858 # Finally, the handler exits through the "callout" _fpsp_done()		#
859 # denoting that no exceptional conditions exist within the machine.	#
860 #	If the exception is enabled, then this handler must create the	#
861 # exceptional operand and place it in the fsave state frame, and store	#
862 # the default result (only if the instruction is opclass 3). For	#
863 # exceptions enabled, this handler must exit through the "callout"	#
864 # _real_unfl() so that the operating system enabled underflow handler	#
865 # can handle this case.							#
866 #	Two other conditions exist. First, if underflow was disabled	#
867 # but the inexact exception was enabled and the result was inexact,	#
868 # this handler must exit through the "callout" _real_inex().		#
869 #									#
870 #	Also, in the case of an opclass three instruction where		#
871 # underflow was disabled and the trace exception was enabled, this	#
872 # handler must exit through the "callout" _real_trace().		#
873 #									#
874 #########################################################################
875 
876 	global		_fpsp_unfl
877 _fpsp_unfl:
878 
879 #$#	sub.l		&24,%sp			# make room for src/dst
880 
881 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
882 
883 	fsave		FP_SRC(%a6)		# grab the "busy" frame
884 
885 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
886 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
887 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
888 
889 # the FPIAR holds the "current PC" of the faulting instruction
890 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
891 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
892 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
893 	bsr.l		_imem_read_long		# fetch the instruction words
894 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
895 
896 ##############################################################################
897 
898 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
899 	bne.w		funfl_out		# yes; handled separately
900 
901 
902 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
903 	bsr.l		fix_skewed_ops		# fix src op
904 
905 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
906 	bsr.l		set_tag_x		# tag the operand type
907 	mov.b		%d0,STAG(%a6)		# save src optype tag (maybe NORM,DENORM)
908 
909 # bit five of the fp ext word separates the monadic and dyadic operations
910 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
911 # will never take this exception.
912 	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
913 	beq.b		funfl_extract		# monadic
914 
915 # now, the only op left that's not dyadic is fsincos. we can distinguish it
916 # from all dyadics by the '0110xxx pattern (bit four is set).
917 	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
918 	bne.b		funfl_extract		# yes
919 
920 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; fetch dst regno
921 	bsr.l		load_fpn2		# load dst into FP_DST
922 
923 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
924 	bsr.l		set_tag_x		# tag the operand type
925 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
926 	bne.b		funfl_op2_done		# no
927 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
928 funfl_op2_done:
929 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
930 
931 funfl_extract:
932 
933 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
934 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
935 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
936 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
937 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
938 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
939 
940 	clr.l		%d0
941 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
942 
943 	mov.b		1+EXC_CMDREG(%a6),%d1
944 	andi.w		&0x007f,%d1		# extract extension
945 
946 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero ccodes; exc field except bit 8
947 
948 	fmov.l		&0x0,%fpcr		# zero current control regs
949 	fmov.l		&0x0,%fpsr
950 
951 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
952 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
953 
954 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
955 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
956 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
957 
958 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
959 	bsr.l		store_fpreg		# store the result
960 
961 # The `060 FPU multiplier hardware is such that if the result of a
962 # multiply operation is the smallest possible normalized number
963 # (0x00000000_80000000_00000000), then the machine will take an
964 # underflow exception. Since this is incorrect, we need to check
965 # if our emulation, after re-doing the operation, decided that
966 # no underflow was called for. We do these checks only in
967 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
968 # special case will simply exit gracefully with the correct result.
969 
970 # the exceptional possibilities we have left ourselves with are ONLY underflow
971 # and inexact. and, the inexact is such that underflow occurred and was disabled
972 # but inexact was enabled.
973 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
974 	bne.b		funfl_unfl_on		# yes
975 
976 funfl_chkinex:
977 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
978 	bne.b		funfl_inex_on		# yes
979 
980 funfl_exit:
981 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
982 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
983 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
984 
985 	unlk		%a6
986 #$#	add.l		&24,%sp
987 	bra.l		_fpsp_done		# exit; nothing left to report
988 
989 # underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
990 # in fp1 (don't forget to save fp0). what to do now?
991 # well, we simply have to get to go to _real_unfl()!
992 funfl_unfl_on:
993 
994 # The `060 FPU multiplier hardware is such that if the result of a
995 # multiply operation is the smallest possible normalized number
996 # (0x00000000_80000000_00000000), then the machine will take an
997 # underflow exception. Since this is incorrect, we check here to see
998 # if our emulation, after re-doing the operation, decided that
999 # no underflow was called for.
1000 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did emulation set underflow?
1001 	beq.w		funfl_chkinex		# no; it was the bogus case
1002 
1003 funfl_unfl_on2:
1004 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1005 
1006 	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1007 
1008 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1009 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1010 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1011 
1012 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1013 
1014 	unlk		%a6
1015 
1016 	bra.l		_real_unfl		# exit to the underflow "callout"
1017 
1018 # underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1019 # we must jump to real_inex().
1020 funfl_inex_on:
1021 
1022 # The `060 FPU multiplier hardware is such that if the result of a
1023 # multiply operation is the smallest possible normalized number
1024 # (0x00000000_80000000_00000000), then the machine will take an
1025 # underflow exception.
1026 # But, whether bogus or not, if inexact is enabled AND it occurred,
1027 # then we have to branch to real_inex.
1028 
1029 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # did emulation set inexact?
1030 	beq.w		funfl_exit		# no; exit normally
1031 
1032 funfl_inex_on2:
1033 
1034 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1035 
1036 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1037 	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1038 
1039 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1040 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1041 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1042 
1043 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1044 
1045 	unlk		%a6
1046 
1047 	bra.l		_real_inex		# exit to the inexact "callout"
1048 
1049 #######################################################################
1050 funfl_out:
1051 
1052 
1053 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1054 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1055 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1056 
1057 # the src operand is definitely a NORM(!), so tag it as such
1058 	mov.b		&NORM,STAG(%a6)		# set src optype tag
1059 
1060 	clr.l		%d0
1061 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1062 
1063 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1064 
1065 	fmov.l		&0x0,%fpcr		# zero current control regs
1066 	fmov.l		&0x0,%fpsr
1067 
1068 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1069 
1070 	bsr.l		fout			# call fmove out routine
1071 
1072 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1073 	bne.w		funfl_unfl_on2		# yes
1074 
1075 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1076 	bne.w		funfl_inex_on2		# yes
1077 
1078 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1079 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1080 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1081 
1082 	unlk		%a6
1083 #$#	add.l		&24,%sp
1084 
1085 	btst		&0x7,(%sp)		# is trace on?
1086 	beq.l		_fpsp_done		# no
1087 
1088 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1089 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1090 	bra.l		_real_trace		# exit to the trace "callout"
1091 
1092 #########################################################################
1093 # XDEF ****************************************************************	#
1094 #	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1095 #		        Data Type" exception.				#
1096 #									#
1097 #	This handler should be the first code executed upon taking the	#
1098 #	FP Unimplemented Data Type exception in an operating system.	#
1099 #									#
1100 # XREF ****************************************************************	#
1101 #	_imem_read_{word,long}() - read instruction word/longword	#
1102 #	fix_skewed_ops() - adjust src operand in fsave frame		#
1103 #	set_tag_x() - determine optype of src/dst operands		#
1104 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1105 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1106 #	load_fpn2() - load dst operand from FP regfile			#
1107 #	load_fpn1() - load src operand from FP regfile			#
1108 #	fout() - emulate an opclass 3 instruction			#
1109 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
1110 #	_real_inex() - "callout" to operating system inexact handler	#
1111 #	_fpsp_done() - "callout" for exit; work all done		#
1112 #	_real_trace() - "callout" for Trace enabled exception		#
1113 #	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1114 #	_real_snan() - "callout" for SNAN exception			#
1115 #	_real_operr() - "callout" for OPERR exception			#
1116 #	_real_ovfl() - "callout" for OVFL exception			#
1117 #	_real_unfl() - "callout" for UNFL exception			#
1118 #	get_packed() - fetch packed operand from memory			#
1119 #									#
1120 # INPUT ***************************************************************	#
1121 #	- The system stack contains the "Unimp Data Type" stk frame	#
1122 #	- The fsave frame contains the src op (for UNNORM/DENORM)	#
1123 #									#
1124 # OUTPUT **************************************************************	#
1125 #	If Inexact exception (opclass 3):				#
1126 #	- The system stack is changed to an Inexact exception stk frame	#
1127 #	If SNAN exception (opclass 3):					#
1128 #	- The system stack is changed to an SNAN exception stk frame	#
1129 #	If OPERR exception (opclass 3):					#
1130 #	- The system stack is changed to an OPERR exception stk frame	#
1131 #	If OVFL exception (opclass 3):					#
1132 #	- The system stack is changed to an OVFL exception stk frame	#
1133 #	If UNFL exception (opclass 3):					#
1134 #	- The system stack is changed to an UNFL exception stack frame	#
1135 #	If Trace exception enabled:					#
1136 #	- The system stack is changed to a Trace exception stack frame	#
1137 #	Else: (normal case)						#
1138 #	- Correct result has been stored as appropriate			#
1139 #									#
1140 # ALGORITHM ***********************************************************	#
1141 #	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1142 # unimplemented data types. These can be either opclass 0,2 or 3	#
1143 # instructions, and (2) PACKED unimplemented data format instructions	#
1144 # also of opclasses 0,2, or 3.						#
1145 #	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1146 # operand from the fsave state frame and the dst operand (if dyadic)	#
1147 # from the FP register file. The instruction is then emulated by	#
1148 # choosing an emulation routine from a table of routines indexed by	#
1149 # instruction type. Once the instruction has been emulated and result	#
1150 # saved, then we check to see if any enabled exceptions resulted from	#
1151 # instruction emulation. If none, then we exit through the "callout"	#
1152 # _fpsp_done(). If there is an enabled FP exception, then we insert	#
1153 # this exception into the FPU in the fsave state frame and then exit	#
1154 # through _fpsp_done().							#
1155 #	PACKED opclass 0 and 2 is similar in how the instruction is	#
1156 # emulated and exceptions handled. The differences occur in how the	#
1157 # handler loads the packed op (by calling get_packed() routine) and	#
1158 # by the fact that a Trace exception could be pending for PACKED ops.	#
1159 # If a Trace exception is pending, then the current exception stack	#
1160 # frame is changed to a Trace exception stack frame and an exit is	#
1161 # made through _real_trace().						#
1162 #	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1163 # performed by calling the routine fout(). If no exception should occur	#
1164 # as the result of emulation, then an exit either occurs through	#
1165 # _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1166 # (a Trace stack frame must be created here, too). If an FP exception	#
1167 # should occur, then we must create an exception stack frame of that	#
1168 # type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1169 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
1170 # emulation is performed in a similar manner.				#
1171 #									#
1172 #########################################################################
1173 
1174 #
1175 # (1) DENORM and UNNORM (unimplemented) data types:
1176 #
1177 #				post-instruction
1178 #				*****************
1179 #				*      EA	*
1180 #	 pre-instruction	*		*
1181 #	*****************	*****************
1182 #	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1183 #	*****************	*****************
1184 #	*     Next	*	*     Next	*
1185 #	*      PC	*	*      PC	*
1186 #	*****************	*****************
1187 #	*      SR	*	*      SR	*
1188 #	*****************	*****************
1189 #
1190 # (2) PACKED format (unsupported) opclasses two and three:
1191 #	*****************
1192 #	*      EA	*
1193 #	*		*
1194 #	*****************
1195 #	* 0x2 *  0x0dc	*
1196 #	*****************
1197 #	*     Next	*
1198 #	*      PC	*
1199 #	*****************
1200 #	*      SR	*
1201 #	*****************
1202 #
1203 	global		_fpsp_unsupp
1204 _fpsp_unsupp:
1205 
1206 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1207 
1208 	fsave		FP_SRC(%a6)		# save fp state
1209 
1210 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1211 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1212 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1213 
1214 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1215 	bne.b		fu_s			# supervisor
1216 fu_u:
1217 	mov.l		%usp,%a0		# fetch user stack pointer
1218 	mov.l		%a0,EXC_A7(%a6)		# save on stack
1219 	bra.b		fu_cont
1220 # if the exception is an opclass zero or two unimplemented data type
1221 # exception, then the a7' calculated here is wrong since it doesn't
1222 # stack an ea. however, we don't need an a7' for this case anyways.
1223 fu_s:
1224 	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1225 	mov.l		%a0,EXC_A7(%a6)		# save on stack
1226 
1227 fu_cont:
1228 
1229 # the FPIAR holds the "current PC" of the faulting instruction
1230 # the FPIAR should be set correctly for ALL exceptions passing through
1231 # this point.
1232 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1233 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1234 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1235 	bsr.l		_imem_read_long		# fetch the instruction words
1236 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1237 
1238 ############################
1239 
1240 	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1241 
1242 # Separate opclass three (fpn-to-mem) ops since they have a different
1243 # stack frame and protocol.
1244 	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1245 	bne.w		fu_out			# yes
1246 
1247 # Separate packed opclass two instructions.
1248 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1249 	cmpi.b		%d0,&0x13		# is class = 2 & fmt = packed?
1250 	beq.w		fu_in_pack		# yes
1251 
1252 
1253 # I'm not sure at this point what FPSR bits are valid for this instruction.
1254 # so, since the emulation routines re-create them anyways, zero exception field
1255 	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero ccodes and exception field
1256 
1257 	fmov.l		&0x0,%fpcr		# zero current control regs
1258 	fmov.l		&0x0,%fpsr
1259 
1260 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1261 # precision format if the src format was single or double and the
1262 # source data type was an INF, NAN, DENORM, or UNNORM
1263 	lea		FP_SRC(%a6),%a0		# pass ptr to input
1264 	bsr.l		fix_skewed_ops		# fix src op
1265 
1266 # we don't know whether the src operand or the dst operand (or both) is the
1267 # UNNORM or DENORM. call the function that tags the operand type. if the
1268 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1269 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1270 	bsr.l		set_tag_x		# tag the operand type
1271 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1272 	bne.b		fu_op2			# no
1273 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1274 
1275 fu_op2:
1276 	mov.b		%d0,STAG(%a6)		# save src optype tag
1277 
1278 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno (used if dyadic)
1279 
1280 # bit five of the fp extension word separates the monadic and dyadic operations
1281 # at this point
1282 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1283 	beq.b		fu_extract		# monadic
1284 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1285 	beq.b		fu_extract		# yes, so it's monadic, too
1286 
1287 	bsr.l		load_fpn2		# load dst into FP_DST
1288 
1289 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1290 	bsr.l		set_tag_x		# tag the operand type
1291 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1292 	bne.b		fu_op2_done		# no
1293 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1294 fu_op2_done:
1295 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1296 
1297 fu_extract:
1298 	clr.l		%d0
1299 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1300 
1301 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1302 
1303 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1304 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
1305 
1306 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1307 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
1308 
1309 #
1310 # Exceptions in order of precedence:
1311 #	BSUN	: none
1312 #	SNAN	: all dyadic ops
1313 #	OPERR	: fsqrt(-NORM)
1314 #	OVFL	: all except ftst,fcmp
1315 #	UNFL	: all except ftst,fcmp
1316 #	DZ	: fdiv
1317 #	INEX2	: all except ftst,fcmp
1318 #	INEX1	: none (packed doesn't go through here)
1319 #
1320 
1321 # we determine the highest priority exception(if any) set by the
1322 # emulation routine that has also been enabled by the user.
1323 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1324 	bne.b		fu_in_ena		# some are enabled
1325 
1326 fu_in_cont:
1327 # fcmp and ftst do not store any result.
1328 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1329 	andi.b		&0x38,%d0		# extract bits 3-5
1330 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1331 	beq.b		fu_in_exit		# yes
1332 
1333 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
1334 	bsr.l		store_fpreg		# store the result
1335 
1336 fu_in_exit:
1337 
1338 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1339 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1340 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1341 
1342 	unlk		%a6
1343 
1344 	bra.l		_fpsp_done
1345 
1346 fu_in_ena:
1347 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1348 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1349 	bne.b		fu_in_exc		# there is at least one set
1350 
1351 #
1352 # No exceptions occurred that were also enabled. Now:
1353 #
1354 #	if (OVFL && ovfl_disabled && inexact_enabled) {
1355 #	    branch to _real_inex() (even if the result was exact!);
1356 #	} else {
1357 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1358 #	    return;
1359 #	}
1360 #
1361 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1362 	beq.b		fu_in_cont		# no
1363 
1364 fu_in_ovflchk:
1365 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1366 	beq.b		fu_in_cont		# no
1367 	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1368 
1369 #
1370 # An exception occurred and that exception was enabled:
1371 #
1372 #	shift enabled exception field into lo byte of d0;
1373 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1374 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1375 #		/*
1376 #		 * this is the case where we must call _real_inex() now or else
1377 #		 * there will be no other way to pass it the exceptional operand
1378 #		 */
1379 #		call _real_inex();
1380 #	} else {
1381 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1382 #	}
1383 #
1384 fu_in_exc:
1385 	subi.l		&24,%d0			# fix offset to be 0-7
1386 	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1387 	bne.b		fu_in_exc_exit		# no
1388 
1389 # the enabled exception was inexact
1390 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1391 	bne.w		fu_in_exc_unfl		# yes
1392 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1393 	bne.w		fu_in_exc_ovfl		# yes
1394 
1395 # here, we insert the correct fsave status value into the fsave frame for the
1396 # corresponding exception. the operand in the fsave frame should be the original
1397 # src operand.
1398 fu_in_exc_exit:
1399 	mov.l		%d0,-(%sp)		# save d0
1400 	bsr.l		funimp_skew		# skew sgl or dbl inputs
1401 	mov.l		(%sp)+,%d0		# restore d0
1402 
1403 	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1404 
1405 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1406 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1407 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1408 
1409 	frestore	FP_SRC(%a6)		# restore src op
1410 
1411 	unlk		%a6
1412 
1413 	bra.l		_fpsp_done
1414 
1415 tbl_except:
1416 	short		0xe000,0xe006,0xe004,0xe005 # BSUN,SNAN,OPERR,OVFL
1417 	short		0xe003,0xe002,0xe001,0xe001 # UNFL,DZ,INEX2,INEX1
1418 
1419 fu_in_exc_unfl:
1420 	mov.w		&0x4,%d0		# tbl_except index for UNFL
1421 	bra.b		fu_in_exc_exit
1422 fu_in_exc_ovfl:
1423 	mov.w		&0x03,%d0		# tbl_except index for OVFL
1424 	bra.b		fu_in_exc_exit
1425 
1426 # If the instruction is opclass two with a single or double precision
1427 # source and the operand is a denorm, zero, inf, or nan, the operand needs
1428 # to be "corrected" in order to have the proper equivalent extended
1429 # precision number. in: a0 = ptr to the operand. d0 is used as scratch.
1430 	global		fix_skewed_ops
1431 fix_skewed_ops:
1432 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1433 	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1434 	beq.b		fso_sgl			# yes
1435 	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1436 	beq.b		fso_dbl			# yes
1437 	rts					# no; nothing to fix
1438 
1439 fso_sgl:
1440 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1441 	andi.w		&0x7fff,%d0		# strip sign
1442 	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1443 	beq.b		fso_sgl_dnrm_zero	# yes; skewed denorm or zero
1444 	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1445 	beq.b		fso_infnan		# yes; skewed inf or nan
1446 	rts					# no; operand is fine as is
1447 
1448 fso_sgl_dnrm_zero:
1449 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1450 	beq.b		fso_zero		# it's a skewed zero
1451 fso_sgl_dnrm:
1452 # here, we count on norm not to alter a0...
1453 	bsr.l		norm			# normalize mantissa
1454 	neg.w		%d0			# -shft amt
1455 	addi.w		&0x3f81,%d0		# adjust new exponent
1456 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1457 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1458 	rts
1459 
1460 fso_zero:
1461 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1462 	rts
1463 
1464 fso_infnan:
1465 	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
1466 	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1467 	rts
1468 
1469 fso_dbl:
1470 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1471 	andi.w		&0x7fff,%d0		# strip sign
1472 	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1473 	beq.b		fso_dbl_dnrm_zero	# yes; skewed denorm or zero
1474 	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1475 	beq.b		fso_infnan		# yes; skewed inf or nan
1476 	rts					# no; operand is fine as is
1477 
1478 fso_dbl_dnrm_zero:
1479 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1480 	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1481 	tst.l		LOCAL_LO(%a0)		# is it a zero?
1482 	beq.b		fso_zero		# yes
1483 fso_dbl_dnrm:
1484 # here, we count on norm not to alter a0...
1485 	bsr.l		norm			# normalize mantissa
1486 	neg.w		%d0			# -shft amt
1487 	addi.w		&0x3c01,%d0		# adjust new exponent
1488 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1489 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1490 	rts
1491 
1492 #################################################################
1493 
1494 # fmove out took an unimplemented data type exception.
1495 # the src operand is in FP_SRC. Call _fout() to write out the result and
1496 # to determine which exceptions, if any, to take.
1497 fu_out:
1498 
1499 # Separate packed move outs from the UNNORM and DENORM move outs.
1500 	bfextu		EXC_CMDREG(%a6){&3:&3},%d0 # extract dst fmt
1501 	cmpi.b		%d0,&0x3		# is fmt = packed (0x3)?
1502 	beq.w		fu_out_pack		# yes
1503 	cmpi.b		%d0,&0x7		# is fmt = packed (0x7)?
1504 	beq.w		fu_out_pack		# yes
1505 
1506 
1507 # I'm not sure at this point what FPSR bits are valid for this instruction.
1508 # so, since the emulation routines re-create them anyways, zero exception field.
1509 # fmove out doesn't affect ccodes.
1510 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1511 
1512 	fmov.l		&0x0,%fpcr		# zero current control regs
1513 	fmov.l		&0x0,%fpsr
1514 
1515 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1516 # call here. just figure out what it is...
1517 	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1518 	andi.w		&0x7fff,%d0		# strip sign
1519 	beq.b		fu_out_denorm		# it's a DENORM
1520 
1521 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1522 	bsr.l		unnorm_fix		# it's an UNNORM; fix it
1523 
1524 	mov.b		%d0,STAG(%a6)		# save src optype tag
1525 
1526 	bra.b		fu_out_cont
1527 fu_out_denorm:
1528 	mov.b		&DENORM,STAG(%a6)	# set src optype tag
1529 fu_out_cont:
1530 
1531 	clr.l		%d0
1532 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1533 
1534 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1535 
1536 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1537 	bsr.l		fout			# call fmove out routine
1538 
1539 # Exceptions in order of precedence:
1540 #	BSUN	: none
1541 #	SNAN	: none
1542 #	OPERR	: fmove.{b,w,l} out of large UNNORM
1543 #	OVFL	: fmove.{s,d}
1544 #	UNFL	: fmove.{s,d,x}
1545 #	DZ	: none
1546 #	INEX2	: all
1547 #	INEX1	: none (packed doesn't travel through here)
1548 
1549 # determine the highest priority exception(if any) set by the
1550 # emulation routine that has also been enabled by the user.
1551 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1552 	bne.w		fu_out_ena		# some are enabled
1553 
1554 fu_out_done:
1555 
1556 	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1557 
1558 # on extended precision opclass three instructions using pre-decrement or
1559 # post-increment addressing mode, the address register is not updated. if the
1560 # address register was the stack pointer used from user mode, then let's update
1561 # it here. if it was used from supervisor mode, then we have to handle this
1562 # as a special case.
1563 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1564 	bne.b		fu_out_done_s		# supervisor
1565 
1566 	mov.l		EXC_A7(%a6),%a0		# restore a7
1567 	mov.l		%a0,%usp
1568 
1569 fu_out_done_cont:
1570 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1571 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1572 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1573 
1574 	unlk		%a6
1575 
1576 	btst		&0x7,(%sp)		# is trace on?
1577 	bne.b		fu_out_trace		# yes
1578 
1579 	bra.l		_fpsp_done
1580 
1581 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1582 # ("fmov.x fpm,-(a7)") if so, the exc frame must be shifted to make room.
1583 fu_out_done_s:
1584 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1585 	bne.b		fu_out_done_cont	# no
1586 
1587 # the extended precision result is still in fp0. but, we need to save it
1588 # somewhere on the stack until we can copy it to its final resting place.
1589 # here, we're counting on the top of the stack to be the old place-holders
1590 # for fp0/fp1 which have already been restored. that way, we can write
1591 # over those destinations with the shifted stack frame.
1592 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1593 
1594 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1595 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1596 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1597 
1598 	mov.l		(%a6),%a6		# restore frame pointer
1599 
1600 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift frame down 12 bytes
1601 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # (second longword)
1602 
1603 # now, copy the result to the proper place on the stack
1604 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1605 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1606 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1607 
1608 	add.l		&LOCAL_SIZE-0x8,%sp	# point sp at the shifted exc frame
1609 
1610 	btst		&0x7,(%sp)		# is trace on?
1611 	bne.b		fu_out_trace		# yes
1612 
1613 	bra.l		_fpsp_done
1614 
1615 fu_out_ena:
1616 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1617 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1618 	bne.b		fu_out_exc		# there is at least one set
1619 
1620 # no enabled exceptions were set.
1621 # if a disabled overflow occurred and inexact was enabled but the result
1622 # was exact, then a branch to _real_inex() is made.
1623 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1624 	beq.w		fu_out_done		# no
1625 
1626 fu_out_ovflchk:
1627 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1628 	beq.w		fu_out_done		# no
1629 	bra.w		fu_inex			# yes
1630 
1631 #
1632 # The fp move out that took the "Unimplemented Data Type" exception was
1633 # being traced. Since the stack frames are similar, get the "current" PC
1634 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1635 #
1636 #		  UNSUPP FRAME		   TRACE FRAME
1637 #		*****************	*****************
1638 #		*      EA	*	*    Current	*
1639 #		*		*	*      PC	*
1640 #		*****************	*****************
1641 #		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1642 #		*****************	*****************
1643 #		*     Next	*	*     Next	*
1644 #		*      PC	*	*      PC	*
1645 #		*****************	*****************
1646 #		*      SR	*	*      SR	*
1647 #		*****************	*****************
1648 #
1649 fu_out_trace:
1650 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1651 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1652 	bra.l		_real_trace
1653 
1654 # an exception occurred and that exception was enabled.
1655 fu_out_exc:
1656 	subi.l		&24,%d0			# fix offset to be 0-7
1657 
1658 # we don't mess with the existing fsave frame. just re-insert it and
1659 # jump to the "_real_{}()" handler...
1660 	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset
1661 	jmp		(tbl_fu_out.b,%pc,%d0.w*1) # jump to the handler
1662 
1663 	swbeg		&0x8
1664 tbl_fu_out:
1665 	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1666 	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
1667 	short		fu_operr	- tbl_fu_out	# OPERR
1668 	short		fu_ovfl		- tbl_fu_out	# OVFL
1669 	short		fu_unfl		- tbl_fu_out	# UNFL
1670 	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1671 	short		fu_inex		- tbl_fu_out	# INEX2
1672 	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1673 
1674 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1675 # frestore it.
1676 fu_snan:	# NOTE(review): no tbl_fu_out entry points here; presumably reached from elsewhere - confirm
1677 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1678 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1679 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1680 
1681 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1682 	mov.w		&0xe006,2+FP_SRC(%a6)	# save exc status
1683 
1684 	frestore	FP_SRC(%a6)		# restore src op
1685 
1686 	unlk		%a6
1687 
1688 
1689 	bra.l		_real_snan
1690 
1691 fu_operr:
1692 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1693 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1694 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1695 
1696 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1697 	mov.w		&0xe004,2+FP_SRC(%a6)	# save exc status
1698 
1699 	frestore	FP_SRC(%a6)		# restore src op
1700 
1701 	unlk		%a6
1702 
1703 
1704 	bra.l		_real_operr
1705 
1706 fu_ovfl:
1707 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1708 
1709 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1710 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1711 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1712 
1713 	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1714 	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
1715 
1716 	frestore	FP_SRC(%a6)		# restore EXOP
1717 
1718 	unlk		%a6
1719 
1720 	bra.l		_real_ovfl
1721 
1722 # underflow can happen for extended precision. extended precision opclass
1723 # three instruction exceptions don't update the stack pointer. so, if the
1724 # exception occurred from user mode, then simply update a7 and exit normally.
1725 # if the exception occurred from supervisor mode, check if the <ea> mode was -(sp).
1726 fu_unfl:
1727 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1728 
1729 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1730 	bne.w		fu_unfl_s		# supervisor
1731 
1732 	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1733 	mov.l		%a0,%usp		# to or not...
1734 
1735 fu_unfl_cont:
1736 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1737 
1738 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1739 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1740 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1741 
1742 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1743 	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1744 
1745 	frestore	FP_SRC(%a6)		# restore EXOP
1746 
1747 	unlk		%a6
1748 
1749 	bra.l		_real_unfl
1750 
1751 fu_unfl_s:
1752 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1753 	bne.b		fu_unfl_cont
1754 
1755 # the extended precision result is still in fp0. but, we need to save it
1756 # somewhere on the stack until we can copy it to its final resting place
1757 # (where the exc frame is currently). make sure it's not at the top of the
1758 # frame or it will get overwritten when the exc stack frame is shifted "down".
1759 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1760 	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1761 
1762 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1763 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1764 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1765 
1766 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1767 	mov.w		&0xe003,2+FP_DST(%a6)
1768 
1769 	frestore	FP_DST(%a6)		# restore EXOP
1770 
1771 	mov.l		(%a6),%a6		# restore frame pointer
1772 
1773 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1774 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1775 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1776 
1777 # now, copy the result to the proper place on the stack
1778 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1779 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1780 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1781 
1782 	add.l		&LOCAL_SIZE-0x8,%sp
1783 
1784 	bra.l		_real_unfl
1785 
1786 # fu_inex: fmove in and out both exit here; the EXOP (fp1) is frestore'd as an INEX frame.
1787 fu_inex:
1788 	fmovm.x		&0x40,FP_SRC(%a6)	# EXOP (fp1) -> FP_SRC
1789 
1790 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1791 	mov.w		&0xe001,2+FP_SRC(%a6)	# fsave status = INEX, laid over the EXOP
1792 	movm.l		EXC_DREGS(%a6),&0x0303	# d0-d1/a0-a1 back from the frame
1793 
1794 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# fp0/fp1 back from the frame
1795 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # user's ctrl regs
1796 
1797 	frestore	FP_SRC(%a6)		# load the INEX frame into the FPU
1798 
1799 	unlk		%a6			# drop the local frame
1800 
1801 
1802 	bra.l		_real_inex		# continue in the INEX callout
1803 
1804 #########################################################################
1805 #########################################################################
1806 fu_in_pack:
1807 # packed src operand: fetch and tag the src (plus the dst if dyadic), call the
1808 # emulation routine picked by the extension field, then look at enabled exceptions.
1809 # I'm not sure at this point what FPSR bits are valid for this instruction.
1810 # so, since the emulation routines re-create them anyways, zero exception field
1811 	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field (and top byte)
1812 
1813 	fmov.l		&0x0,%fpcr		# zero current control regs
1814 	fmov.l		&0x0,%fpsr
1815 
1816 	bsr.l		get_packed		# fetch packed src operand
1817 
1818 	lea		FP_SRC(%a6),%a0		# pass ptr to src
1819 	bsr.l		set_tag_x		# set src optype tag
1820 
1821 	mov.b		%d0,STAG(%a6)		# save src optype tag
1822 
1823 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1824 
1825 # bit five of the fp extension word separates the monadic and dyadic operations
1826 # at this point
1827 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1828 	beq.b		fu_extract_p		# monadic
1829 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1830 	beq.b		fu_extract_p		# yes, so it's monadic, too
1831 
1832 	bsr.l		load_fpn2		# load dst into FP_DST
1833 
1834 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1835 	bsr.l		set_tag_x		# tag the operand type
1836 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1837 	bne.b		fu_op2_done_p		# no
1838 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1839 fu_op2_done_p:
1840 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1841 
1842 fu_extract_p:
1843 	clr.l		%d0
1844 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1845 
1846 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1847 
1848 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1849 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
1850 
1851 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1852 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
1853 
1854 #
1855 # Exceptions in order of precedence:
1856 #	BSUN	: none
1857 #	SNAN	: all dyadic ops
1858 #	OPERR	: fsqrt(-NORM)
1859 #	OVFL	: all except ftst,fcmp
1860 #	UNFL	: all except ftst,fcmp
1861 #	DZ	: fdiv
1862 #	INEX2	: all except ftst,fcmp
1863 #	INEX1	: all
1864 #
1865 
1866 # we determine the highest priority exception(if any) set by the
1867 # emulation routine that has also been enabled by the user.
1868 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1869 	bne.w		fu_in_ena_p		# some are enabled
1870 
1871 fu_in_cont_p:
1872 # fcmp and ftst do not store any result.
1873 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1874 	andi.b		&0x38,%d0		# extract bits 3-5
1875 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1876 	beq.b		fu_in_exit_p		# yes
1877 
1878 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
1879 	bsr.l		store_fpreg		# store the result
1880 
1881 fu_in_exit_p:
1882 # normal exit: update a7 if needed, restore registers, then check for trace.
1883 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1884 	bne.w		fu_in_exit_s_p		# supervisor
1885 
1886 	mov.l		EXC_A7(%a6),%a0		# update user a7
1887 	mov.l		%a0,%usp
1888 
1889 fu_in_exit_cont_p:
1890 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1891 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1892 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1893 
1894 	unlk		%a6			# unravel stack frame
1895 
1896 	btst		&0x7,(%sp)		# is trace on?
1897 	bne.w		fu_trace_p		# yes
1898 
1899 	bra.l		_fpsp_done		# exit to os
1900 
1901 # the exception occurred in supervisor mode. check to see if the
1902 # addressing mode was (a7)+. if so, we'll need to shift the
1903 # stack frame "up".
1904 fu_in_exit_s_p:
1905 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1906 	beq.b		fu_in_exit_cont_p	# no
1907 
1908 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1909 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1910 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1911 
1912 	unlk		%a6			# unravel stack frame
1913 
1914 # shift the stack frame "up". we don't really care about the <ea> field.
1915 	mov.l		0x4(%sp),0x10(%sp)	# second longword of frame, 12 bytes up
1916 	mov.l		0x0(%sp),0xc(%sp)	# first longword (starts with SR), 12 bytes up
1917 	add.l		&0xc,%sp		# sp -> shifted frame
1918 
1919 	btst		&0x7,(%sp)		# is trace on?
1920 	bne.w		fu_trace_p		# yes
1921 
1922 	bra.l		_fpsp_done		# exit to os
1923 
1924 fu_in_ena_p:
1925 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
1926 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1927 	bne.b		fu_in_exc_p		# at least one was set
1928 
1929 #
1930 # No exceptions occurred that were also enabled. Now:
1931 #
1932 #	if (OVFL && ovfl_disabled && inexact_enabled) {
1933 #	    branch to _real_inex() (even if the result was exact!);
1934 #	} else {
1935 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1936 #	    return;
1937 #	}
1938 #
1939 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1940 	beq.w		fu_in_cont_p		# no
1941 
1942 fu_in_ovflchk_p:
1943 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1944 	beq.w		fu_in_cont_p		# no
1945 	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1946 
1947 #
1948 # An exception occurred and that exception was enabled:
1949 #
1950 #	shift enabled exception field into lo byte of d0;
1951 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1952 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1953 #		/*
1954 #		 * this is the case where we must call _real_inex() now or else
1955 #		 * there will be no other way to pass it the exceptional operand
1956 #		 */
1957 #		call _real_inex();
1958 #	} else {
1959 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1960 #	}
1961 #
1962 fu_in_exc_p:
1963 	subi.l		&24,%d0			# fix offset to be 0-7
1964 	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1965 	blt.b		fu_in_exc_exit_p	# no
1966 
1967 # the enabled exception was inexact
1968 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1969 	bne.w		fu_in_exc_unfl_p	# yes
1970 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1971 	bne.w		fu_in_exc_ovfl_p	# yes
1972 
1973 # here, we insert the correct fsave status value into the fsave frame for the
1974 # corresponding exception. the operand in the fsave frame should be the original
1975 # src operand.
1976 # as a reminder for future predicted pain and agony, we are passing in fsave the
1977 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1978 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1979 fu_in_exc_exit_p:
1980 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1981 	bne.w		fu_in_exc_exit_s_p	# supervisor
1982 
1983 	mov.l		EXC_A7(%a6),%a0		# update user a7
1984 	mov.l		%a0,%usp
1985 
1986 fu_in_exc_exit_cont_p:
1987 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status (d0 = table index)
1988 
1989 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1990 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1991 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1992 
1993 	frestore	FP_SRC(%a6)		# restore src op
1994 
1995 	unlk		%a6
1996 
1997 	btst		&0x7,(%sp)		# is trace enabled?
1998 	bne.w		fu_trace_p		# yes
1999 
2000 	bra.l		_fpsp_done
2001 # fsave status words by exception index 0-7: BSUN,SNAN,OPERR,OVFL / UNFL,DZ,INEX2,INEX1
2002 tbl_except_p:
2003 	short		0xe000,0xe006,0xe004,0xe005
2004 	short		0xe003,0xe002,0xe001,0xe001
2005 # inexact is enabled and a disabled OVFL/UNFL occurred: pick that status index instead.
2006 fu_in_exc_ovfl_p:
2007 	mov.w		&0x3,%d0		# tbl_except_p index 3 (0xe005)
2008 	bra.w		fu_in_exc_exit_p
2009 
2010 fu_in_exc_unfl_p:
2011 	mov.w		&0x4,%d0		# tbl_except_p index 4 (0xe003)
2012 	bra.w		fu_in_exc_exit_p
2013 # supervisor mode exit with an enabled exception; for (a7)+ the frame is shifted "up".
2014 fu_in_exc_exit_s_p:
2015 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
2016 	beq.b		fu_in_exc_exit_cont_p	# no
2017 
2018 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status (d0 = table index)
2019 
2020 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2021 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2022 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2023 
2024 	frestore	FP_SRC(%a6)		# restore src op
2025 
2026 	unlk		%a6			# unravel stack frame
2027 
2028 # shift stack frame "up". who cares about <ea> field.
2029 	mov.l		0x4(%sp),0x10(%sp)	# second longword of frame, 12 bytes up
2030 	mov.l		0x0(%sp),0xc(%sp)	# first longword (starts with SR), 12 bytes up
2031 	add.l		&0xc,%sp		# sp -> shifted frame
2032 
2033 	btst		&0x7,(%sp)		# is trace on?
2034 	bne.b		fu_trace_p		# yes
2035 
2036 	bra.l		_fpsp_done		# exit to os
2037 
2038 #
2039 # Trace was on for the PACKED instruction that took the "Unimplemented Data
2040 # Type" exception. Turn the frame at (sp) into a Trace frame whose "current"
2041 # PC is the FPIAR, then leave through _real_trace().
2042 #
2043 #		  UNSUPP FRAME		   TRACE FRAME
2044 #		*****************	*****************
2045 #		*      EA	*	*    Current	*
2046 #		*		*	*      PC	*
2047 #		*****************	*****************
2048 #		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
2049 #		*****************	*****************
2050 #		*     Next	*	*     Next	*
2051 #		*      PC	*	*      PC	*
2052 #		*****************	*****************
2053 #		*      SR	*	*      SR	*
2054 #		*****************	*****************
2055 fu_trace_p:
2056 	fmov.l		%fpiar,0x8(%sp)		# EA slot <- FPIAR ("current" PC)
2057 	mov.w		&0x2024,0x6(%sp)	# format 0x2, vector offset 0x024
2058 
2059 	bra.l		_real_trace		# continue in the Trace callout
2060 
2061 #########################################################
2062 #########################################################
2063 fu_out_pack:
2064 # packed fmove out: load the src fp register into FP_SRC, tag it, let fout()
2065 # emulate the move, then look at enabled exceptions.
2066 # I'm not sure at this point what FPSR bits are valid for this instruction.
2067 # so, since the emulation routines re-create them anyways, zero exception field.
2068 # fmove out doesn't affect ccodes.
2069 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2070 
2071 	fmov.l		&0x0,%fpcr		# zero current control regs
2072 	fmov.l		&0x0,%fpsr
2073 
2074 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract 3-bit reg field for load_fpn1
2075 	bsr.l		load_fpn1		# presumably loads that fp reg into FP_SRC
2076 
2077 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2078 # able to detect all operand types.
2079 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2080 	bsr.l		set_tag_x		# tag the operand type
2081 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2082 	bne.b		fu_op2_p		# no
2083 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2084 
2085 fu_op2_p:
2086 	mov.b		%d0,STAG(%a6)		# save src optype tag
2087 
2088 	clr.l		%d0
2089 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2090 
2091 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2092 
2093 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2094 	bsr.l		fout			# call fmove out routine
2095 
2096 # Exceptions in order of precedence:
2097 #	BSUN	: no
2098 #	SNAN	: yes
2099 #	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2100 #	OVFL	: no
2101 #	UNFL	: no
2102 #	DZ	: no
2103 #	INEX2	: yes
2104 #	INEX1	: no
2105 
2106 # determine the highest priority exception(if any) set by the
2107 # emulation routine that has also been enabled by the user.
2108 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2109 	bne.w		fu_out_ena_p		# some are enabled
2110 
2111 fu_out_exit_p:
2112 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2113 
2114 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2115 	bne.b		fu_out_exit_s_p		# supervisor
2116 
2117 	mov.l		EXC_A7(%a6),%a0		# update user a7
2118 	mov.l		%a0,%usp
2119 
2120 fu_out_exit_cont_p:
2121 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2122 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2123 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2124 
2125 	unlk		%a6			# unravel stack frame
2126 
2127 	btst		&0x7,(%sp)		# is trace on?
2128 	bne.w		fu_trace_p		# yes
2129 
2130 	bra.l		_fpsp_done		# exit to os
2131 
2132 # the exception occurred in supervisor mode. check to see if the
2133 # addressing mode was -(a7). if so, we'll need to shift the
2134 # stack frame "down".
2135 fu_out_exit_s_p:
2136 	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2137 	beq.b		fu_out_exit_cont_p	# no
2138 
2139 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2140 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2141 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2142 
2143 	mov.l		(%a6),%a6		# restore frame pointer
2144 # shift the exc frame "down" 12 bytes (sp-relative now; the <ea> field is not copied)
2145 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2146 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2147 
2148 # now, copy the result to the proper place on the stack
2149 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2150 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2151 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2152 
2153 	add.l		&LOCAL_SIZE-0x8,%sp	# release locals; sp -> shifted exc frame
2154 
2155 	btst		&0x7,(%sp)		# is trace on?
2156 	bne.w		fu_trace_p		# yes
2157 
2158 	bra.l		_fpsp_done		# exit to os
2159 # some exceptions are enabled: find the highest priority one that was also set.
2160 fu_out_ena_p:
2161 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
2162 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2163 	beq.w		fu_out_exit_p		# none set; take the normal exit
2164 
2165 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2166 
2167 # an exception occurred and that exception was enabled.
2168 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2169 fu_out_exc_p:
2170 	cmpi.b		%d0,&0x1a		# bfffo offset 0x1a (24+2) = OPERR
2171 	bgt.w		fu_inex_p2		# past OPERR; handled as INEX
2172 	beq.w		fu_operr_p		# OPERR; otherwise fall into SNAN
2173 # enabled SNAN on packed move out: fix up a7 or the frame, then exit via _real_snan().
2174 fu_snan_p:
2175 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2176 	bne.b		fu_snan_s_p		# supervisor
2177 
2178 	mov.l		EXC_A7(%a6),%a0		# update user a7
2179 	mov.l		%a0,%usp
2180 	bra.w		fu_snan			# common SNAN exit
2181 
2182 fu_snan_s_p:
2183 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2184 	bne.w		fu_snan			# no; common SNAN exit
2185 
2186 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2187 # the strategy is to move the exception frame "down" 12 bytes. then, we
2188 # can store the default result where the exception frame was.
2189 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2190 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2191 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2192 
2193 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
2194 	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
2195 
2196 	frestore	FP_SRC(%a6)		# restore src operand
2197 
2198 	mov.l		(%a6),%a6		# restore frame pointer
2199 # shift the exc frame "down" 12 bytes (sp-relative now)
2200 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2201 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2202 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2203 
2204 # now, we copy the default result to its proper location
2205 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2206 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2207 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2208 
2209 	add.l		&LOCAL_SIZE-0x8,%sp	# release locals; sp -> shifted exc frame
2210 
2211 
2212 	bra.l		_real_snan
2213 # enabled OPERR on packed move out: fix up a7 or the frame, then exit via _real_operr().
2214 fu_operr_p:
2215 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2216 	bne.w		fu_operr_p_s		# supervisor
2217 
2218 	mov.l		EXC_A7(%a6),%a0		# update user a7
2219 	mov.l		%a0,%usp
2220 	bra.w		fu_operr		# common OPERR exit
2221 
2222 fu_operr_p_s:
2223 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2224 	bne.w		fu_operr		# no; common OPERR exit
2225 
2226 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2227 # the strategy is to move the exception frame "down" 12 bytes. then, we
2228 # can store the default result where the exception frame was.
2229 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2230 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2231 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2232 
2233 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
2234 	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
2235 
2236 	frestore	FP_SRC(%a6)		# restore src operand
2237 
2238 	mov.l		(%a6),%a6		# restore frame pointer
2239 # shift the exc frame "down" 12 bytes (sp-relative now)
2240 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2241 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2242 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2243 
2244 # now, we copy the default result to its proper location
2245 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2246 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2247 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2248 
2249 	add.l		&LOCAL_SIZE-0x8,%sp	# release locals; sp -> shifted exc frame
2250 
2251 
2252 	bra.l		_real_operr
2253 # enabled INEX on packed move out: fix up a7 or the frame, then exit via _real_inex().
2254 fu_inex_p2:
2255 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2256 	bne.w		fu_inex_s_p2		# supervisor
2257 
2258 	mov.l		EXC_A7(%a6),%a0		# update user a7
2259 	mov.l		%a0,%usp
2260 	bra.w		fu_inex			# common INEX exit
2261 
2262 fu_inex_s_p2:
2263 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2264 	bne.w		fu_inex			# no; common INEX exit
2265 
2266 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2267 # the strategy is to move the exception frame "down" 12 bytes. then, we
2268 # can store the default result where the exception frame was.
2269 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2270 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2271 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2272 
2273 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
2274 	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
2275 
2276 	frestore	FP_SRC(%a6)		# restore src operand
2277 
2278 	mov.l		(%a6),%a6		# restore frame pointer
2279 # shift the exc frame "down" 12 bytes (sp-relative now)
2280 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2281 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2282 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2283 
2284 # now, we copy the default result to its proper location
2285 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2286 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2287 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2288 
2289 	add.l		&LOCAL_SIZE-0x8,%sp	# release locals; sp -> shifted exc frame
2290 
2291 
2292 	bra.l		_real_inex
2293 
2294 #########################################################################
2295 
2296 #
2297 # if we're stuffing a source operand back into an fsave frame then we
2298 # have to make sure that for single or double source operands that the
2299 # format stuffed is as weird as the hardware usually makes it.
2300 #
2301 	global		funimp_skew
2302 funimp_skew:
2303 	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2304 	cmpi.b		%d0,&0x1		# was src sgl?
2305 	beq.b		funimp_skew_sgl		# yes
2306 	cmpi.b		%d0,&0x5		# was src dbl?
2307 	beq.b		funimp_skew_dbl		# yes
2308 	rts					# other formats: FP_SRC left untouched
2309 
2310 funimp_skew_sgl:
2311 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2312 	andi.w		&0x7fff,%d0		# strip sign
2313 	beq.b		funimp_skew_sgl_not	# zero exponent; nothing to do
2314 	cmpi.w		%d0,&0x3f80		# exponent above 0x3f80?
2315 	bgt.b		funimp_skew_sgl_not	# yes; nothing to do
2316 	neg.w		%d0			# make exponent negative
2317 	addi.w		&0x3f81,%d0		# find amt to shift
2318 	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2319 	lsr.l		%d0,%d1			# shift it
2320 	bset		&31,%d1			# set j-bit
2321 	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2322 	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2323 	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2324 funimp_skew_sgl_not:
2325 	rts
2326 
2327 funimp_skew_dbl:
2328 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2329 	andi.w		&0x7fff,%d0		# strip sign
2330 	beq.b		funimp_skew_dbl_not	# zero exponent; nothing to do
2331 	cmpi.w		%d0,&0x3c00		# exponent above 0x3c00?
2332 	bgt.b		funimp_skew_dbl_not	# yes; nothing to do
2333 
2334 	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2335 	smi.b		0x2+FP_SRC(%a6)		# byte 2 = 0xff if sign was set, else 0
2336 	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2337 	clr.l		%d0			# clear g,r,s
2338 	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2339 	mov.w		&0x3c01,%d1		# pass denorm threshold
2340 	bsr.l		dnrm_lp			# denorm it
2341 	mov.w		&0x3c00,%d0		# new exponent
2342 	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2343 	beq.b		fss_dbl_denorm_done	# no
2344 	bset		&15,%d0			# set sign
2345 fss_dbl_denorm_done:
2346 	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2347 	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2348 funimp_skew_dbl_not:
2349 	rts
2350 # _mem_write2(): user mode -> _dmem_write(); supervisor -> copy 12 bytes at (a0) to FP_DST, d1 = 0.
2351 #########################################################################
2352 	global		_mem_write2
2353 _mem_write2:
2354 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2355 	beq.l		_dmem_write		# user; _dmem_write() does the write
2356 	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor; park the three
2357 	mov.l		0x4(%a0),FP_DST_HI(%a6)	# longwords at (a0) in FP_DST
2358 	mov.l		0x8(%a0),FP_DST_LO(%a6)	# instead of writing to memory
2359 	clr.l		%d1			# d1 = 0 (presumably "no write error")
2360 	rts
2361 
2362 #########################################################################
2363 # XDEF ****************************************************************	#
2364 #	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2365 #			effective address" exception.			#
2366 #									#
2367 #	This handler should be the first code executed upon taking the	#
2368 #	FP Unimplemented Effective Address exception in an operating	#
2369 #	system.								#
2370 #									#
2371 # XREF ****************************************************************	#
2372 #	_imem_read_long() - read instruction longword			#
2373 #	fix_skewed_ops() - adjust src operand in fsave frame		#
2374 #	set_tag_x() - determine optype of src/dst operands		#
2375 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2376 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2377 #	load_fpn2() - load dst operand from FP regfile			#
2378 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
2379 #	decbin() - convert packed data to FP binary data		#
2380 #	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2381 #	_real_access() - "callout" for access error exception		#
2382 #	_mem_read() - read extended immediate operand from memory	#
2383 #	_fpsp_done() - "callout" for exit; work all done		#
2384 #	_real_trace() - "callout" for Trace enabled exception		#
2385 #	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2386 #	fmovm_ctrl() - emulate fmovm control instruction		#
2387 #									#
2388 # INPUT ***************************************************************	#
2389 #	- The system stack contains the "Unimplemented <ea>" stk frame	#
2390 #									#
2391 # OUTPUT **************************************************************	#
2392 #	If access error:						#
2393 #	- The system stack is changed to an access error stack frame	#
2394 #	If FPU disabled:						#
2395 #	- The system stack is changed to an FPU disabled stack frame	#
2396 #	If Trace exception enabled:					#
2397 #	- The system stack is changed to a Trace exception stack frame	#
2398 #	Else: (normal case)						#
2399 #	- None (correct result has been stored as appropriate)		#
2400 #									#
2401 # ALGORITHM ***********************************************************	#
2402 #	This exception handles 3 types of operations:			#
2403 # (1) FP Instructions using extended precision or packed immediate	#
2404 #     addressing mode.							#
2405 # (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2406 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2407 #									#
2408 #	For immediate data operations, the data is read in w/ a		#
2409 # _mem_read() "callout", converted to FP binary (if packed), and used	#
2410 # as the source operand to the instruction specified by the instruction	#
2411 # word. If no FP exception should be reported as a result of the	#
2412 # emulation, then the result is stored to the destination register and	#
2413 # the handler exits through _fpsp_done(). If an enabled exc has been	#
2414 # signalled as a result of emulation, then an fsave state frame		#
2415 # corresponding to the FP exception type must be entered into the 060	#
2416 # FPU before exiting. In either the enabled or disabled cases, we	#
2417 # must also check if a Trace exception is pending, in which case, we	#
2418 # must create a Trace exception stack frame from the current exception	#
2419 # stack frame. If no Trace is pending, we simply exit through		#
2420 # _fpsp_done().								#
2421 #	For "fmovm.x", call the routine fmovm_dynamic() which will	#
2422 # decode and emulate the instruction. No FP exceptions can be pending	#
2423 # as a result of this operation emulation. A Trace exception can be	#
2424 # pending, though, which means the current stack frame must be changed	#
2425 # to a Trace stack frame and an exit made through _real_trace().	#
2426 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2427 # was executed from supervisor mode, this handler must store the FP	#
2428 # register file values to the system stack by itself since		#
2429 # fmovm_dynamic() can't handle this. A normal exit is made through	#
2430 # _fpsp_done().								#
2431 #	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2432 # Again, a Trace exception may be pending and an exit made through	#
2433 # _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2434 #									#
2435 #	Before any of the above is attempted, it must be checked to	#
2436 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2437 # before the "FPU disabled" exception, but the "FPU disabled" exception	#
2438 # has higher priority, we check the disabled bit in the PCR. If set,	#
2439 # then we must create an 8 word "FPU disabled" exception stack frame	#
2440 # from the current 4 word exception stack frame. This includes		#
2441 # reproducing the effective address of the instruction to put on the	#
2442 # new stack frame.							#
2443 #									#
2444 #	In the process of all emulation work, if a _mem_read()		#
2445 # "callout" returns a failing result indicating an access error, then	#
2446 # we must create an access error stack frame from the current stack	#
2447 # frame. This information includes a faulting address and a fault-	#
2448 # status-longword. These are created within this handler.		#
2449 #									#
2450 #########################################################################
2451 
2452 	global		_fpsp_effadd
2453 _fpsp_effadd:
2454 
2455 # This exception type takes priority over the "Line F Emulator"
2456 # exception. Therefore, the FPU could be disabled when entering here.
2457 # So, we must check to see if it's disabled and handle that case separately.
2458 	mov.l		%d0,-(%sp)		# save d0
2459 	movc		%pcr,%d0		# load proc cr
2460 	btst		&0x1,%d0		# is FPU disabled?
2461 	bne.w		iea_disabled		# yes
2462 	mov.l		(%sp)+,%d0		# restore d0
2463 
2464 	link		%a6,&-LOCAL_SIZE	# init stack frame
2465 
2466 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2467 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2468 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
2469 
2470 # PC of instruction that took the exception is the PC in the frame
2471 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2472 
2473 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2474 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2475 	bsr.l		_imem_read_long		# fetch the instruction words
2476 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2477 
2478 #########################################################################
2479 
2480 	tst.w		%d0			# is operation fmovem?
2481 	bmi.w		iea_fmovm		# yes
2482 
2483 #
2484 # here, we will have:
2485 #	fabs	fdabs	fsabs		facos		fmod
2486 #	fadd	fdadd	fsadd		fasin		frem
2487 #	fcmp				fatan		fscale
2488 #	fdiv	fddiv	fsdiv		fatanh		fsin
2489 #	fint				fcos		fsincos
2490 #	fintrz				fcosh		fsinh
2491 #	fmove	fdmove	fsmove		fetox		ftan
2492 #	fmul	fdmul	fsmul		fetoxm1		ftanh
2493 #	fneg	fdneg	fsneg		fgetexp		ftentox
2494 #	fsgldiv				fgetman		ftwotox
2495 #	fsglmul				flog10
2496 #	fsqrt				flog2
2497 #	fsub	fdsub	fssub		flogn
2498 #	ftst				flognp1
2499 # which can all use f<op>.{x,p}
2500 # so, now it's immediate data extended precision AND PACKED FORMAT!
2501 #
2502 iea_op:
2503 	andi.l		&0x00ff00ff,USER_FPSR(%a6)
2504 
2505 	btst		&0xa,%d0		# is src fmt x or p?
2506 	bne.b		iea_op_pack		# packed
2507 
2508 
2509 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2510 	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2511 	mov.l		&0xc,%d0		# pass: 12 bytes
2512 	bsr.l		_imem_read		# read extended immediate
2513 
2514 	tst.l		%d1			# did ifetch fail?
2515 	bne.w		iea_iacc		# yes
2516 
2517 	bra.b		iea_op_setsrc
2518 
2519 iea_op_pack:
2520 
2521 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2522 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
2523 	mov.l		&0xc,%d0		# pass: 12 bytes
2524 	bsr.l		_imem_read		# read packed operand
2525 
2526 	tst.l		%d1			# did ifetch fail?
2527 	bne.w		iea_iacc		# yes
2528 
2529 # The packed operand is an INF or a NAN if the exponent field is all ones.
2530 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
2531 	cmpi.w		%d0,&0x7fff		# INF or NAN?
2532 	beq.b		iea_op_setsrc		# operand is an INF or NAN
2533 
2534 # The packed operand is a zero if the mantissa is all zero, else it's
2535 # a normal packed op.
2536 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
2537 	andi.b		&0x0f,%d0		# clear all but last nybble
2538 	bne.b		iea_op_gp_not_spec	# not a zero
2539 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
2540 	bne.b		iea_op_gp_not_spec	# not a zero
2541 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
2542 	beq.b		iea_op_setsrc		# operand is a ZERO
2543 iea_op_gp_not_spec:
2544 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
2545 	bsr.l		decbin			# convert to extended
2546 	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2547 
2548 iea_op_setsrc:
2549 	addi.l		&0xc,EXC_EXTWPTR(%a6)	# step extension word ptr past the 12-byte operand
2550 
2551 # FP_SRC now holds the src operand.
2552 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2553 	bsr.l		set_tag_x		# tag the operand type
2554 	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
2555 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2556 	bne.b		iea_op_getdst		# no
2557 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2558 	mov.b		%d0,STAG(%a6)		# set new optype tag
2559 iea_op_getdst:
2560 	clr.b		STORE_FLG(%a6)		# clear flag: by default the result IS stored
2561 
2562 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
2563 	beq.b		iea_op_extract		# monadic
2564 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
2565 	bne.b		iea_op_spec		# yes
2566 
2567 iea_op_loaddst:
2568 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2569 	bsr.l		load_fpn2		# load dst operand
2570 
2571 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
2572 	bsr.l		set_tag_x		# tag the operand type
2573 	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
2574 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2575 	bne.b		iea_op_extract		# no
2576 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2577 	mov.b		%d0,DTAG(%a6)		# set new optype tag
2578 	bra.b		iea_op_extract
2579 
2580 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2581 iea_op_spec:
2582 	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
2583 	beq.b		iea_op_extract		# yes
2584 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2585 # store a result. then, only fcmp will branch back and pick up a dst operand.
2586 	st		STORE_FLG(%a6)		# don't store a final result
2587 	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
2588 	beq.b		iea_op_loaddst		# yes
2589 
2590 iea_op_extract:
2591 	clr.l		%d0			# zero d0 before the byte load below
2592 	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
2593 
2594 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch low byte of the command word
2595 	andi.w		&0x007f,%d1		# extract extension
2596 
2597 	fmov.l		&0x0,%fpcr		# zero the FPCR for the emulation routine
2598 	fmov.l		&0x0,%fpsr		# zero the FPSR for the emulation routine
2599 
2600 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2601 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
2602 
2603 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2604 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call the emulation routine (offset is table-relative)
2605 
2606 #
2607 # Exceptions in order of precedence:
2608 #	BSUN	: none
2609 #	SNAN	: all operations
2610 #	OPERR	: all reg-reg or mem-reg operations that can normally operr
2611 #	OVFL	: same as OPERR
2612 #	UNFL	: same as OPERR
2613 #	DZ	: same as OPERR
2614 #	INEX2	: same as OPERR
2615 #	INEX1	: all packed immediate operations
2616 #
2617 
2618 # we determine the highest priority exception (if any) set by the
2619 # emulation routine that has also been enabled by the user.
2620 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2621 	bne.b		iea_op_ena		# some are enabled
2622 
2623 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2624 # these don't save results.
2625 iea_op_save:
2626 	tst.b		STORE_FLG(%a6)		# does this op store a result?
2627 	bne.b		iea_op_exit1		# no (flag set for ftst/fcmp); exit with no frestore
2628 
2629 iea_op_store:
2630 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2631 	bsr.l		store_fpreg		# store the result
2632 
2633 iea_op_exit1:
2634 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2635 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2636 
2637 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2638 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2639 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2640 
2641 	unlk		%a6			# unravel the frame
2642 
2643 	btst		&0x7,(%sp)		# is trace on?
2644 	bne.w		iea_op_trace		# yes
2645 
2646 	bra.l		_fpsp_done		# exit to os
2647 
2648 iea_op_ena:
2649 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only those both enabled and set
2650 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2651 	bne.b		iea_op_exc		# at least one was set
2652 
2653 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2654 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2655 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2656 	beq.b		iea_op_save		# no; go save the result (if any)
2657 
2658 iea_op_ovfl:
2659 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2660 	beq.b		iea_op_store		# no
2661 	bra.b		iea_op_exc_ovfl		# yes
2662 
2663 # an enabled exception occurred. we have to insert the exception type back into
2664 # the machine.
2665 iea_op_exc:
2666 	subi.l		&24,%d0			# fix offset to be 0-7 (bfffo gave 24-31)
2667 	cmpi.b		%d0,&0x6		# is exception INEX?
2668 	bne.b		iea_op_exc_force	# no
2669 
2670 # the enabled exception was inexact. so, if it occurs with an overflow
2671 # or underflow that was disabled, then we have to force an overflow or
2672 # underflow frame.
2673 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2674 	bne.b		iea_op_exc_ovfl		# yes
2675 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2676 	bne.b		iea_op_exc_unfl		# yes
2677 
2678 iea_op_exc_force:
2679 	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)	# put table entry for this exception in the frame
2680 	bra.b		iea_op_exit2		# exit with frestore
2681 
2682 tbl_iea_except:
2683 	short		0xe002, 0xe006, 0xe004, 0xe005	# index 0-3: BSUN, SNAN, OPERR, OVFL
2684 	short		0xe003, 0xe002, 0xe001, 0xe001	# index 4-7: UNFL, DZ, INEX2, INEX1
2685 
2686 iea_op_exc_ovfl:
2687 	mov.w		&0xe005,2+FP_SRC(%a6)	# same value as the OVFL table entry
2688 	bra.b		iea_op_exit2
2689 
2690 iea_op_exc_unfl:
2691 	mov.w		&0xe003,2+FP_SRC(%a6)	# same value as the UNFL table entry
2692 
2693 iea_op_exit2:
2694 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2695 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2696 
2697 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2698 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2699 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2700 
2701 	frestore	FP_SRC(%a6)		# restore exceptional state
2702 
2703 	unlk		%a6			# unravel the frame
2704 
2705 	btst		&0x7,(%sp)		# is trace on?
2706 	bne.b		iea_op_trace		# yes
2707 
2708 	bra.l		_fpsp_done		# exit to os
2709 
2710 #
2711 # The opclass two instruction that took an "Unimplemented Effective Address"
2712 # exception was being traced. Make the "current" PC the FPIAR and put it in
2713 # the trace stack frame then jump to _real_trace().
2714 #
2715 #		 UNIMP EA FRAME		   TRACE FRAME
2716 #		*****************	*****************
2717 #		* 0x0 *  0x0f0	*	*    Current	*
2718 #		*****************	*      PC	*
2719 #		*    Current	*	*****************
2720 #		*      PC	*	* 0x2 *  0x024	*
2721 #		*****************	*****************
2722 #		*      SR	*	*     Next	*
2723 #		*****************	*      PC	*
2724 #					*****************
2725 #					*      SR	*
2726 #					*****************
2727 iea_op_trace:
2728 	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2729 	mov.w		0x8(%sp),0x4(%sp)	# move lo word of the stacked PC down too
2730 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2731 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2732 
2733 	bra.l		_real_trace		# exit to os trace handler
2734 
2735 #########################################################################
2736 iea_fmovm:
2737 	btst		&14,%d0			# ctrl or data reg
2738 	beq.w		iea_fmovm_ctrl		# bit clear: ctrl regs
2739 
2740 iea_fmovm_data:
2741 
2742 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2743 	bne.b		iea_fmovm_data_s	# supervisor
2744 
2745 iea_fmovm_data_u:
2746 	mov.l		%usp,%a0		# fetch user stack pointer
2747 	mov.l		%a0,EXC_A7(%a6)		# store current a7
2748 	bsr.l		fmovm_dynamic		# do dynamic fmovm
2749 	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2750 	mov.l		%a0,%usp		# update usp
2751 	bra.w		iea_fmovm_exit		# take the common exit
2752 
2753 iea_fmovm_data_s:
2754 	clr.b		SPCOND_FLG(%a6)		# clear special-condition flag before the call
2755 	lea		0x2+EXC_VOFF(%a6),%a0	# addr just past the voff word of the exc frame
2756 	mov.l		%a0,EXC_A7(%a6)		# use it as the current (supervisor) a7
2757 	bsr.l		fmovm_dynamic		# do dynamic fmovm
2758 
2759 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg	# was mda7_flg set during the call?
2760 	beq.w		iea_fmovm_data_predec	# yes; handle the predecrement case
2761 	cmpi.b		SPCOND_FLG(%a6),&mia7_flg	# was mia7_flg set during the call?
2762 	bne.w		iea_fmovm_exit		# no; nothing special, take the common exit
2763 
2764 # right now, d0 = the size.
2765 # the data has been fetched from the supervisor stack, but we have not
2766 # incremented the stack pointer by the appropriate number of bytes.
2767 # do it here, by rebuilding the exception frame "size" bytes higher up.
2768 iea_fmovm_data_postinc:
2769 	btst		&0x7,EXC_SR(%a6)	# is trace on?
2770 	bne.b		iea_fmovm_data_pi_trace	# yes; build a trace frame instead
2771 
2772 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# copy SR to the relocated frame
2773 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)	# Next PC goes in the relocated frame
2774 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0
2775 
2776 	lea		(EXC_SR,%a6,%d0),%a0	# addr of the relocated frame
2777 	mov.l		%a0,EXC_SR(%a6)		# stash it in the old SR slot; popped into sp below
2778 
2779 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2780 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2781 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2782 
2783 	unlk		%a6			# unravel the frame
2784 	mov.l		(%sp)+,%sp		# load the new sp stashed above
2785 	bra.l		_fpsp_done		# exit to os
2786 
2787 iea_fmovm_data_pi_trace:
2788 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# copy SR; frame is 4 bytes bigger
2789 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)	# Next PC
2790 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)	# stk fmt = 0x2; voff = 0x024
2791 	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)	# Current PC
2792 
2793 	lea		(EXC_SR-0x4,%a6,%d0),%a0	# addr of the relocated trace frame
2794 	mov.l		%a0,EXC_SR(%a6)		# stash it in the old SR slot; popped into sp below
2795 
2796 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2797 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2798 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2799 
2800 	unlk		%a6			# unravel the frame
2801 	mov.l		(%sp)+,%sp		# load the new sp stashed above
2802 	bra.l		_real_trace		# exit to os trace handler
2803 
2804 # right now, d0 = size and d1 = the strg.
2805 iea_fmovm_data_predec:
2806 	mov.b		%d1,EXC_VOFF(%a6)	# store strg
2807 	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2808 
2809 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2810 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2811 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2812 
2813 	mov.l		(%a6),-(%sp)		# make a copy of a6
2814 	mov.l		%d0,-(%sp)		# save d0
2815 	mov.l		%d1,-(%sp)		# save d1
2816 	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2817 
2818 	clr.l		%d0			# zero d0 before the byte load below
2819 	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2820 	neg.l		%d0			# get negative of size
2821 
2822 	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2823 	beq.b		iea_fmovm_data_p2	# no; build a normal frame
2824 
2825 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# copy SR; trace frame is 4 bytes bigger
2826 	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)	# Current PC
2827 	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)	# Next PC (copy pushed above)
2828 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)	# stk fmt = 0x2; voff = 0x024
2829 
2830 	pea		(%a6,%d0)		# create final sp
2831 	bra.b		iea_fmovm_data_p3
2832 
2833 iea_fmovm_data_p2:
2834 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# copy SR to the relocated frame
2835 	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# Next PC (copy pushed above)
2836 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0
2837 
2838 	pea		(0x4,%a6,%d0)		# create final sp
2839 
2840 iea_fmovm_data_p3:
2841 	clr.l		%d1			# zero d1 before the byte load below
2842 	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2843 
2844 	tst.b		%d1			# test top bit of strg
2845 	bpl.b		fm_1			# clear; skip this register
2846 	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x80
2847 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2848 fm_1:
2849 	lsl.b		&0x1,%d1		# shift next strg bit into the sign position
2850 	bpl.b		fm_2			# clear; skip this register
2851 	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x40
2852 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2853 fm_2:
2854 	lsl.b		&0x1,%d1		# next strg bit
2855 	bpl.b		fm_3			# clear; skip this register
2856 	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x20
2857 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2858 fm_3:
2859 	lsl.b		&0x1,%d1		# next strg bit
2860 	bpl.b		fm_4			# clear; skip this register
2861 	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x10
2862 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2863 fm_4:
2864 	lsl.b		&0x1,%d1		# next strg bit
2865 	bpl.b		fm_5			# clear; skip this register
2866 	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x08
2867 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2868 fm_5:
2869 	lsl.b		&0x1,%d1		# next strg bit
2870 	bpl.b		fm_6			# clear; skip this register
2871 	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x04
2872 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2873 fm_6:
2874 	lsl.b		&0x1,%d1		# next strg bit
2875 	bpl.b		fm_7			# clear; skip this register
2876 	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x02
2877 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2878 fm_7:
2879 	lsl.b		&0x1,%d1		# last strg bit
2880 	bpl.b		fm_end			# clear; done
2881 	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)	# store reg selected by mask 0x01
2882 fm_end:
2883 	mov.l		0x4(%sp),%d1		# restore d1 (saved above)
2884 	mov.l		0x8(%sp),%d0		# restore d0 (saved above)
2885 	mov.l		0xc(%sp),%a6		# restore a6 (copy made above)
2886 	mov.l		(%sp)+,%sp		# switch to the final sp created by pea
2887 
2888 	btst		&0x7,(%sp)		# is trace enabled?
2889 	beq.l		_fpsp_done		# no; exit to os
2890 	bra.l		_real_trace		# yes; exit to os trace handler
2891 
2892 #########################################################################
2893 iea_fmovm_ctrl:
2894 
2895 	bsr.l		fmovm_ctrl		# load ctrl regs
2896 
2897 iea_fmovm_exit:
2898 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2899 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2900 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2901 
2902 	btst		&0x7,EXC_SR(%a6)	# is trace on?
2903 	bne.b		iea_fmovm_trace		# yes
2904 
2905 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2906 
2907 	unlk		%a6			# unravel the frame
2908 
2909 	bra.l		_fpsp_done		# exit to os
2910 
2911 #
2912 # The control reg instruction that took an "Unimplemented Effective Address"
2913 # exception was being traced. The "Current PC" for the trace frame is the
2914 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2915 # After fixing the stack frame, jump to _real_trace().
2916 #
2917 #		 UNIMP EA FRAME		   TRACE FRAME
2918 #		*****************	*****************
2919 #		* 0x0 *  0x0f0	*	*    Current	*
2920 #		*****************	*      PC	*
2921 #		*    Current	*	*****************
2922 #		*      PC	*	* 0x2 *  0x024	*
2923 #		*****************	*****************
2924 #		*      SR	*	*     Next	*
2925 #		*****************	*      PC	*
2926 #					*****************
2927 #					*      SR	*
2928 #					*****************
2929 # this isn't a pretty solution, but it works:
2930 # -restore a6 (not with unlk)
2931 # -shift stack frame down over where old a6 used to be
2932 # -add LOCAL_SIZE to stack pointer
2933 iea_fmovm_trace:
2934 	mov.l		(%a6),%a6		# restore frame pointer
2935 	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR: down over the old a6 slot
2936 	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# stacked PC becomes "Current PC"
2937 	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# EXC_EXTWPTR becomes "Next PC"
2938 	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2939 	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2940 
2941 	bra.l		_real_trace		# exit to os trace handler
2942 
2943 #########################################################################
2944 # The FPU is disabled and so we should really have taken the "Line
2945 # F Emulator" exception. So, here we create an 8-word stack frame
2946 # from our 4-word stack frame. This means we must calculate the length
2947 # of the faulting instruction to get the "next PC". This is trivial for
2948 # immediate operands but requires some extra work for fmovm dynamic
2949 # which can use most addressing modes.
2950 iea_disabled:
2951 	mov.l		(%sp)+,%d0		# restore d0
2952 
2953 	link		%a6,&-LOCAL_SIZE	# init stack frame
2954 
2955 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2956 
2957 # PC of instruction that took the exception is the PC in the frame
2958 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2959 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2960 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2961 	bsr.l		_imem_read_long		# fetch the instruction words
2962 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2963 
2964 	tst.w		%d0			# is instr fmovm?
2965 	bmi.b		iea_dis_fmovm		# yes
2966 # instruction is using an extended precision immediate operand. Therefore,
2967 # the total instruction length is 16 bytes.
2968 iea_dis_immed:
2969 	mov.l		&0x10,%d0		# 16 bytes of instruction
2970 	bra.b		iea_dis_cont
2971 iea_dis_fmovm:
2972 	btst		&0xe,%d0		# is instr fmovm ctrl
2973 	bne.b		iea_dis_fmovm_data	# no
2974 # the instruction is a fmovm.l with 2 or 3 registers.
2975 	bfextu		%d0{&19:&3},%d1		# extract 3-bit field (bits 12-10) of ext word
2976 	mov.l		&0xc,%d0		# assume 12 bytes of instruction
2977 	cmpi.b		%d1,&0x7		# move all regs?
2978 	bne.b		iea_dis_cont		# no; 12 bytes it is
2979 	addq.l		&0x4,%d0		# yes; 16 bytes of instruction
2980 	bra.b		iea_dis_cont
2981 # the instruction is an fmovm.x dynamic which can use many addressing
2982 # modes and thus can have several different total instruction lengths.
2983 # call fmovm_calc_ea which will go through the ea calc process and,
2984 # as a by-product, will tell us how long the instruction is.
2985 iea_dis_fmovm_data:
2986 	clr.l		%d0
2987 	bsr.l		fmovm_calc_ea
2988 	mov.l		EXC_EXTWPTR(%a6),%d0	# ptr just past the instruction
2989 	sub.l		EXC_PC(%a6),%d0		# minus Current PC = instruction length
2990 iea_dis_cont:
2991 	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
2992 
2993 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2994 
2995 	unlk		%a6			# unravel the frame
2996 
2997 # here, we actually create the 8-word frame from the 4-word frame,
2998 # with the "next PC" as additional info.
2999 # the <ea> field is left undefined.
3000 	subq.l		&0x8,%sp		# make room for new stack
3001 	mov.l		%d0,-(%sp)		# save d0
3002 	mov.w		0xc(%sp),0x4(%sp)	# move SR
3003 	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3004 	clr.l		%d0			# zero d0 before the word load below
3005 	mov.w		0x12(%sp),%d0		# fetch instruction length stored in voff slot
3006 	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3007 	add.l		%d0,0x6(%sp)		# make Next PC
3008 	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3009 	mov.l		(%sp)+,%d0		# restore d0
3010 
3011 	bra.l		_real_fpu_disabled	# exit to os fpu-disabled handler
3012 
3013 ##########
3014 
3015 iea_iacc:
3016 	movc		%pcr,%d0		# fetch the processor configuration reg
3017 	btst		&0x1,%d0		# PCR bit 1 set? (presumably "FPU disabled" -- confirm in 060 UM)
3018 	bne.b		iea_iacc_cont		# yes; skip the FP register restores
3019 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3020 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3021 iea_iacc_cont:
3022 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3023 
3024 	unlk		%a6			# unravel the frame
3025 
3026 	subq.w		&0x8,%sp		# make stack frame bigger
3027 	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3028 	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3029 	mov.w		&0x4008,0x6(%sp)	# store voff (stk fmt = 0x4; voff = 0x008)
3030 	mov.l		0x2(%sp),0x8(%sp)	# store ea (= the stacked PC)
3031 	mov.l		&0x09428001,0xc(%sp)	# store fslw
3032 
3033 iea_acc_done:
3034 	btst		&0x5,(%sp)		# user or supervisor mode?
3035 	beq.b		iea_acc_done2		# user
3036 	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3037 
3038 iea_acc_done2:
3039 	bra.l		_real_access		# exit to os access error handler
3040 
3041 iea_dacc:
3042 	lea		-LOCAL_SIZE(%a6),%sp	# reset sp to the bottom of the local frame
3043 
3044 	movc		%pcr,%d1		# fetch the processor configuration reg
3045 	btst		&0x1,%d1		# PCR bit 1 set? (presumably "FPU disabled" -- confirm in 060 UM)
3046 	bne.b		iea_dacc_cont		# yes; skip the FP register restores
3047 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3048 	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3049 iea_dacc_cont:
3050 	mov.l		(%a6),%a6		# restore frame pointer (not with unlk)
3051 
3052 	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)	# move SR,hi(PC) down 8 bytes
3053 	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)	# move lo(PC) down 8 bytes
3054 	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)	# stk fmt = 0x4; voff = 0x008
3055 	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)	# store ea (taken from a0)
3056 	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)	# store hi word of fslw (taken from d0)
3057 	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)	# store lo word of fslw
3058 
3059 	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3060 	add.w		&LOCAL_SIZE-0x4,%sp	# point sp at the new, bigger frame
3061 
3062 	bra.b		iea_acc_done		# set TM bit if supervisor, then exit
3063 
3064 #########################################################################
3065 # XDEF ****************************************************************	#
3066 #	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3067 #									#
3068 #	This handler should be the first code executed upon taking the	#
3069 #	FP Operand Error exception in an operating system.		#
3070 #									#
3071 # XREF ****************************************************************	#
3072 #	_imem_read_long() - read instruction longword			#
3073 #	fix_skewed_ops() - adjust src operand in fsave frame		#
3074 #	_real_operr() - "callout" to operating system operr handler	#
3075 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3076 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3077 #	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3078 #									#
3079 # INPUT ***************************************************************	#
3080 #	- The system stack contains the FP Operr exception frame	#
3081 #	- The fsave frame contains the source operand			#
3082 #									#
3083 # OUTPUT **************************************************************	#
3084 #	No access error:						#
3085 #	- The system stack is unchanged					#
3086 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
3087 #									#
3088 # ALGORITHM ***********************************************************	#
3089 #	In a system where the FP Operr exception is enabled, the goal	#
3090 # is to get to the handler specified at _real_operr(). But, on the 060,	#
3091 # for opclass zero and two instruction taking this exception, the	#
3092 # input operand in the fsave frame may be incorrect for some cases	#
3093 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
3094 # do just this and then exits through _real_operr().			#
3095 #	For opclass 3 instructions, the 060 doesn't store the default	#
3096 # operr result out to memory or data register file as it should.	#
3097 # This code must emulate the move out before finally exiting through	#
3098 # _real_operr(). The move out, if to memory, is performed using		#
3099 # _mem_write() "callout" routines that may return a failing result.	#
3100 # In this special case, the handler must exit through facc_out()	#
3101 # which creates an access error stack frame from the current operr	#
3102 # stack frame.								#
3103 #									#
3104 #########################################################################
3105 
3106 	global		_fpsp_operr
3107 _fpsp_operr:
3108 
3109 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3110 
3111 	fsave		FP_SRC(%a6)		# grab the "busy" frame
3112 
3113 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3114 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3115 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3116 
3117 # the FPIAR holds the "current PC" of the faulting instruction
3118 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3119 
3120 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3121 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3122 	bsr.l		_imem_read_long		# fetch the instruction words
3123 	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched instruction longword
3124 
3125 ##############################################################################
3126 
3127 	btst		&13,%d0			# is instr an fmove out?
3128 	bne.b		foperr_out		# fmove out
3129 
3130 
3131 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3132 # this would be the case for opclass two operations with a source infinity or
3133 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3134 # cause an operr so we don't need to check for them here.
3135 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3136 	bsr.l		fix_skewed_ops		# fix src op
3137 
3138 foperr_exit:
3139 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3140 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3141 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3142 
3143 	frestore	FP_SRC(%a6)		# put the (possibly adjusted) fsave frame back
3144 
3145 	unlk		%a6			# unravel the frame
3146 	bra.l		_real_operr		# exit to os operr handler
3147 
3148 ########################################################################
3149 
3150 #
3151 # the hardware does not save the default result to memory on enabled
3152 # operand error exceptions. we do this here before passing control to
3153 # the user operand error handler.
3154 #
3155 # byte, word, and long destination format operations can pass
3156 # through here. we simply need to test the sign of the src
3157 # operand and save the appropriate minimum or maximum integer value
3158 # to the effective address as pointed to by the stacked effective address.
3159 #
3160 # although packed opclass three operations can take operand error
3161 # exceptions, they won't pass through here since they are caught
3162 # first by the unsupported data format exception handler. that handler
3163 # sends them directly to _real_operr() if necessary.
3164 #
3165 foperr_out:
3166 
3167 	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3168 	andi.w		&0x7fff,%d1		# strip the sign bit
3169 	cmpi.w		%d1,&0x7fff		# is the exponent all ones?
3170 	bne.b		foperr_out_not_qnan	# no
3171 # the operand is either an infinity or a QNAN.
3172 	tst.l		FP_SRC_LO(%a6)		# is lo mantissa nonzero?
3173 	bne.b		foperr_out_qnan		# yes; treat as QNAN
3174 	mov.l		FP_SRC_HI(%a6),%d1	# fetch hi mantissa
3175 	andi.l		&0x7fffffff,%d1		# ignore the top mantissa bit
3176 	beq.b		foperr_out_not_qnan	# rest is zero; treat as infinity
3177 foperr_out_qnan:
3178 	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)	# default result = hi mantissa of the QNAN
3179 	bra.b		foperr_out_jmp
3180 
3181 foperr_out_not_qnan:
3182 	mov.l		&0x7fffffff,%d1		# default result for positive src
3183 	tst.b		FP_SRC_EX(%a6)		# test sign of src op
3184 	bpl.b		foperr_out_not_qnan2	# positive
3185 	addq.l		&0x1,%d1		# negative; make it 0x80000000
3186 foperr_out_not_qnan2:
3187 	mov.l		%d1,L_SCR1(%a6)		# save default result
3188 
3189 foperr_out_jmp:
3190 	bfextu		%d0{&19:&3},%d0		# extract dst format field
3191 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3192 	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0	# fetch table-relative offset of handler
3193 	jmp		(tbl_operr.b,%pc,%a0)	# jump to handler for this dst format
3194 
3195 tbl_operr:
3196 	short		foperr_out_l - tbl_operr # long word integer
3197 	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3198 	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3199 	short		foperr_exit  - tbl_operr # packed won't enter here
3200 	short		foperr_out_w - tbl_operr # word integer
3201 	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3202 	short		foperr_out_b - tbl_operr # byte integer
3203 	short		tbl_operr    - tbl_operr # packed won't enter here
3204 
3205 foperr_out_b:
3206 	mov.b		L_SCR1(%a6),%d0		# load default result (first byte of L_SCR1)
3207 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3208 	ble.b		foperr_out_b_save_dn	# yes
3209 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3210 	bsr.l		_dmem_write_byte	# write the default result
3211 
3212 	tst.l		%d1			# did dstore fail?
3213 	bne.l		facc_out_b		# yes
3214 
3215 	bra.w		foperr_exit
3216 foperr_out_b_save_dn:
3217 	andi.w		&0x0007,%d1		# keep only the register number
3218 	bsr.l		store_dreg_b		# store result to regfile
3219 	bra.w		foperr_exit
3220 
3221 foperr_out_w:
3222 	mov.w		L_SCR1(%a6),%d0		# load default result (first word of L_SCR1)
3223 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3224 	ble.b		foperr_out_w_save_dn	# yes
3225 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3226 	bsr.l		_dmem_write_word	# write the default result
3227 
3228 	tst.l		%d1			# did dstore fail?
3229 	bne.l		facc_out_w		# yes
3230 
3231 	bra.w		foperr_exit
3232 foperr_out_w_save_dn:
3233 	andi.w		&0x0007,%d1		# keep only the register number
3234 	bsr.l		store_dreg_w		# store result to regfile
3235 	bra.w		foperr_exit
3236 
3237 foperr_out_l:
3238 	mov.l		L_SCR1(%a6),%d0		# load default result
3239 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3240 	ble.b		foperr_out_l_save_dn	# yes
3241 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3242 	bsr.l		_dmem_write_long	# write the default result
3243 
3244 	tst.l		%d1			# did dstore fail?
3245 	bne.l		facc_out_l		# yes
3246 
3247 	bra.w		foperr_exit
3248 foperr_out_l_save_dn:
3249 	andi.w		&0x0007,%d1		# keep only the register number
3250 	bsr.l		store_dreg_l		# store result to regfile
3251 	bra.w		foperr_exit
3252 
3253 #########################################################################
3254 # XDEF ****************************************************************	#
3255 #	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3256 #									#
3257 #	This handler should be the first code executed upon taking the	#
3258 #	FP Signalling NAN exception in an operating system.		#
3259 #									#
3260 # XREF ****************************************************************	#
3261 #	_imem_read_long() - read instruction longword			#
3262 #	fix_skewed_ops() - adjust src operand in fsave frame		#
3263 #	_real_snan() - "callout" to operating system SNAN handler	#
3264 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3265 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3266 #	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3267 #	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3268 #									#
3269 # INPUT ***************************************************************	#
3270 #	- The system stack contains the FP SNAN exception frame		#
3271 #	- The fsave frame contains the source operand			#
3272 #									#
3273 # OUTPUT **************************************************************	#
3274 #	No access error:						#
3275 #	- The system stack is unchanged					#
3276 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
3277 #									#
3278 # ALGORITHM ***********************************************************	#
3279 #	In a system where the FP SNAN exception is enabled, the goal	#
3280 # is to get to the handler specified at _real_snan(). But, on the 060,	#
3281 # for opclass zero and two instructions taking this exception, the	#
3282 # input operand in the fsave frame may be incorrect for some cases	#
3283 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
3284 # do just this and then exits through _real_snan().			#
3285 #	For opclass 3 instructions, the 060 doesn't store the default	#
3286 # SNAN result out to memory or data register file as it should.		#
3287 # This code must emulate the move out before finally exiting through	#
3288 # _real_snan(). The move out, if to memory, is performed using		#
3289 # _mem_write() "callout" routines that may return a failing result.	#
3290 # In this special case, the handler must exit through facc_out()	#
3291 # which creates an access error stack frame from the current SNAN	#
3292 # stack frame.								#
3293 #	For the case of an extended precision opclass 3 instruction,	#
3294 # if the effective addressing mode was -() or ()+, then the address	#
3295 # register must get updated by calling _calc_ea_fout(). If the <ea>	#
3296 # was -(a7) from supervisor mode, then the exception frame currently	#
3297 # on the system stack must be carefully moved "down" to make room	#
3298 # for the operand being moved.						#
3299 #									#
3300 #########################################################################
3301 
3302 	global		_fpsp_snan
3303 _fpsp_snan:
3304 
3305 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3306 
3307 	fsave		FP_SRC(%a6)		# grab the "busy" frame
3308 
3309 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3310 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3311 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3312 
3313 # the FPIAR holds the "current PC" of the faulting instruction
3314 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3315 
3316 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3317 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3318 	bsr.l		_imem_read_long		# fetch the instruction words
3319 	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched instruction longword
3320 
3321 ##############################################################################
3322 
3323 	btst		&13,%d0			# is instr an fmove out?
3324 	bne.w		fsnan_out		# fmove out
3325 
3326 
3327 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3328 # this would be the case for opclass two operations with a source infinity or
3329 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3330 # fixed here.
3331 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3332 	bsr.l		fix_skewed_ops		# fix src op
3333 
3334 fsnan_exit:
3335 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3336 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3337 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3338 
3339 	frestore	FP_SRC(%a6)		# put the (possibly adjusted) fsave frame back
3340 
3341 	unlk		%a6			# unravel the frame
3342 	bra.l		_real_snan		# exit to os snan handler
3343 
3344 ########################################################################
3345 
3346 #
3347 # the hardware does not save the default result to memory on enabled
3348 # snan exceptions. we do this here before passing control to
3349 # the user snan handler.
3350 #
3351 # byte, word, long, and packed destination format operations can pass
3352 # through here. since packed format operations already were handled by
3353 # fpsp_unsupp(), then we need to do nothing else for them here.
3354 # for byte, word, and long, we simply need to test the sign of the src
3355 # operand and save the appropriate minimum or maximum integer value
3356 # to the effective address as pointed to by the stacked effective address.
3357 #
3358 fsnan_out:
3359 
3360 	bfextu		%d0{&19:&3},%d0		# extract dst format field
3361 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3362 	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
3363 	jmp		(tbl_snan.b,%pc,%a0)
3364 
3365 tbl_snan:
3366 	short		fsnan_out_l - tbl_snan # long word integer
3367 	short		fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3368 	short		fsnan_out_x - tbl_snan # ext prec shouldn't happen
3369 	short		tbl_snan    - tbl_snan # packed needs no help
3370 	short		fsnan_out_w - tbl_snan # word integer
3371 	short		fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3372 	short		fsnan_out_b - tbl_snan # byte integer
3373 	short		tbl_snan    - tbl_snan # packed needs no help
3374 
3375 fsnan_out_b:
3376 	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3377 	bset		&6,%d0			# set SNAN bit
3378 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3379 	ble.b		fsnan_out_b_dn		# yes
3380 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3381 	bsr.l		_dmem_write_byte	# write the default result
3382 
3383 	tst.l		%d1			# did dstore fail?
3384 	bne.l		facc_out_b		# yes
3385 
3386 	bra.w		fsnan_exit
3387 fsnan_out_b_dn:
3388 	andi.w		&0x0007,%d1
3389 	bsr.l		store_dreg_b		# store result to regfile
3390 	bra.w		fsnan_exit
3391 
3392 fsnan_out_w:
3393 	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3394 	bset		&14,%d0			# set SNAN bit
3395 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3396 	ble.b		fsnan_out_w_dn		# yes
3397 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3398 	bsr.l		_dmem_write_word	# write the default result
3399 
3400 	tst.l		%d1			# did dstore fail?
3401 	bne.l		facc_out_w		# yes
3402 
3403 	bra.w		fsnan_exit
3404 fsnan_out_w_dn:
3405 	andi.w		&0x0007,%d1
3406 	bsr.l		store_dreg_w		# store result to regfile
3407 	bra.w		fsnan_exit
3408 
# long word integer destination.
# in:  d1 = <ea> mode,reg byte of the opword
# the default result is the upper longword of the SNAN mantissa with
# bit 30 forced on. it goes to a data register when the mode,reg byte
# is 0-7, otherwise to memory at the stacked <ea>.
fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# d0 = upper longword of SNAN mantissa
	ori.l		&0x40000000,%d0		# turn on the SNAN bit (bit 30)
	cmpi.b		%d1,&0x7		# mode,reg above 7 => not Dn
	bgt.b		fsnan_out_l_mem		# destination is memory

# destination is a data register: reduce d1 to the register number.
fsnan_out_l_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# destination is memory: write through the "callout".
fsnan_out_l_mem:
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# non-zero d1 => the write failed
	beq.w		fsnan_exit		# write succeeded
	bra.l		facc_out_l		# write failed
3425 
# single precision destination.
# in:  d1 = <ea> mode,reg byte of the opword
# builds a sgl format NAN from the source operand: the sign comes from
# FP_SRC_EX, the exponent and top fraction bit are forced by 0x7fc00000,
# and the upper mantissa longword shifted right by 8 is or'd in.
# the result is written to memory, or to a data register when the
# mode,reg byte is 0-7.
# NOTE: the data register path is labelled "fsnan_out_d_dn" even though
# it belongs to this single precision case; it stores a longword.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa (mode,reg no longer needed)
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
# data register destination. d1 still holds the mode,reg byte and is
# needed afterwards for the register number, so it is parked on the
# stack while it doubles as scratch for the mantissa.
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save mode,reg byte
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# restore mode,reg byte
	andi.w		&0x0007,%d1		# keep only the register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit
3454 
# double precision destination (memory only; there is no data register
# path here).
# the 8 byte result is assembled in the FP_SCR0 scratch area and then
# written to the stacked <ea>:
#   first longword  = sign | 0x7ff80000 | (hi mantissa >> 11)
#   second longword = (low 11 bits of hi mantissa, moved to the top 11
#                      bits) | (lo mantissa >> 11)
# NOTE(review): this relies on FP_SCR0_EX and FP_SCR0_HI being the first
# two consecutive longwords at FP_SCR0 -- confirm against the equates.
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1			# drop the 11 bits that do not fit
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
	ror.l		%d0,%d1			# rotate them into the top of the longword
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1			# make room for the 11 carried-in bits
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit
3480 
3481 # for extended precision, if the addressing mode is pre-decrement or
3482 # post-increment, then the address register did not get updated.
3483 # in addition, for pre-decrement, the stacked <ea> is incorrect.
# extended precision destination.
# the 12 byte default result is built in FP_SCR0: the sign/exponent word
# is copied from the source, the following word is cleared, bit 30 of the
# upper mantissa longword is set, and the lower mantissa longword is
# copied unchanged.
# the destination address is then recomputed with _calc_ea_fout(), which
# is handed a copy of the user's a7 (EXC_A7) and a6 (EXC_A6) and may
# update them.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)	# copy sign,exponent
	clr.w		2+FP_SCR0(%a6)		# zero the word after the exponent
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

# user mode: a7 is the user stack pointer.
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)	# copy the a6 saved by "link"

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr for _dmem_write()
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)	# write back the (possibly updated) a6

# in:  a1 = destination address
fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

# supervisor mode: only a6 needs the copy-in/copy-out treatment here.
fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)	# copy the a6 saved by "link"

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr for _dmem_write()
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)	# write back the (possibly updated) a6

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the destination is the supervisor stack itself, so no memory "callout"
# is used. instead, three longwords of the exception frame (starting at
# EXC_SR, EXC_PC+2 and EXC_EA) are copied 0xc bytes lower and the 12 byte
# result is written over the locations they came from.
# a6 is reloaded with a plain move rather than "unlk" and the frame is
# then addressed through sp, which still points at the bottom of the
# local area (hence the LOCAL_SIZE bias on every offset).
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

# slide the exception frame down by 12 bytes
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# drop the extended precision result into the vacated locations
	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# discard locals; sp -> relocated frame

	bra.l		_real_snan
3551 
3552 #########################################################################
3553 # XDEF ****************************************************************	#
3554 #	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3555 #									#
3556 #	This handler should be the first code executed upon taking the	#
3557 #	FP Inexact exception in an operating system.			#
3558 #									#
3559 # XREF ****************************************************************	#
3560 #	_imem_read_long() - read instruction longword			#
3561 #	fix_skewed_ops() - adjust src operand in fsave frame		#
3562 #	set_tag_x() - determine optype of src/dst operands		#
3563 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3564 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3565 #	load_fpn2() - load dst operand from FP regfile			#
3566 #	smovcr() - emulate an "fmovcr" instruction			#
3567 #	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
3569 #	_real_inex() - "callout" to operating system inexact handler	#
3570 #									#
3571 # INPUT ***************************************************************	#
3572 #	- The system stack contains the FP Inexact exception frame	#
3573 #	- The fsave frame contains the source operand			#
3574 #									#
3575 # OUTPUT **************************************************************	#
3576 #	- The system stack is unchanged					#
3577 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
3578 #									#
3579 # ALGORITHM ***********************************************************	#
3580 #	In a system where the FP Inexact exception is enabled, the goal	#
3581 # is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
3583 # hardware doesn't store the correct result to the destination FP	#
3584 # register as did the '040 and '881/2. This handler must emulate the	#
3585 # instruction in order to get this value and then store it to the	#
3586 # correct register before calling _real_inex().				#
3587 #	For opclass 3 instructions, the 060 doesn't store the default	#
3588 # inexact result out to memory or data register file as it should.	#
3589 # This code must emulate the move out by calling fout() before finally	#
3590 # exiting through _real_inex().						#
3591 #									#
3592 #########################################################################
3593 
	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# save opword,extension word

##############################################################################

# d0 = opword in the upper word, extension word in the lower word.
	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr		# convert with a zeroed FPCR
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)	# rewrite the word after the exponent

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero ccode,exc bytes; keep INEX1,accrued

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# call the emulation routine selected by the low 7 bits of the extension
# word. tbl_unsupp holds longword offsets relative to tbl_unsupp itself.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst fp reg number
	bsr.l		store_fpreg		# store result to fp regfile

# common exit: undo the entry sequence and hand off to the OS handler.
finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

# "fmovecr": no operands to tag; the low 7 bits of the extension word
# select the constant.
finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr
	bra.b		finex_save
3708 
3709 ########################################################################
3710 
3711 #
3712 # the hardware does not save the default result to memory on enabled
3713 # inexact exceptions. we do this here before passing control to
3714 # the user inexact handler.
3715 #
3716 # byte, word, and long destination format operations can pass
3717 # through here. so can double and single precision.
3718 # although packed opclass three operations can take inexact
3719 # exceptions, they won't pass through here since they are caught
3720 # first by the unsupported data format exception handler. that handler
3721 # sends them directly to _real_inex() if necessary.
3722 #
# opclass three ("fmove out") path of _fpsp_inex(): emulate the store
# with fout() and then leave through the common finex_exit sequence.
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit
3737 
3738 #########################################################################
3739 # XDEF ****************************************************************	#
3740 #	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3741 #									#
3742 #	This handler should be the first code executed upon taking	#
3743 #	the FP DZ exception in an operating system.			#
3744 #									#
3745 # XREF ****************************************************************	#
3746 #	_imem_read_long() - read instruction longword from memory	#
3747 #	fix_skewed_ops() - adjust fsave operand				#
3748 #	_real_dz() - "callout" exit point from FP DZ handler		#
3749 #									#
3750 # INPUT ***************************************************************	#
3751 #	- The system stack contains the FP DZ exception stack.		#
3752 #	- The fsave frame contains the source operand.			#
3753 #									#
3754 # OUTPUT **************************************************************	#
3755 #	- The system stack contains the FP DZ exception stack.		#
3756 #	- The fsave frame contains the adjusted source operand.		#
3757 #									#
3758 # ALGORITHM ***********************************************************	#
3759 #	In a system where the DZ exception is enabled, the goal is to	#
3760 # get to the handler specified at _real_dz(). But, on the 060, when the	#
3761 # exception is taken, the input operand in the fsave state frame may	#
3762 # be incorrect for some cases and need to be adjusted. So, this package	#
3763 # adjusts the operand using fix_skewed_ops() and then branches to	#
3764 # _real_dz().								#
3765 #									#
3766 #########################################################################
3767 
	global		_fpsp_dz
_fpsp_dz:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# save opword,extension word

##############################################################################


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# undo the entry sequence; the (possibly adjusted) fsave frame is
# restored into the FPU before the OS handler is entered.
fdz_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_dz
3805 
3806 #########################################################################
3807 # XDEF ****************************************************************	#
3808 #	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
3809 #									#
3810 #	This handler should be the first code executed upon taking the	#
3811 #	"Line F Emulator" exception in an operating system.		#
3812 #									#
3813 # XREF ****************************************************************	#
3814 #	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
3815 #	_real_fpu_disabled() - handle "FPU disabled" exceptions		#
3816 #	_real_fline() - handle "FLINE" exceptions			#
3817 #	_imem_read_long() - read instruction longword			#
3818 #									#
3819 # INPUT ***************************************************************	#
3820 #	- The system stack contains a "Line F Emulator" exception	#
3821 #	  stack frame.							#
3822 #									#
3823 # OUTPUT **************************************************************	#
3824 #	- The system stack is unchanged					#
3825 #									#
3826 # ALGORITHM ***********************************************************	#
3827 #	When a "Line F Emulator" exception occurs, there are 3 possible	#
3828 # exception types, denoted by the exception stack frame format number:	#
3829 #	(1) FPU unimplemented instruction (6 word stack frame)		#
3830 #	(2) FPU disabled (8 word stack frame)				#
3831 #	(3) Line F (4 word stack frame)					#
3832 #									#
3833 #	This module determines which and forks the flow off to the	#
3834 # appropriate "callout" (for "disabled" and "Line F") or to the		#
3835 # correct emulation code (for "FPU unimplemented").			#
3836 #	This code also must check for "fmovecr" instructions w/ a	#
3837 # non-zero <ea> field. These may get flagged as "Line F" but should	#
3838 # really be flagged as "FPU Unimplemented". (This is a "feature" on	#
# the '060.)								#
3840 #									#
3841 #########################################################################
3842 
	global		_fpsp_fline
_fpsp_fline:

# the word at 0x6(%sp) is the frame format/vector offset word of the
# exception stack frame; it identifies which kind of Line F exception
# was taken.

# check to see if this exception is a "FP Unimplemented Instruction"
# exception. if so, branch directly to that handler's entry point.
	cmpi.w		0x6(%sp),&0x202c
	beq.l		_fpsp_unimp

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled

# the exception was an "F-Line Illegal" exception. we check to see
# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
# so, convert the F-Line exception stack frame to an FP Unimplemented
# Instruction exception stack frame else branch to the OS entry
# point for the F-Line exception handler.
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch instruction words

# d0 = opword in the upper word, extension word in the lower word.
	bfextu		%d0{&0:&10},%d1		# is it an fmovecr? (opword upper 10 bits)
	cmpi.w		%d1,&0x03c8
	bne.b		fline_fline		# no

	bfextu		%d0{&16:&6},%d1		# is it an fmovecr? (ext word upper 6 bits)
	cmpi.b		%d1,&0x17
	bne.b		fline_fline		# no

# it's an fmovecr w/ a non-zero <ea> that has entered through
# the F-Line Illegal exception.
# so, we need to convert the F-Line exception stack frame into an
# FP Unimplemented Instruction stack frame and jump to that entry
# point.
#
# but, if the FPU is disabled, then we need to jump to the FPU disabled
# entry point.
	movc		%pcr,%d0
	btst		&0x1,%d0		# PCR bit 1 set => treat FPU as disabled
	beq.b		fline_fmovcr

# FPU disabled: grow the frame by 8 bytes and rewrite it with frame
# format/vector offset word 0x402c.
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
	mov.w		0x8(%sp),(%sp)		# move SR
	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
	mov.w		&0x402c,0x6(%sp)	# new frame format/vector offset
	mov.l		0x2(%sp),0xc(%sp)	# copy "Current PC" to the last frame slot
	addq.l		&0x4,0x2(%sp)		# set "Next PC"

	bra.l		_real_fpu_disabled

# FPU enabled: grow the frame by 4 bytes, change the format byte to 0x20
# and enter the unimplemented instruction handler. that handler reads
# the faulting PC from the FPIAR, so it is loaded here first.
fline_fmovcr:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	fmov.l		0x2(%sp),%fpiar		# set current PC
	addq.l		&0x4,0x2(%sp)		# set Next PC

	mov.l		(%sp),-(%sp)		# move SR, upper word of PC down
	mov.l		0x8(%sp),0x4(%sp)	# move lower word of PC, format/vector word
	mov.b		&0x20,0x6(%sp)		# upper byte of format/vector word

	bra.l		_fpsp_unimp

# a genuine F-Line exception: leave the frame untouched.
fline_fline:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	bra.l		_real_fline
3923 
3924 #########################################################################
3925 # XDEF ****************************************************************	#
3926 #	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
3927 #		       Instruction" exception.				#
3928 #									#
3929 #	This handler should be the first code executed upon taking the	#
3930 #	FP Unimplemented Instruction exception in an operating system.	#
3931 #									#
3932 # XREF ****************************************************************	#
3933 #	_imem_read_{word,long}() - read instruction word/longword	#
3934 #	load_fop() - load src/dst ops from memory and/or FP regfile	#
3935 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3936 #	tbl_trans - addr of table of emulation routines for trnscndls	#
3937 #	_real_access() - "callout" for access error exception		#
3938 #	_fpsp_done() - "callout" for exit; work all done		#
3939 #	_real_trace() - "callout" for Trace enabled exception		#
3940 #	smovcr() - emulate "fmovecr" instruction			#
3941 #	funimp_skew() - adjust fsave src ops to "incorrect" value	#
3942 #	_ftrapcc() - emulate an "ftrapcc" instruction			#
3943 #	_fdbcc() - emulate an "fdbcc" instruction			#
3944 #	_fscc() - emulate an "fscc" instruction				#
3945 #	_real_trap() - "callout" for Trap exception			#
3946 #	_real_bsun() - "callout" for enabled Bsun exception		#
3947 #									#
3948 # INPUT ***************************************************************	#
3949 #	- The system stack contains the "Unimplemented Instr" stk frame	#
3950 #									#
3951 # OUTPUT **************************************************************	#
3952 #	If access error:						#
3953 #	- The system stack is changed to an access error stack frame	#
3954 #	If Trace exception enabled:					#
3955 #	- The system stack is changed to a Trace exception stack frame	#
3956 #	Else: (normal case)						#
3957 #	- Correct result has been stored as appropriate			#
3958 #									#
3959 # ALGORITHM ***********************************************************	#
3960 #	There are two main cases of instructions that may enter here to	#
3961 # be emulated: (1) the FPgen instructions, most of which were also	#
3962 # unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
3963 #	For the first set, this handler calls the routine load_fop()	#
3964 # to load the source and destination (for dyadic) operands to be used	#
3965 # for instruction emulation. The correct emulation routine is then	#
3966 # chosen by decoding the instruction type and indexing into an		#
3967 # emulation subroutine index table. After emulation returns, this	#
3968 # handler checks to see if an exception should occur as a result of the #
3969 # FP instruction emulation. If so, then an FP exception of the correct	#
3970 # type is inserted into the FPU state frame using the "frestore"	#
3971 # instruction before exiting through _fpsp_done(). In either the	#
3972 # exceptional or non-exceptional cases, we must check to see if the	#
3973 # Trace exception is enabled. If so, then we must create a Trace	#
3974 # exception frame from the current exception frame and exit through	#
3975 # _real_trace().							#
3976 #	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
3977 # _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
3978 # may flag that a BSUN exception should be taken. If so, then the	#
3979 # current exception stack frame is converted into a BSUN exception	#
3980 # stack frame and an exit is made through _real_bsun(). If the		#
3981 # instruction was "ftrapcc" and a Trap exception should result, a Trap	#
3982 # exception stack frame is created from the current frame and an exit	#
3983 # is made through _real_trap(). If a Trace exception is pending, then	#
3984 # a Trace exception frame is created from the current frame and a jump	#
3985 # is made to _real_trace(). Finally, if none of these conditions exist,	#
# then the handler exits through the callout _fpsp_done().		#
3987 #									#
3988 #	In any of the above scenarios, if a _mem_read() or _mem_write()	#
3989 # "callout" returns a failing value, then an access error stack frame	#
3990 # is created from the current stack frame and an exit is made through	#
3991 # _real_access().							#
3992 #									#
3993 #########################################################################
3994 
3995 #
3996 # FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
3997 #
3998 #	*****************
3999 #	*		* => <ea> of fp unimp instr.
4000 #	-      EA	-
4001 #	*		*
4002 #	*****************
4003 #	* 0x2 *  0x02c	* => frame format and vector offset(vector #11)
4004 #	*****************
4005 #	*		*
4006 #	-    Next PC	- => PC of instr to execute after exc handling
4007 #	*		*
4008 #	*****************
4009 #	*      SR	* => SR at the time the exception was taken
4010 #	*****************
4011 #
4012 # Note: the !NULL bit does not get set in the fsave frame when the
4013 # machine encounters an fp unimp exception. Therefore, it must be set
4014 # before leaving this handler.
4015 #
	global		_fpsp_unimp
_fpsp_unimp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1

	btst		&0x5,EXC_SR(%a6)	# user mode exception?
	bne.b		funimp_s		# no; supervisor mode

# save the value of the user stack pointer onto the stack frame
funimp_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
	bra.b		funimp_cont

# store the value of the supervisor stack pointer BEFORE the exc occurred.
# old_sp is address just above stacked effective address.
funimp_s:
	lea		4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# store a7'
	mov.l		%a0,OLD_A7(%a6)		# make a copy

funimp_cont:

# the FPIAR holds the "current PC" of the faulting instruction.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# save opword,extension word

############################################################################

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag

# Divide the fp instructions into 8 types based on the TYPE field in
# bits 6-8 of the opword (classes 6,7 are undefined).
# (for the '060, only two types can take this exception)
# d0 holds the opword in its upper word, so bit 22 of d0 is bit 6 of the
# opword; testing that single bit is enough to tell type 0 from type 1.
#	bftst		%d0{&7:&3}		# test TYPE
	btst		&22,%d0			# type 0 or 1 ?
	bne.w		funimp_misc		# type 1
4064 
4065 #########################################
4066 # TYPE == 0: General instructions	#
4067 #########################################
# in:  d0 = opword (upper word), extension word (lower word)
funimp_gen:

	clr.b		STORE_FLG(%a6)		# clear "store result" flag

# clear the ccode byte and exception status byte
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		funimp_fmovcr		# yes

# load the operand(s), then call the emulation routine. the index into
# tbl_trans is (low 6 extension bits << 3) | source operand tag, and the
# table holds 16-bit offsets relative to tbl_trans itself.
funimp_gen_op:
	bsr.l		_load_fop		# load

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x003f,%d1		# extract extension bits
	lsl.w		&0x3,%d1		# shift left 3 bits
	or.b		STAG(%a6),%d1		# insert src optag bits

	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
	lea		FP_SRC(%a6),%a0		# pass src ptr in a0

	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1
	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate

# if any exception is enabled in the FPCR, take the slower path that
# checks whether one of them must be reported.
funimp_fsave:
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		funimp_ena		# some are enabled

funimp_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst fp reg number
	bsr.l		store_fpreg		# store result to fp regfile

funimp_gen_exit:
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

# (a7)+ and -(a7) operands need the stack pointer fixed up before exit.
funimp_gen_exit_cmp:
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
	beq.b		funimp_gen_exit_a7	# yes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
	beq.b		funimp_gen_exit_a7	# yes

funimp_gen_exit_cont:
	unlk		%a6

# (%sp) is now the stacked SR; bit 7 of its upper byte is tested for trace.
funimp_gen_exit_cont2:
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

# this catches a problem with the case where an exception will be re-inserted
# into the machine. the frestore has already been executed...so, the fmov.l
# alone of the control register would trigger an unwanted exception.
# until I feel like fixing this, we'll sidestep the exception.
	fsave		-(%sp)
	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
	frestore	(%sp)+
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
	bra.l		_real_trace
4132 
# exit fix-up for an operand addressed through a7 ((a7)+ or -(a7)).
# EXC_A7 holds the updated stack pointer value.
funimp_gen_exit_a7:
	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
	bne.b		funimp_gen_exit_a7_s	# supervisor

# user mode: just write the updated value back to the usp. a0 has
# already been restored, so it is preserved around the move.
	mov.l		%a0,-(%sp)
	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	mov.l		(%sp)+,%a0
	bra.b		funimp_gen_exit_cont

# if the instruction was executed from supervisor mode and the addressing
# mode was (a7)+, then the stack frame for the rte must be shifted "up"
# "n" bytes where "n" is the size of the src operand type.
# f<op>.{b,w,l,s,d,x,p}
funimp_gen_exit_a7_s:
	mov.l		%d0,-(%sp)		# save d0
	mov.l		EXC_A7(%a6),%d0		# load new a7'
	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
	mov.w		%d0,EXC_SR(%a6)		# store incr number
	mov.l		(%sp)+,%d0		# restore d0

	unlk		%a6

# after "unlk", (%sp) is the word where the increment was just stored;
# adding it to sp moves sp onto the shifted frame.
	add.w		(%sp),%sp		# stack frame shifted
	bra.b		funimp_gen_exit_cont2
4160 
4161 ######################
4162 # fmovecr.x #ccc,fpn #
4163 ######################
# emulate "fmovecr" with smovcr() and rejoin the general flow at
# funimp_fsave.
funimp_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode in d0
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset in d1
	bsr.l		smovcr
	bra.w		funimp_fsave
4171 
4172 #########################################################################
4173 
4174 #
4175 # the user has enabled some exceptions. we figure not to see this too
4176 # often so that's why it gets lower priority.
4177 #
4178 funimp_ena:
4179 
4180 # was an exception set that was also enabled?
4181 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
4182 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
4183 	bne.b		funimp_exc		# at least one was set
4184 
4185 # no exception that was enabled was set BUT if we got an exact overflow
4186 # and overflow wasn't enabled but inexact was (yech!) then this is
4187 # an inexact exception; otherwise, return to normal non-exception flow.
4188 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4189 	beq.w		funimp_store		# no; return to normal flow
4190 
4191 # the overflow w/ exact result happened but was inexact set in the FPCR?
4192 funimp_ovfl:
4193 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4194 	beq.w		funimp_store		# no; return to normal flow
4195 	bra.b		funimp_exc_ovfl		# yes
4196 
4197 # some exception happened that was actually enabled.
4198 # we'll insert this new exception into the FPU and then return.
# on entry d0 holds the bfffo result from funimp_ena, i.e. 24-31.
4199 funimp_exc:
4200 	subi.l		&24,%d0			# fix offset to be 0-7
4201 	cmpi.b		%d0,&0x6		# is exception INEX?
4202 	bne.b		funimp_exc_force	# no
4203 
4204 # the enabled exception was inexact. so, if it occurs with an overflow
4205 # or underflow that was disabled, then we have to force an overflow or
4206 # underflow frame. the eventual overflow or underflow handler will see that
4207 # it's actually an inexact and act appropriately. this is the only easy
4208 # way to have the EXOP available for the enabled inexact handler when
4209 # a disabled overflow or underflow has also happened.
4210 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4211 	bne.b		funimp_exc_ovfl		# yes
4212 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4213 	bne.b		funimp_exc_unfl		# yes
4214 
4215 # force the fsave exception status bits to signal an exception of the
4216 # appropriate type. don't forget to "skew" the source operand in case we
4217 # "unskewed" the one the hardware initially gave us.
4218 funimp_exc_force:
4219 	mov.l		%d0,-(%sp)		# save d0
4220 	bsr.l		funimp_skew		# check for special case
4221 	mov.l		(%sp)+,%d0		# restore d0
# d0 (0-7) selects one word from the table below; it is written to 2+FP_SRC.
4222 	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
4223 	bra.b		funimp_gen_exit2	# exit with frestore
4224 
# status words indexed by the exception offset computed above (0-7).
# index 3 and index 4 hold the same values that funimp_exc_ovfl (0xe005) and
# funimp_exc_unfl (0xe003) store; index 6 is the INEX case tested above.
# NOTE(review): the remaining indices presumably follow the FPSR exception
# byte order (bsun, snan, operr, ovfl, unfl, dz, inex2, inex1) -- confirm.
# the table must stay within byte-displacement range of the mov.w above.
4225 tbl_funimp_except:
4226 	short		0xe002, 0xe006, 0xe004, 0xe005
4227 	short		0xe003, 0xe002, 0xe001, 0xe001
4228 
4229 # insert an overflow frame
# reached from funimp_ovfl (exact overflow with inexact enabled) and from
# funimp_exc (enabled inexact together with a recorded overflow).
4230 funimp_exc_ovfl:
4231 	bsr.l		funimp_skew		# check for special case
4232 	mov.w		&0xe005,2+FP_SRC(%a6)	# overflow status word for the frestore
4233 	bra.b		funimp_gen_exit2
4234 
4235 # insert an underflow frame
# reached from funimp_exc (enabled inexact together with a recorded underflow).
# there is no branch at the end: execution falls through into funimp_gen_exit2.
4236 funimp_exc_unfl:
4237 	bsr.l		funimp_skew		# check for special case
4238 	mov.w		&0xe003,2+FP_SRC(%a6)	# underflow status word for the frestore
4239 
4240 # this is the general exit point for an enabled exception that will be
4241 # restored into the machine for the instruction just emulated.
# the saved registers are reloaded first, then the frame at FP_SRC (whose
# status word was set by the caller) is frestore'd, and the exit continues at
# funimp_gen_exit_cmp (defined outside this view).
4242 funimp_gen_exit2:
4243 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4244 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4245 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4246 
4247 	frestore	FP_SRC(%a6)		# insert exceptional status
4248 
4249 	bra.w		funimp_gen_exit_cmp
4250 
4251 ############################################################################
4252 
4253 #
4254 # TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4255 #
4256 # These instructions were implemented on the '881/2 and '040 in hardware but
4257 # are emulated in software on the '060.
4258 #
# decode, using two 3-bit fields of d0:
#	mode == 1		-> funimp_fdbcc
#	mode != 1 and != 7	-> funimp_fscc
#	mode == 7, reg <  2	-> funimp_fscc
#	mode == 7, reg >= 2	-> fall through into funimp_ftrapcc
# NOTE(review): assumes d0 holds the instruction opword in its upper word so
# that bit-field offsets 10 and 13 select the <ea> mode and register fields;
# d0 is loaded outside this view -- confirm.
4259 funimp_misc:
4260 	bfextu		%d0{&10:&3},%d1		# extract mode field
4261 	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
4262 	beq.w		funimp_fdbcc		# yes
4263 	cmpi.b		%d1,&0x7		# is the mode field 7?
4264 	bne.w		funimp_fscc		# no; it is an fs<cc>
4265 	bfextu		%d0{&13:&3},%d1		# extract the 3 bits after the mode field
4266 	cmpi.b		%d1,&0x2		# is it an fs<cc>?
4267 	blt.w		funimp_fscc		# yes; field value is below 2
4268 
4269 #########################
4270 # ftrap<cc>		#
4271 # ftrap<cc>.w #<data>	#
4272 # ftrap<cc>.l #<data>	#
4273 #########################
# calls _ftrapcc and then dispatches on SPCOND_FLG:
#	fbsun_flg	-> funimp_bsun
#	ftrapcc_flg	-> fall through into funimp_ftrapcc_tp (take the trap)
#	anything else	-> funimp_done
4274 funimp_ftrapcc:
4275 
4276 	bsr.l		_ftrapcc		# FTRAP<cc>()
4277 
4278 	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4279 	beq.w		funimp_bsun		# yes
4280 
4281 	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4282 	bne.w		funimp_done		# no
4283 
4284 #	 FP UNIMP FRAME		   TRAP  FRAME
4285 #	*****************	*****************
4286 #	**    <EA>     **	**  Current PC **
4287 #	*****************	*****************
4288 #	* 0x2 *  0x02c	*	* 0x2 *  0x01c  *
4289 #	*****************	*****************
4290 #	**   Next PC   **	**   Next PC   **
4291 #	*****************	*****************
4292 #	*      SR	*	*      SR	*
4293 #	*****************	*****************
4294 #	    (6 words)		    (6 words)
4295 #
4296 # the ftrapcc instruction should take a trap. so, here we must create a
4297 # trap stack frame from an unimplemented fp instruction stack frame and
4298 # jump to the user supplied entry point for the trap exception
# the frame keeps its size: only the <EA> slot and the format/vector word
# are overwritten in place.
4299 funimp_ftrapcc_tp:
4300 	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4301 	mov.w		&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c
4302 
4303 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4304 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4305 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4306 
4307 	unlk		%a6
4308 	bra.l		_real_trap		# branch to user trap hook
4309 
4310 #########################
4311 # fdb<cc> Dn,<label>	#
4312 #########################
# fetches the 16-bit displacement that follows the instruction words, sign
# extends it into d0 and calls _fdbcc.
# exits: funimp_iacc if the fetch failed, funimp_bsun if SPCOND_FLG is
# fbsun_flg, otherwise funimp_done.
4313 funimp_fdbcc:
4314 
4315 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4316 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4317 	bsr.l		_imem_read_word		# read displacement
4318 
# _imem_read_word leaves the word in d0 and a status in d1 (non-zero = failure).
4319 	tst.l		%d1			# did ifetch fail?
4320 	bne.w		funimp_iacc		# yes
4321 
4322 	ext.l		%d0			# sign extend displacement
4323 
4324 	bsr.l		_fdbcc			# FDB<cc>()
4325 
4326 	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4327 	beq.w		funimp_bsun		# yes
4328 
4329 	bra.w		funimp_done		# branch to finish
4330 
4331 #################
4332 # fs<cc>.b <ea>	#
4333 #################
# calls _fscc, then:
#	SPCOND_FLG == fbsun_flg		-> funimp_bsun
#	bit 5 of the stacked SR byte set	-> funimp_fscc_s (supervisor case)
#	otherwise				-> fall through into funimp_fscc_u
4334 funimp_fscc:
4335 
4336 	bsr.l		_fscc			# FS<cc>()
4337 
4338 # I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4339 # does not need to update "An" before taking a bsun exception.
4340 	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4341 	beq.w		funimp_bsun		# yes
4342 
4343 	btst		&0x5,EXC_SR(%a6)	# is it a user mode exception?
4344 	bne.b		funimp_fscc_s		# no
4345 
# user mode: write EXC_A7 back to the user stack pointer unconditionally.
4346 funimp_fscc_u:
4347 	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
4348 	mov.l		%a0,%usp
4349 	bra.w		funimp_done		# branch to finish
4350 
4351 # remember, I'm assuming that post-increment is bogus...(it IS!!!)
4352 # so, the least significant WORD of the stacked effective address got
4353 # overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4354 # so that the rte will work correctly without destroying the result.
4355 # even though the operation size is byte, the stack ptr is decr by 2.
4356 #
4357 # remember, also, this instruction may be traced.
# if SPCOND_FLG is not mda7_flg nothing needs shifting and funimp_done is used.
4358 funimp_fscc_s:
4359 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4360 	bne.w		funimp_done		# no
4361 
4362 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4363 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4364 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4365 
4366 	unlk		%a6
4367 
# from here on the frame is addressed off %sp; (%sp) is the stacked SR.
4368 	btst		&0x7,(%sp)		# is trace enabled?
4369 	bne.b		funimp_fscc_s_trace	# yes
4370 
# not traced: move SR, PC and the format/vector word two bytes lower.
4371 	subq.l		&0x2,%sp
4372 	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
4373 	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
4374 	bra.l		_fpsp_done
4375 
# traced: shift SR and PC as above, but write a new format/vector word and
# put the fpiar value in the longword that follows it.
4376 funimp_fscc_s_trace:
4377 	subq.l		&0x2,%sp
4378 	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
4379 	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
4380 	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
4381 	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"
4382 
4383 	bra.l		_real_trace		# branch to user trace hook
4384 
4385 #
4386 # The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4387 # the fp unimplemented instruction exception stack frame into a bsun stack frame,
4388 # restore a bsun exception into the machine, and branch to the user
4389 # supplied bsun hook.
4390 #
4391 #	 FP UNIMP FRAME		   BSUN FRAME
4392 #	*****************	*****************
4393 #	**    <EA>     **	* 0x0 * 0x0c0	*
4394 #	*****************	*****************
4395 #	* 0x2 *  0x02c  *	** Current PC  **
4396 #	*****************	*****************
4397 #	**   Next PC   **	*      SR	*
4398 #	*****************	*****************
4399 #	*      SR	*	    (4 words)
4400 #	*****************
4401 #	    (6 words)
4402 #
# the 4-word frame is built in the upper part of the 6-word frame; the two
# words left below it are dropped by the addq after the unlk.
4403 funimp_bsun:
4404 	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
4405 	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4406 	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4407 
4408 	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled
4409 
4410 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4411 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4412 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4413 
4414 	frestore	FP_SRC(%a6)		# restore bsun exception
4415 
4416 	unlk		%a6
4417 
4418 	addq.l		&0x4,%sp		# erase sludge
4419 
4420 	bra.l		_real_bsun		# branch to user bsun hook
4421 
4422 #
4423 # all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4424 # and return.
4425 #
4426 # as usual, we have to check for trace mode being on here. since instructions
4427 # modifying the supervisor stack frame don't pass through here, this is a
4428 # relatively easy task.
4429 #
# exits through _fpsp_done, or through funimp_trace when bit 7 of the stacked
# SR byte is set.
4430 funimp_done:
4431 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4432 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4433 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4434 
4435 	unlk		%a6
4436 
# after the unlk, (%sp) is the first byte of the stacked SR.
4437 	btst		&0x7,(%sp)		# is trace enabled?
4438 	bne.b		funimp_trace		# yes
4439 
4440 	bra.l		_fpsp_done
4441 
4442 #	 FP UNIMP FRAME		  TRACE  FRAME
4443 #	*****************	*****************
4444 #	**    <EA>     **	**  Current PC **
4445 #	*****************	*****************
4446 #	* 0x2 *  0x02c	*	* 0x2 *  0x024  *
4447 #	*****************	*****************
4448 #	**   Next PC   **	**   Next PC   **
4449 #	*****************	*****************
4450 #	*      SR	*	*      SR	*
4451 #	*****************	*****************
4452 #	    (6 words)		    (6 words)
4453 #
4454 # the emulated instruction (this label is reached from funimp_done, which
4455 # serves ftrapcc/fscc/fdbcc) should take a trace trap. so, here we must create
4456 # a trace stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trace exception.
# a6 has already been unlinked, so the frame is addressed off %sp; only the
# <EA> slot and the low byte of the format/vector word are rewritten.
4457 funimp_trace:
4458 	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
4459 	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024
4460 
4461 	bra.l		_real_trace		# branch to user trace hook
4462 
4463 ################################################################
4464 
# tbl_trans: table of 16-bit offsets, each relative to tbl_trans itself.
# it holds 0x38 groups ($00-$37) of 8 entries = 0x1c0 entries, which matches
# the swbeg count below. an entry of "tbl_trans - tbl_trans" (zero) marks a
# slot with no routine in this table; its comment names the instruction or
# says ERROR.
# row comments read "$<group>-<position> <instruction> <operand class>", where
# <position> is the entry's index (0-7) inside its group.
# NOTE(review): presumably the group is the extension opcode and the position
# is the source operand tag, i.e. index = (opcode << 3) + tag -- the code that
# indexes this table is outside this view; confirm against the tag definitions.
4465 	global		tbl_trans
4466 	swbeg		&0x1c0
4467 tbl_trans:
4468 	short		tbl_trans - tbl_trans	# $00-0 fmovecr all
4469 	short		tbl_trans - tbl_trans	# $00-1 fmovecr all
4470 	short		tbl_trans - tbl_trans	# $00-2 fmovecr all
4471 	short		tbl_trans - tbl_trans	# $00-3 fmovecr all
4472 	short		tbl_trans - tbl_trans	# $00-4 fmovecr all
4473 	short		tbl_trans - tbl_trans	# $00-5 fmovecr all
4474 	short		tbl_trans - tbl_trans	# $00-6 fmovecr all
4475 	short		tbl_trans - tbl_trans	# $00-7 fmovecr all
4476 
4477 	short		tbl_trans - tbl_trans	# $01-0 fint norm
4478 	short		tbl_trans - tbl_trans	# $01-1 fint zero
4479 	short		tbl_trans - tbl_trans	# $01-2 fint inf
4480 	short		tbl_trans - tbl_trans	# $01-3 fint qnan
4481 	short		tbl_trans - tbl_trans	# $01-4 fint denorm
4482 	short		tbl_trans - tbl_trans	# $01-5 fint snan
4483 	short		tbl_trans - tbl_trans	# $01-6 fint unnorm
4484 	short		tbl_trans - tbl_trans	# $01-7 ERROR
4485 
4486 	short		ssinh	 - tbl_trans	# $02-0 fsinh norm
4487 	short		src_zero - tbl_trans	# $02-1 fsinh zero
4488 	short		src_inf	 - tbl_trans	# $02-2 fsinh inf
4489 	short		src_qnan - tbl_trans	# $02-3 fsinh qnan
4490 	short		ssinhd	 - tbl_trans	# $02-4 fsinh denorm
4491 	short		src_snan - tbl_trans	# $02-5 fsinh snan
4492 	short		tbl_trans - tbl_trans	# $02-6 fsinh unnorm
4493 	short		tbl_trans - tbl_trans	# $02-7 ERROR
4494 
4495 	short		tbl_trans - tbl_trans	# $03-0 fintrz norm
4496 	short		tbl_trans - tbl_trans	# $03-1 fintrz zero
4497 	short		tbl_trans - tbl_trans	# $03-2 fintrz inf
4498 	short		tbl_trans - tbl_trans	# $03-3 fintrz qnan
4499 	short		tbl_trans - tbl_trans	# $03-4 fintrz denorm
4500 	short		tbl_trans - tbl_trans	# $03-5 fintrz snan
4501 	short		tbl_trans - tbl_trans	# $03-6 fintrz unnorm
4502 	short		tbl_trans - tbl_trans	# $03-7 ERROR
4503 
4504 	short		tbl_trans - tbl_trans	# $04-0 fsqrt norm
4505 	short		tbl_trans - tbl_trans	# $04-1 fsqrt zero
4506 	short		tbl_trans - tbl_trans	# $04-2 fsqrt inf
4507 	short		tbl_trans - tbl_trans	# $04-3 fsqrt qnan
4508 	short		tbl_trans - tbl_trans	# $04-4 fsqrt denorm
4509 	short		tbl_trans - tbl_trans	# $04-5 fsqrt snan
4510 	short		tbl_trans - tbl_trans	# $04-6 fsqrt unnorm
4511 	short		tbl_trans - tbl_trans	# $04-7 ERROR
4512 
4513 	short		tbl_trans - tbl_trans	# $05-0 ERROR
4514 	short		tbl_trans - tbl_trans	# $05-1 ERROR
4515 	short		tbl_trans - tbl_trans	# $05-2 ERROR
4516 	short		tbl_trans - tbl_trans	# $05-3 ERROR
4517 	short		tbl_trans - tbl_trans	# $05-4 ERROR
4518 	short		tbl_trans - tbl_trans	# $05-5 ERROR
4519 	short		tbl_trans - tbl_trans	# $05-6 ERROR
4520 	short		tbl_trans - tbl_trans	# $05-7 ERROR
4521 
4522 	short		slognp1	 - tbl_trans	# $06-0 flognp1 norm
4523 	short		src_zero - tbl_trans	# $06-1 flognp1 zero
4524 	short		sopr_inf - tbl_trans	# $06-2 flognp1 inf
4525 	short		src_qnan - tbl_trans	# $06-3 flognp1 qnan
4526 	short		slognp1d - tbl_trans	# $06-4 flognp1 denorm
4527 	short		src_snan - tbl_trans	# $06-5 flognp1 snan
4528 	short		tbl_trans - tbl_trans	# $06-6 flognp1 unnorm
4529 	short		tbl_trans - tbl_trans	# $06-7 ERROR
4530 
4531 	short		tbl_trans - tbl_trans	# $07-0 ERROR
4532 	short		tbl_trans - tbl_trans	# $07-1 ERROR
4533 	short		tbl_trans - tbl_trans	# $07-2 ERROR
4534 	short		tbl_trans - tbl_trans	# $07-3 ERROR
4535 	short		tbl_trans - tbl_trans	# $07-4 ERROR
4536 	short		tbl_trans - tbl_trans	# $07-5 ERROR
4537 	short		tbl_trans - tbl_trans	# $07-6 ERROR
4538 	short		tbl_trans - tbl_trans	# $07-7 ERROR
4539 
4540 	short		setoxm1	 - tbl_trans	# $08-0 fetoxm1 norm
4541 	short		src_zero - tbl_trans	# $08-1 fetoxm1 zero
4542 	short		setoxm1i - tbl_trans	# $08-2 fetoxm1 inf
4543 	short		src_qnan - tbl_trans	# $08-3 fetoxm1 qnan
4544 	short		setoxm1d - tbl_trans	# $08-4 fetoxm1 denorm
4545 	short		src_snan - tbl_trans	# $08-5 fetoxm1 snan
4546 	short		tbl_trans - tbl_trans	# $08-6 fetoxm1 unnorm
4547 	short		tbl_trans - tbl_trans	# $08-7 ERROR
4548 
4549 	short		stanh	 - tbl_trans	# $09-0 ftanh norm
4550 	short		src_zero - tbl_trans	# $09-1 ftanh zero
4551 	short		src_one	 - tbl_trans	# $09-2 ftanh inf
4552 	short		src_qnan - tbl_trans	# $09-3 ftanh qnan
4553 	short		stanhd	 - tbl_trans	# $09-4 ftanh denorm
4554 	short		src_snan - tbl_trans	# $09-5 ftanh snan
4555 	short		tbl_trans - tbl_trans	# $09-6 ftanh unnorm
4556 	short		tbl_trans - tbl_trans	# $09-7 ERROR
4557 
4558 	short		satan	 - tbl_trans	# $0a-0 fatan norm
4559 	short		src_zero - tbl_trans	# $0a-1 fatan zero
4560 	short		spi_2	 - tbl_trans	# $0a-2 fatan inf
4561 	short		src_qnan - tbl_trans	# $0a-3 fatan qnan
4562 	short		satand	 - tbl_trans	# $0a-4 fatan denorm
4563 	short		src_snan - tbl_trans	# $0a-5 fatan snan
4564 	short		tbl_trans - tbl_trans	# $0a-6 fatan unnorm
4565 	short		tbl_trans - tbl_trans	# $0a-7 ERROR
4566 
4567 	short		tbl_trans - tbl_trans	# $0b-0 ERROR
4568 	short		tbl_trans - tbl_trans	# $0b-1 ERROR
4569 	short		tbl_trans - tbl_trans	# $0b-2 ERROR
4570 	short		tbl_trans - tbl_trans	# $0b-3 ERROR
4571 	short		tbl_trans - tbl_trans	# $0b-4 ERROR
4572 	short		tbl_trans - tbl_trans	# $0b-5 ERROR
4573 	short		tbl_trans - tbl_trans	# $0b-6 ERROR
4574 	short		tbl_trans - tbl_trans	# $0b-7 ERROR
4575 
4576 	short		sasin	 - tbl_trans	# $0c-0 fasin norm
4577 	short		src_zero - tbl_trans	# $0c-1 fasin zero
4578 	short		t_operr	 - tbl_trans	# $0c-2 fasin inf
4579 	short		src_qnan - tbl_trans	# $0c-3 fasin qnan
4580 	short		sasind	 - tbl_trans	# $0c-4 fasin denorm
4581 	short		src_snan - tbl_trans	# $0c-5 fasin snan
4582 	short		tbl_trans - tbl_trans	# $0c-6 fasin unnorm
4583 	short		tbl_trans - tbl_trans	# $0c-7 ERROR
4584 
4585 	short		satanh	 - tbl_trans	# $0d-0 fatanh norm
4586 	short		src_zero - tbl_trans	# $0d-1 fatanh zero
4587 	short		t_operr	 - tbl_trans	# $0d-2 fatanh inf
4588 	short		src_qnan - tbl_trans	# $0d-3 fatanh qnan
4589 	short		satanhd	 - tbl_trans	# $0d-4 fatanh denorm
4590 	short		src_snan - tbl_trans	# $0d-5 fatanh snan
4591 	short		tbl_trans - tbl_trans	# $0d-6 fatanh unnorm
4592 	short		tbl_trans - tbl_trans	# $0d-7 ERROR
4593 
4594 	short		ssin	 - tbl_trans	# $0e-0 fsin norm
4595 	short		src_zero - tbl_trans	# $0e-1 fsin zero
4596 	short		t_operr	 - tbl_trans	# $0e-2 fsin inf
4597 	short		src_qnan - tbl_trans	# $0e-3 fsin qnan
4598 	short		ssind	 - tbl_trans	# $0e-4 fsin denorm
4599 	short		src_snan - tbl_trans	# $0e-5 fsin snan
4600 	short		tbl_trans - tbl_trans	# $0e-6 fsin unnorm
4601 	short		tbl_trans - tbl_trans	# $0e-7 ERROR
4602 
4603 	short		stan	 - tbl_trans	# $0f-0 ftan norm
4604 	short		src_zero - tbl_trans	# $0f-1 ftan zero
4605 	short		t_operr	 - tbl_trans	# $0f-2 ftan inf
4606 	short		src_qnan - tbl_trans	# $0f-3 ftan qnan
4607 	short		stand	 - tbl_trans	# $0f-4 ftan denorm
4608 	short		src_snan - tbl_trans	# $0f-5 ftan snan
4609 	short		tbl_trans - tbl_trans	# $0f-6 ftan unnorm
4610 	short		tbl_trans - tbl_trans	# $0f-7 ERROR
4611 
4612 	short		setox	 - tbl_trans	# $10-0 fetox norm
4613 	short		ld_pone	 - tbl_trans	# $10-1 fetox zero
4614 	short		szr_inf	 - tbl_trans	# $10-2 fetox inf
4615 	short		src_qnan - tbl_trans	# $10-3 fetox qnan
4616 	short		setoxd	 - tbl_trans	# $10-4 fetox denorm
4617 	short		src_snan - tbl_trans	# $10-5 fetox snan
4618 	short		tbl_trans - tbl_trans	# $10-6 fetox unnorm
4619 	short		tbl_trans - tbl_trans	# $10-7 ERROR
4620 
4621 	short		stwotox	 - tbl_trans	# $11-0 ftwotox norm
4622 	short		ld_pone	 - tbl_trans	# $11-1 ftwotox zero
4623 	short		szr_inf	 - tbl_trans	# $11-2 ftwotox inf
4624 	short		src_qnan - tbl_trans	# $11-3 ftwotox qnan
4625 	short		stwotoxd - tbl_trans	# $11-4 ftwotox denorm
4626 	short		src_snan - tbl_trans	# $11-5 ftwotox snan
4627 	short		tbl_trans - tbl_trans	# $11-6 ftwotox unnorm
4628 	short		tbl_trans - tbl_trans	# $11-7 ERROR
4629 
4630 	short		stentox	 - tbl_trans	# $12-0 ftentox norm
4631 	short		ld_pone	 - tbl_trans	# $12-1 ftentox zero
4632 	short		szr_inf	 - tbl_trans	# $12-2 ftentox inf
4633 	short		src_qnan - tbl_trans	# $12-3 ftentox qnan
4634 	short		stentoxd - tbl_trans	# $12-4 ftentox denorm
4635 	short		src_snan - tbl_trans	# $12-5 ftentox snan
4636 	short		tbl_trans - tbl_trans	# $12-6 ftentox unnorm
4637 	short		tbl_trans - tbl_trans	# $12-7 ERROR
4638 
4639 	short		tbl_trans - tbl_trans	# $13-0 ERROR
4640 	short		tbl_trans - tbl_trans	# $13-1 ERROR
4641 	short		tbl_trans - tbl_trans	# $13-2 ERROR
4642 	short		tbl_trans - tbl_trans	# $13-3 ERROR
4643 	short		tbl_trans - tbl_trans	# $13-4 ERROR
4644 	short		tbl_trans - tbl_trans	# $13-5 ERROR
4645 	short		tbl_trans - tbl_trans	# $13-6 ERROR
4646 	short		tbl_trans - tbl_trans	# $13-7 ERROR
4647 
4648 	short		slogn	 - tbl_trans	# $14-0 flogn norm
4649 	short		t_dz2	 - tbl_trans	# $14-1 flogn zero
4650 	short		sopr_inf - tbl_trans	# $14-2 flogn inf
4651 	short		src_qnan - tbl_trans	# $14-3 flogn qnan
4652 	short		slognd	 - tbl_trans	# $14-4 flogn denorm
4653 	short		src_snan - tbl_trans	# $14-5 flogn snan
4654 	short		tbl_trans - tbl_trans	# $14-6 flogn unnorm
4655 	short		tbl_trans - tbl_trans	# $14-7 ERROR
4656 
4657 	short		slog10	 - tbl_trans	# $15-0 flog10 norm
4658 	short		t_dz2	 - tbl_trans	# $15-1 flog10 zero
4659 	short		sopr_inf - tbl_trans	# $15-2 flog10 inf
4660 	short		src_qnan - tbl_trans	# $15-3 flog10 qnan
4661 	short		slog10d	 - tbl_trans	# $15-4 flog10 denorm
4662 	short		src_snan - tbl_trans	# $15-5 flog10 snan
4663 	short		tbl_trans - tbl_trans	# $15-6 flog10 unnorm
4664 	short		tbl_trans - tbl_trans	# $15-7 ERROR
4665 
4666 	short		slog2	 - tbl_trans	# $16-0 flog2 norm
4667 	short		t_dz2	 - tbl_trans	# $16-1 flog2 zero
4668 	short		sopr_inf - tbl_trans	# $16-2 flog2 inf
4669 	short		src_qnan - tbl_trans	# $16-3 flog2 qnan
4670 	short		slog2d	 - tbl_trans	# $16-4 flog2 denorm
4671 	short		src_snan - tbl_trans	# $16-5 flog2 snan
4672 	short		tbl_trans - tbl_trans	# $16-6 flog2 unnorm
4673 	short		tbl_trans - tbl_trans	# $16-7 ERROR
4674 
4675 	short		tbl_trans - tbl_trans	# $17-0 ERROR
4676 	short		tbl_trans - tbl_trans	# $17-1 ERROR
4677 	short		tbl_trans - tbl_trans	# $17-2 ERROR
4678 	short		tbl_trans - tbl_trans	# $17-3 ERROR
4679 	short		tbl_trans - tbl_trans	# $17-4 ERROR
4680 	short		tbl_trans - tbl_trans	# $17-5 ERROR
4681 	short		tbl_trans - tbl_trans	# $17-6 ERROR
4682 	short		tbl_trans - tbl_trans	# $17-7 ERROR
4683 
4684 	short		tbl_trans - tbl_trans	# $18-0 fabs norm
4685 	short		tbl_trans - tbl_trans	# $18-1 fabs zero
4686 	short		tbl_trans - tbl_trans	# $18-2 fabs inf
4687 	short		tbl_trans - tbl_trans	# $18-3 fabs qnan
4688 	short		tbl_trans - tbl_trans	# $18-4 fabs denorm
4689 	short		tbl_trans - tbl_trans	# $18-5 fabs snan
4690 	short		tbl_trans - tbl_trans	# $18-6 fabs unnorm
4691 	short		tbl_trans - tbl_trans	# $18-7 ERROR
4692 
4693 	short		scosh	 - tbl_trans	# $19-0 fcosh norm
4694 	short		ld_pone	 - tbl_trans	# $19-1 fcosh zero
4695 	short		ld_pinf	 - tbl_trans	# $19-2 fcosh inf
4696 	short		src_qnan - tbl_trans	# $19-3 fcosh qnan
4697 	short		scoshd	 - tbl_trans	# $19-4 fcosh denorm
4698 	short		src_snan - tbl_trans	# $19-5 fcosh snan
4699 	short		tbl_trans - tbl_trans	# $19-6 fcosh unnorm
4700 	short		tbl_trans - tbl_trans	# $19-7 ERROR
4701 
4702 	short		tbl_trans - tbl_trans	# $1a-0 fneg norm
4703 	short		tbl_trans - tbl_trans	# $1a-1 fneg zero
4704 	short		tbl_trans - tbl_trans	# $1a-2 fneg inf
4705 	short		tbl_trans - tbl_trans	# $1a-3 fneg qnan
4706 	short		tbl_trans - tbl_trans	# $1a-4 fneg denorm
4707 	short		tbl_trans - tbl_trans	# $1a-5 fneg snan
4708 	short		tbl_trans - tbl_trans	# $1a-6 fneg unnorm
4709 	short		tbl_trans - tbl_trans	# $1a-7 ERROR
4710 
4711 	short		tbl_trans - tbl_trans	# $1b-0 ERROR
4712 	short		tbl_trans - tbl_trans	# $1b-1 ERROR
4713 	short		tbl_trans - tbl_trans	# $1b-2 ERROR
4714 	short		tbl_trans - tbl_trans	# $1b-3 ERROR
4715 	short		tbl_trans - tbl_trans	# $1b-4 ERROR
4716 	short		tbl_trans - tbl_trans	# $1b-5 ERROR
4717 	short		tbl_trans - tbl_trans	# $1b-6 ERROR
4718 	short		tbl_trans - tbl_trans	# $1b-7 ERROR
4719 
4720 	short		sacos	 - tbl_trans	# $1c-0 facos norm
4721 	short		ld_ppi2	 - tbl_trans	# $1c-1 facos zero
4722 	short		t_operr	 - tbl_trans	# $1c-2 facos inf
4723 	short		src_qnan - tbl_trans	# $1c-3 facos qnan
4724 	short		sacosd	 - tbl_trans	# $1c-4 facos denorm
4725 	short		src_snan - tbl_trans	# $1c-5 facos snan
4726 	short		tbl_trans - tbl_trans	# $1c-6 facos unnorm
4727 	short		tbl_trans - tbl_trans	# $1c-7 ERROR
4728 
4729 	short		scos	 - tbl_trans	# $1d-0 fcos norm
4730 	short		ld_pone	 - tbl_trans	# $1d-1 fcos zero
4731 	short		t_operr	 - tbl_trans	# $1d-2 fcos inf
4732 	short		src_qnan - tbl_trans	# $1d-3 fcos qnan
4733 	short		scosd	 - tbl_trans	# $1d-4 fcos denorm
4734 	short		src_snan - tbl_trans	# $1d-5 fcos snan
4735 	short		tbl_trans - tbl_trans	# $1d-6 fcos unnorm
4736 	short		tbl_trans - tbl_trans	# $1d-7 ERROR
4737 
4738 	short		sgetexp	 - tbl_trans	# $1e-0 fgetexp norm
4739 	short		src_zero - tbl_trans	# $1e-1 fgetexp zero
4740 	short		t_operr	 - tbl_trans	# $1e-2 fgetexp inf
4741 	short		src_qnan - tbl_trans	# $1e-3 fgetexp qnan
4742 	short		sgetexpd - tbl_trans	# $1e-4 fgetexp denorm
4743 	short		src_snan - tbl_trans	# $1e-5 fgetexp snan
4744 	short		tbl_trans - tbl_trans	# $1e-6 fgetexp unnorm
4745 	short		tbl_trans - tbl_trans	# $1e-7 ERROR
4746 
4747 	short		sgetman	 - tbl_trans	# $1f-0 fgetman norm
4748 	short		src_zero - tbl_trans	# $1f-1 fgetman zero
4749 	short		t_operr	 - tbl_trans	# $1f-2 fgetman inf
4750 	short		src_qnan - tbl_trans	# $1f-3 fgetman qnan
4751 	short		sgetmand - tbl_trans	# $1f-4 fgetman denorm
4752 	short		src_snan - tbl_trans	# $1f-5 fgetman snan
4753 	short		tbl_trans - tbl_trans	# $1f-6 fgetman unnorm
4754 	short		tbl_trans - tbl_trans	# $1f-7 ERROR
4755 
4756 	short		tbl_trans - tbl_trans	# $20-0 fdiv norm
4757 	short		tbl_trans - tbl_trans	# $20-1 fdiv zero
4758 	short		tbl_trans - tbl_trans	# $20-2 fdiv inf
4759 	short		tbl_trans - tbl_trans	# $20-3 fdiv qnan
4760 	short		tbl_trans - tbl_trans	# $20-4 fdiv denorm
4761 	short		tbl_trans - tbl_trans	# $20-5 fdiv snan
4762 	short		tbl_trans - tbl_trans	# $20-6 fdiv unnorm
4763 	short		tbl_trans - tbl_trans	# $20-7 ERROR
4764 
4765 	short		smod_snorm - tbl_trans	# $21-0 fmod norm
4766 	short		smod_szero - tbl_trans	# $21-1 fmod zero
4767 	short		smod_sinf - tbl_trans	# $21-2 fmod inf
4768 	short		sop_sqnan - tbl_trans	# $21-3 fmod qnan
4769 	short		smod_sdnrm - tbl_trans	# $21-4 fmod denorm
4770 	short		sop_ssnan - tbl_trans	# $21-5 fmod snan
4771 	short		tbl_trans - tbl_trans	# $21-6 fmod unnorm
4772 	short		tbl_trans - tbl_trans	# $21-7 ERROR
4773 
4774 	short		tbl_trans - tbl_trans	# $22-0 fadd norm
4775 	short		tbl_trans - tbl_trans	# $22-1 fadd zero
4776 	short		tbl_trans - tbl_trans	# $22-2 fadd inf
4777 	short		tbl_trans - tbl_trans	# $22-3 fadd qnan
4778 	short		tbl_trans - tbl_trans	# $22-4 fadd denorm
4779 	short		tbl_trans - tbl_trans	# $22-5 fadd snan
4780 	short		tbl_trans - tbl_trans	# $22-6 fadd unnorm
4781 	short		tbl_trans - tbl_trans	# $22-7 ERROR
4782 
4783 	short		tbl_trans - tbl_trans	# $23-0 fmul norm
4784 	short		tbl_trans - tbl_trans	# $23-1 fmul zero
4785 	short		tbl_trans - tbl_trans	# $23-2 fmul inf
4786 	short		tbl_trans - tbl_trans	# $23-3 fmul qnan
4787 	short		tbl_trans - tbl_trans	# $23-4 fmul denorm
4788 	short		tbl_trans - tbl_trans	# $23-5 fmul snan
4789 	short		tbl_trans - tbl_trans	# $23-6 fmul unnorm
4790 	short		tbl_trans - tbl_trans	# $23-7 ERROR
4791 
4792 	short		tbl_trans - tbl_trans	# $24-0 fsgldiv norm
4793 	short		tbl_trans - tbl_trans	# $24-1 fsgldiv zero
4794 	short		tbl_trans - tbl_trans	# $24-2 fsgldiv inf
4795 	short		tbl_trans - tbl_trans	# $24-3 fsgldiv qnan
4796 	short		tbl_trans - tbl_trans	# $24-4 fsgldiv denorm
4797 	short		tbl_trans - tbl_trans	# $24-5 fsgldiv snan
4798 	short		tbl_trans - tbl_trans	# $24-6 fsgldiv unnorm
4799 	short		tbl_trans - tbl_trans	# $24-7 ERROR
4800 
4801 	short		srem_snorm - tbl_trans	# $25-0 frem norm
4802 	short		srem_szero - tbl_trans	# $25-1 frem zero
4803 	short		srem_sinf - tbl_trans	# $25-2 frem inf
4804 	short		sop_sqnan - tbl_trans	# $25-3 frem qnan
4805 	short		srem_sdnrm - tbl_trans	# $25-4 frem denorm
4806 	short		sop_ssnan - tbl_trans	# $25-5 frem snan
4807 	short		tbl_trans - tbl_trans	# $25-6 frem unnorm
4808 	short		tbl_trans - tbl_trans	# $25-7 ERROR
4809 
4810 	short		sscale_snorm - tbl_trans # $26-0 fscale norm
4811 	short		sscale_szero - tbl_trans # $26-1 fscale zero
4812 	short		sscale_sinf - tbl_trans	# $26-2 fscale inf
4813 	short		sop_sqnan - tbl_trans	# $26-3 fscale qnan
4814 	short		sscale_sdnrm - tbl_trans # $26-4 fscale denorm
4815 	short		sop_ssnan - tbl_trans	# $26-5 fscale snan
4816 	short		tbl_trans - tbl_trans	# $26-6 fscale unnorm
4817 	short		tbl_trans - tbl_trans	# $26-7 ERROR
4818 
4819 	short		tbl_trans - tbl_trans	# $27-0 fsglmul norm
4820 	short		tbl_trans - tbl_trans	# $27-1 fsglmul zero
4821 	short		tbl_trans - tbl_trans	# $27-2 fsglmul inf
4822 	short		tbl_trans - tbl_trans	# $27-3 fsglmul qnan
4823 	short		tbl_trans - tbl_trans	# $27-4 fsglmul denorm
4824 	short		tbl_trans - tbl_trans	# $27-5 fsglmul snan
4825 	short		tbl_trans - tbl_trans	# $27-6 fsglmul unnorm
4826 	short		tbl_trans - tbl_trans	# $27-7 ERROR
4827 
4828 	short		tbl_trans - tbl_trans	# $28-0 fsub norm
4829 	short		tbl_trans - tbl_trans	# $28-1 fsub zero
4830 	short		tbl_trans - tbl_trans	# $28-2 fsub inf
4831 	short		tbl_trans - tbl_trans	# $28-3 fsub qnan
4832 	short		tbl_trans - tbl_trans	# $28-4 fsub denorm
4833 	short		tbl_trans - tbl_trans	# $28-5 fsub snan
4834 	short		tbl_trans - tbl_trans	# $28-6 fsub unnorm
4835 	short		tbl_trans - tbl_trans	# $28-7 ERROR
4836 
4837 	short		tbl_trans - tbl_trans	# $29-0 ERROR
4838 	short		tbl_trans - tbl_trans	# $29-1 ERROR
4839 	short		tbl_trans - tbl_trans	# $29-2 ERROR
4840 	short		tbl_trans - tbl_trans	# $29-3 ERROR
4841 	short		tbl_trans - tbl_trans	# $29-4 ERROR
4842 	short		tbl_trans - tbl_trans	# $29-5 ERROR
4843 	short		tbl_trans - tbl_trans	# $29-6 ERROR
4844 	short		tbl_trans - tbl_trans	# $29-7 ERROR
4845 
4846 	short		tbl_trans - tbl_trans	# $2a-0 ERROR
4847 	short		tbl_trans - tbl_trans	# $2a-1 ERROR
4848 	short		tbl_trans - tbl_trans	# $2a-2 ERROR
4849 	short		tbl_trans - tbl_trans	# $2a-3 ERROR
4850 	short		tbl_trans - tbl_trans	# $2a-4 ERROR
4851 	short		tbl_trans - tbl_trans	# $2a-5 ERROR
4852 	short		tbl_trans - tbl_trans	# $2a-6 ERROR
4853 	short		tbl_trans - tbl_trans	# $2a-7 ERROR
4854 
4855 	short		tbl_trans - tbl_trans	# $2b-0 ERROR
4856 	short		tbl_trans - tbl_trans	# $2b-1 ERROR
4857 	short		tbl_trans - tbl_trans	# $2b-2 ERROR
4858 	short		tbl_trans - tbl_trans	# $2b-3 ERROR
4859 	short		tbl_trans - tbl_trans	# $2b-4 ERROR
4860 	short		tbl_trans - tbl_trans	# $2b-5 ERROR
4861 	short		tbl_trans - tbl_trans	# $2b-6 ERROR
4862 	short		tbl_trans - tbl_trans	# $2b-7 ERROR
4863 
4864 	short		tbl_trans - tbl_trans	# $2c-0 ERROR
4865 	short		tbl_trans - tbl_trans	# $2c-1 ERROR
4866 	short		tbl_trans - tbl_trans	# $2c-2 ERROR
4867 	short		tbl_trans - tbl_trans	# $2c-3 ERROR
4868 	short		tbl_trans - tbl_trans	# $2c-4 ERROR
4869 	short		tbl_trans - tbl_trans	# $2c-5 ERROR
4870 	short		tbl_trans - tbl_trans	# $2c-6 ERROR
4871 	short		tbl_trans - tbl_trans	# $2c-7 ERROR
4872 
4873 	short		tbl_trans - tbl_trans	# $2d-0 ERROR
4874 	short		tbl_trans - tbl_trans	# $2d-1 ERROR
4875 	short		tbl_trans - tbl_trans	# $2d-2 ERROR
4876 	short		tbl_trans - tbl_trans	# $2d-3 ERROR
4877 	short		tbl_trans - tbl_trans	# $2d-4 ERROR
4878 	short		tbl_trans - tbl_trans	# $2d-5 ERROR
4879 	short		tbl_trans - tbl_trans	# $2d-6 ERROR
4880 	short		tbl_trans - tbl_trans	# $2d-7 ERROR
4881 
4882 	short		tbl_trans - tbl_trans	# $2e-0 ERROR
4883 	short		tbl_trans - tbl_trans	# $2e-1 ERROR
4884 	short		tbl_trans - tbl_trans	# $2e-2 ERROR
4885 	short		tbl_trans - tbl_trans	# $2e-3 ERROR
4886 	short		tbl_trans - tbl_trans	# $2e-4 ERROR
4887 	short		tbl_trans - tbl_trans	# $2e-5 ERROR
4888 	short		tbl_trans - tbl_trans	# $2e-6 ERROR
4889 	short		tbl_trans - tbl_trans	# $2e-7 ERROR
4890 
4891 	short		tbl_trans - tbl_trans	# $2f-0 ERROR
4892 	short		tbl_trans - tbl_trans	# $2f-1 ERROR
4893 	short		tbl_trans - tbl_trans	# $2f-2 ERROR
4894 	short		tbl_trans - tbl_trans	# $2f-3 ERROR
4895 	short		tbl_trans - tbl_trans	# $2f-4 ERROR
4896 	short		tbl_trans - tbl_trans	# $2f-5 ERROR
4897 	short		tbl_trans - tbl_trans	# $2f-6 ERROR
4898 	short		tbl_trans - tbl_trans	# $2f-7 ERROR
4899 
4900 	short		ssincos	 - tbl_trans	# $30-0 fsincos norm
4901 	short		ssincosz - tbl_trans	# $30-1 fsincos zero
4902 	short		ssincosi - tbl_trans	# $30-2 fsincos inf
4903 	short		ssincosqnan - tbl_trans	# $30-3 fsincos qnan
4904 	short		ssincosd - tbl_trans	# $30-4 fsincos denorm
4905 	short		ssincossnan - tbl_trans	# $30-5 fsincos snan
4906 	short		tbl_trans - tbl_trans	# $30-6 fsincos unnorm
4907 	short		tbl_trans - tbl_trans	# $30-7 ERROR
4908 
4909 	short		ssincos	 - tbl_trans	# $31-0 fsincos norm
4910 	short		ssincosz - tbl_trans	# $31-1 fsincos zero
4911 	short		ssincosi - tbl_trans	# $31-2 fsincos inf
4912 	short		ssincosqnan - tbl_trans	# $31-3 fsincos qnan
4913 	short		ssincosd - tbl_trans	# $31-4 fsincos denorm
4914 	short		ssincossnan - tbl_trans	# $31-5 fsincos snan
4915 	short		tbl_trans - tbl_trans	# $31-6 fsincos unnorm
4916 	short		tbl_trans - tbl_trans	# $31-7 ERROR
4917 
4918 	short		ssincos	 - tbl_trans	# $32-0 fsincos norm
4919 	short		ssincosz - tbl_trans	# $32-1 fsincos zero
4920 	short		ssincosi - tbl_trans	# $32-2 fsincos inf
4921 	short		ssincosqnan - tbl_trans	# $32-3 fsincos qnan
4922 	short		ssincosd - tbl_trans	# $32-4 fsincos denorm
4923 	short		ssincossnan - tbl_trans	# $32-5 fsincos snan
4924 	short		tbl_trans - tbl_trans	# $32-6 fsincos unnorm
4925 	short		tbl_trans - tbl_trans	# $32-7 ERROR
4926 
4927 	short		ssincos	 - tbl_trans	# $33-0 fsincos norm
4928 	short		ssincosz - tbl_trans	# $33-1 fsincos zero
4929 	short		ssincosi - tbl_trans	# $33-2 fsincos inf
4930 	short		ssincosqnan - tbl_trans	# $33-3 fsincos qnan
4931 	short		ssincosd - tbl_trans	# $33-4 fsincos denorm
4932 	short		ssincossnan - tbl_trans	# $33-5 fsincos snan
4933 	short		tbl_trans - tbl_trans	# $33-6 fsincos unnorm
4934 	short		tbl_trans - tbl_trans	# $33-7 ERROR
4935 
4936 	short		ssincos	 - tbl_trans	# $34-0 fsincos norm
4937 	short		ssincosz - tbl_trans	# $34-1 fsincos zero
4938 	short		ssincosi - tbl_trans	# $34-2 fsincos inf
4939 	short		ssincosqnan - tbl_trans	# $34-3 fsincos qnan
4940 	short		ssincosd - tbl_trans	# $34-4 fsincos denorm
4941 	short		ssincossnan - tbl_trans	# $34-5 fsincos snan
4942 	short		tbl_trans - tbl_trans	# $34-6 fsincos unnorm
4943 	short		tbl_trans - tbl_trans	# $34-7 ERROR
4944 
4945 	short		ssincos	 - tbl_trans	# $35-0 fsincos norm
4946 	short		ssincosz - tbl_trans	# $35-1 fsincos zero
4947 	short		ssincosi - tbl_trans	# $35-2 fsincos inf
4948 	short		ssincosqnan - tbl_trans	# $35-3 fsincos qnan
4949 	short		ssincosd - tbl_trans	# $35-4 fsincos denorm
4950 	short		ssincossnan - tbl_trans	# $35-5 fsincos snan
4951 	short		tbl_trans - tbl_trans	# $35-6 fsincos unnorm
4952 	short		tbl_trans - tbl_trans	# $35-7 ERROR
4953 
4954 	short		ssincos	 - tbl_trans	# $36-0 fsincos norm
4955 	short		ssincosz - tbl_trans	# $36-1 fsincos zero
4956 	short		ssincosi - tbl_trans	# $36-2 fsincos inf
4957 	short		ssincosqnan - tbl_trans	# $36-3 fsincos qnan
4958 	short		ssincosd - tbl_trans	# $36-4 fsincos denorm
4959 	short		ssincossnan - tbl_trans	# $36-5 fsincos snan
4960 	short		tbl_trans - tbl_trans	# $36-6 fsincos unnorm
4961 	short		tbl_trans - tbl_trans	# $36-7 ERROR
4962 
4963 	short		ssincos	 - tbl_trans	# $37-0 fsincos norm
4964 	short		ssincosz - tbl_trans	# $37-1 fsincos zero
4965 	short		ssincosi - tbl_trans	# $37-2 fsincos inf
4966 	short		ssincosqnan - tbl_trans	# $37-3 fsincos qnan
4967 	short		ssincosd - tbl_trans	# $37-4 fsincos denorm
4968 	short		ssincossnan - tbl_trans	# $37-5 fsincos snan
4969 	short		tbl_trans - tbl_trans	# $37-6 fsincos unnorm
4970 	short		tbl_trans - tbl_trans	# $37-7 ERROR
4971 
4972 ##########
4973 
4974 # funimp_iacc(): the instruction fetch access for the displacement word
4975 # of the fdbcc emulation failed. here, we create an access error frame
4976 # from the current frame and branch to _real_access().
#
# After the working registers are restored and the a6 frame is unlinked,
# the exception frame on the stack is rebuilt in place. Offsets written
# (relative to the final %sp):
#	0x0 = SR, 0x2 = PC, 0x6 = format/vector offset word (0x4008),
#	0x8 = effective address (a copy of the PC), 0xc = FSLW.
4977 funimp_iacc:
4978 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
4979 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4980 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
4981 
4982 	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # stacked PC := saved FPIAR value
4983 
4984 	unlk		%a6
4985 
4986 	mov.l		(%sp),-(%sp)		# grow frame by 4 bytes: copy SR,hi(PC) down
4987 	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC) right after hi(PC)
4988 	mov.w		&0x4008,0x6(%sp)	# store voff
4989 	mov.l		0x2(%sp),0x8(%sp)	# store EA (same longword as the PC at 0x2)
4990 	mov.l		&0x09428001,0xc(%sp)	# store FSLW
4991 
4992 	btst		&0x5,(%sp)		# test bit 5 of the SR high byte: user or supervisor mode?
4993 	beq.b		funimp_iacc_end		# bit clear: user
4994 	bset		&0x2,0xd(%sp)		# set supervisor TM bit (bit 2 of the 2nd FSLW byte)
4995 
4996 funimp_iacc_end:
4997 	bra.l		_real_access		# hand the rebuilt frame to the access error handler
4998 
4999 #########################################################################
5000 # ssin():     computes the sine of a normalized input			#
5001 # ssind():    computes the sine of a denormalized input			#
5002 # scos():     computes the cosine of a normalized input			#
5003 # scosd():    computes the cosine of a denormalized input		#
5004 # ssincos():  computes the sine and cosine of a normalized input	#
5005 # ssincosd(): computes the sine and cosine of a denormalized input	#
5006 #									#
5007 # INPUT *************************************************************** #
5008 #	a0 = pointer to extended precision input			#
5009 #	d0 = round precision,mode					#
5010 #									#
5011 # OUTPUT ************************************************************** #
5012 #	fp0 = sin(X) or cos(X)						#
5013 #									#
5014 #    For ssincos(X):							#
5015 #	fp0 = sin(X)							#
5016 #	fp1 = cos(X)							#
5017 #									#
5018 # ACCURACY and MONOTONICITY ******************************************* #
5019 #	The returned result is within 1 ulp in 64 significant bit, i.e.	#
5020 #	within 0.5001 ulp to 53 bits if the result is subsequently	#
5021 #	rounded to double precision. The result is provably monotonic	#
5022 #	in double precision.						#
5023 #									#
5024 # ALGORITHM ***********************************************************	#
5025 #									#
5026 #	SIN and COS:							#
5027 #	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
5028 #									#
5029 #	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
5030 #									#
5031 #	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5032 #		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
5033 #		Overwrite k by k := k + AdjN.				#
5034 #									#
5035 #	4. If k is even, go to 6.					#
5036 #									#
5037 #	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.			#
5038 #		Return sgn*cos(r) where cos(r) is approximated by an	#
5039 #		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
5040 #		s = r*r.						#
5041 #		Exit.							#
5042 #									#
5043 #	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
5044 #		where sin(r) is approximated by an odd polynomial in r	#
5045 #		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.	#
5046 #		Exit.							#
5047 #									#
5048 #	7. If |X| > 1, go to 9.						#
5049 #									#
5050 #	8. (|X|<2**(-40)) If SIN is invoked, return X;			#
5051 #		otherwise return 1.					#
5052 #									#
5053 #	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
5054 #		go back to 3.						#
5055 #									#
5056 #	SINCOS:								#
5057 #	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
5058 #									#
5059 #	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5060 #		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
5061 #									#
5062 #	3. If k is even, go to 5.					#
5063 #									#
5064 #	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.	#
5065 #		j1 exclusive or with the l.s.b. of k.			#
5066 #		sgn1 := (-1)**j1, sgn2 := (-1)**j2.			#
5067 #		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
5068 #		sin(r) and cos(r) are computed as odd and even		#
5069 #		polynomials in r, respectively. Exit			#
5070 #									#
5071 #	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
5072 #		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
5073 #		sin(r) and cos(r) are computed as odd and even		#
5074 #		polynomials in r, respectively. Exit			#
5075 #									#
5076 #	6. If |X| > 1, go to 8.						#
5077 #									#
5078 #	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
5079 #									#
5080 #	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
5081 #		go back to 2.						#
5082 #									#
5083 #########################################################################
5084 
# Polynomial coefficients for the sin/cos kernels that follow.
# Storage format, as implied by the operation size used to read each entry:
#   SINA7-SINA4, COSB8-COSB5 : double precision, two longwords (fmov.d/fadd.d)
#   SINA3, COSB4             : read as double (fadd.d); padded with two zero
#                              longwords to 16 bytes
#   SINA2, SINA1, COSB3      : extended precision (fadd.x), 16 bytes with pad
#   COSB2                    : extended precision (fadd.x), 12 bytes, no pad
#   COSB1                    : single precision (fadd.s); 0xBF000000 = -0.5
5085 SINA7:	long		0xBD6AAA77,0xCCC994F5
5086 SINA6:	long		0x3DE61209,0x7AAE8DA1
5087 SINA5:	long		0xBE5AE645,0x2A118AE4
5088 SINA4:	long		0x3EC71DE3,0xA5341531
5089 SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5090 SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
5091 SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5092 
5093 COSB8:	long		0x3D2AC4D0,0xD6011EE3
5094 COSB7:	long		0xBDA9396F,0x9F45AC19
5095 COSB6:	long		0x3E21EED9,0x0612C972
5096 COSB5:	long		0xBE927E4F,0xB79D9FCF
5097 COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5098 COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5099 COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5100 COSB1:	long		0xBF000000
5101 
# Scratch-area aliases used by the sin/cos/sincos code below (all are
# offsets used with %a6). Several names deliberately share one slot, so
# only one of each group is live at a time:
#   FP_SCR0 : INARG / X / RPRIME      FP_SCR1 : SPRIME
#   L_SCR1  : POSNEG1 / TWOTO63       L_SCR2  : ENDFLAG / INT
#   L_SCR3  : ADJN
5102 	set		INARG,FP_SCR0		# argument being reduced (SREDUCEX)
5103 
5104 	set		X,FP_SCR0		# saved input, later R' or S'
5105 #	set		XDCARE,X+2
5106 	set		XFRAC,X+4
5107 
5108 	set		RPRIME,FP_SCR0		# R' = sgn*R (sincos paths)
5109 	set		SPRIME,FP_SCR1		# S' = sgn*S (sincos paths)
5110 
5111 	set		POSNEG1,L_SCR1		# +1.0 or -1.0 in single precision
5112 	set		TWOTO63,L_SCR1		# sign(arg)*2**63 in single precision
5113 
5114 	set		ENDFLAG,L_SCR2		# byte flag: last reduction pass
5115 	set		INT,L_SCR2		# integer quotient N
5116 
5117 	set		ADJN,L_SCR3		# 0 = sin, 1 = cos, 4 = sincos
5118 
5119 ############################################
# ssin(): entry point for sine of a normalized input.
# Records ADJN = 0 and joins the code shared with scos at SINBGN.
5120 	global		ssin
5121 ssin:
5122 	mov.l		&0,ADJN(%a6)		# sine requested: SET ADJN TO 0
5123 	bra.b		SINBGN
5124 
5125 ############################################
# scos(): entry point for cosine of a normalized input.
# Records ADJN = 1 and falls through into SINBGN (no branch needed).
5126 	global		scos
5127 scos:
5128 	mov.l		&1,ADJN(%a6)		# cosine requested: SET ADJN TO 1
5129 
5130 ############################################
# SINBGN: common start for ssin/scos. Loads X, saves it at X(%a6), and
# classifies |X| as tiny (SINSM), usual (SINMAIN) or huge (SREDUCEX).
5131 SINBGN:
5132 #--LOAD X AND CHECK IF |X| IS TOO SMALL OR TOO LARGE
5133 
5134 	fmov.x		(%a0),%fp0		# LOAD INPUT
5135 	fmov.x		%fp0,X(%a6)		# save input at X
5136 
5137 # "COMPACTIFY" X: d1 = exponent word (hi) : top 16 mantissa bits (lo)
5138 	mov.l		(%a0),%d1		# put exp in hi word
5139 	mov.w		4(%a0),%d1		# fetch hi(man)
5140 	and.l		&0x7FFFFFFF,%d1		# strip sign
5141 
5142 	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
5143 	bge.b		SOK1			# yes; go check the upper bound
5144 	bra.w		SINSM			# no; input is very small
5145 
5146 SOK1:
5147 	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
5148 	blt.b		SINMAIN			# yes; usual case
5149 	bra.w		SREDUCEX		# no; input is very large
5150 
5151 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5152 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--N = X*2/PI converted to integer; R = (X - Y1) - Y2 where Y1 (extended)
#--and Y2 (single) are the two pieces of N*PI/2 taken from PITBL.
5153 SINMAIN:
5154 	fmov.x		%fp0,%fp1
5155 	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5156 
5157 	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32; a1 -> N = 0 entry
5158 
5159 	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
5160 
5161 	mov.l		INT(%a6),%d1		# make a copy of N
5162 	asl.l		&4,%d1			# N *= 16 (each table entry is 16 bytes)
5163 	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)
5164 
5165 # A1 IS THE ADDRESS OF N*PIBY2
5166 # ...WHICH IS IN TWO PIECES Y1 & Y2
5167 	fsub.x		(%a1)+,%fp0		# X-Y1; a1 now points at Y2
5168 	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2
5169 
# SINCONT: fp0 = R and INT(%a6) = N. Chooses the sin or cos polynomial
# from the parity of N+ADJN. Also entered from SRESTORE when ADJN < 4.
5170 SINCONT:
5171 #--continuation from REDUCEX
5172 
5173 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5174 	mov.l		INT(%a6),%d1
5175 	add.l		ADJN(%a6),%d1		# SEE IF D1 IS ODD OR EVEN
5176 	ror.l		&1,%d1			# D1 WAS ODD IFF D1 IS NOW NEGATIVE
5177 	cmp.l		%d1,&0
5178 	blt.w		COSPOLY			# odd: cos(R) needed; even falls into SINPOLY
5179 
5180 #--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
5181 #--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5182 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5183 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5184 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5185 #--WHERE T=S*S.
5186 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5187 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#--ON ENTRY: fp0 = R, d1 = (N+ADJN) rotated right by one, d0 = user FPCR value.
#--fp2/fp3 are saved and restored here; fp1 is used as scratch.
5188 SINPOLY:
5189 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
5190 
5191 	fmov.x		%fp0,X(%a6)		# X IS R
5192 	fmul.x		%fp0,%fp0		# FP0 IS S
5193 
5194 	fmov.d		SINA7(%pc),%fp3
5195 	fmov.d		SINA6(%pc),%fp2
5196 
5197 	fmov.x		%fp0,%fp1
5198 	fmul.x		%fp1,%fp1		# FP1 IS T
5199 
5200 	ror.l		&1,%d1			# second rotate: bit 1 of N+ADJN reaches bit 31
5201 	and.l		&0x80000000,%d1
5202 # ...LEAST SIG. BIT OF J IN SIGN POSITION
5203 	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R (sign bit of the stored value flipped)
5204 
5205 	fmul.x		%fp1,%fp3		# TA7
5206 	fmul.x		%fp1,%fp2		# TA6
5207 
5208 	fadd.d		SINA5(%pc),%fp3		# A5+TA7
5209 	fadd.d		SINA4(%pc),%fp2		# A4+TA6
5210 
5211 	fmul.x		%fp1,%fp3		# T(A5+TA7)
5212 	fmul.x		%fp1,%fp2		# T(A4+TA6)
5213 
5214 	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
5215 	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)
5216 
5217 	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))
5218 
5219 	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
5220 	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
5221 	fmul.x		X(%a6),%fp0		# R'*S
5222 
5223 	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5224 
5225 	fmul.x		%fp1,%fp0		# SIN(R')-R'
5226 
5227 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
5228 
5229 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5230 	fadd.x		X(%a6),%fp0		# last inst - possible exception set
5231 	bra		t_inx2
5232 
5233 #--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
5234 #--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5235 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5236 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5237 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5238 #--WHERE T=S*S.
5239 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5240 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5241 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
#--ON ENTRY: fp0 = R, d1 = (N+ADJN) rotated right by one, d0 = user FPCR value.
#--fp2/fp3 are saved and restored here; fp1 is used as scratch.
5242 COSPOLY:
5243 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
5244 
5245 	fmul.x		%fp0,%fp0		# FP0 IS S
5246 
5247 	fmov.d		COSB8(%pc),%fp2
5248 	fmov.d		COSB7(%pc),%fp3
5249 
5250 	fmov.x		%fp0,%fp1
5251 	fmul.x		%fp1,%fp1		# FP1 IS T
5252 
5253 	fmov.x		%fp0,X(%a6)		# X IS S
5254 	ror.l		&1,%d1			# second rotate: bit 1 of N+ADJN reaches bit 31
5255 	and.l		&0x80000000,%d1
5256 # ...LEAST SIG. BIT OF J IN SIGN POSITION
5257 
5258 	fmul.x		%fp1,%fp2		# TB8
5259 
5260 	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
5261 	and.l		&0x80000000,%d1		# d1 is already masked above; this leaves it unchanged
5262 
5263 	fmul.x		%fp1,%fp3		# TB7
5264 
5265 	or.l		&0x3F800000,%d1		# D1 IS SGN (+-1.0) IN SINGLE
5266 	mov.l		%d1,POSNEG1(%a6)
5267 
5268 	fadd.d		COSB6(%pc),%fp2		# B6+TB8
5269 	fadd.d		COSB5(%pc),%fp3		# B5+TB7
5270 
5271 	fmul.x		%fp1,%fp2		# T(B6+TB8)
5272 	fmul.x		%fp1,%fp3		# T(B5+TB7)
5273 
5274 	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
5275 	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)
5276 
5277 	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
5278 	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))
5279 
5280 	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
5281 	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))
5282 
5283 	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))
5284 
5285 	fadd.x		%fp1,%fp0		# [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5286 
5287 	fmul.x		X(%a6),%fp0		# multiply by S': fp0 = SGN*COS(R) - SGN
5288 
5289 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
5290 
5291 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5292 	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
5293 	bra		t_inx2
5294 
5295 ##############################################
5296 
5297 # SINe: Big OR Small?
5298 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5299 #--IF |X| < 2**(-40), RETURN X OR 1.
# NOTE(review): no branch to SINBORS appears in this part of the file; the
# small-input path above enters at SINSM directly. Confirm before removing.
5300 SINBORS:
5301 	cmp.l		%d1,&0x3FFF8000		# compact |X| > 1.0?
5302 	bgt.l		SREDUCEX		# yes; general argument reduction
5303 
# SINSM: |X| < 2**(-40). ADJN > 0 means cosine was requested.
5304 SINSM:
5305 	mov.l		ADJN(%a6),%d1
5306 	cmp.l		%d1,&0
5307 	bgt.b		COSTINY			# cosine: return 1 (inexact)
5308 
5309 # here, the operation may underflow iff the precision is sgl or dbl.
5310 # extended denorms are handled through another entry point.
# sin(X) = X for tiny X: reload X under the user's rounding mode/precision.
5311 SINTINY:
5312 #	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE
5313 
5314 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5315 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5316 	fmov.x		X(%a6),%fp0		# last inst - possible exception set
5317 	bra		t_catch
5318 
# cos(X) for tiny X: 1.0 plus single 0x80800000 (-2**(-126)), so the add is
# rounded under the user's mode and the result is inexact.
5319 COSTINY:
5320 	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
5321 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5322 	fadd.s		&0x80800000,%fp0	# last inst - possible exception set
5323 	bra		t_pinx2
5324 
5325 ################################################
# ssind(): entry point for sine of a denormalized input.
# All work is delegated to t_extdnrm (defined outside this section).
5326 	global		ssind
5327 #--SIN(X) = X FOR DENORMALIZED X
5328 ssind:
5329 	bra		t_extdnrm
5330 
5331 ############################################
# scosd(): entry point for cosine of a denormalized input.
# Loads 1.0 into fp0 and exits through t_pinx2 (defined outside this section).
5332 	global		scosd
5333 #--COS(X) = 1 FOR DENORMALIZED X
5334 scosd:
5335 	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
5336 	bra		t_pinx2
5337 
5338 ##################################################
5339 
# ssincos(): entry point for simultaneous sine and cosine of a normalized
# input. ADJN = 4 marks the sincos path so that SRESTORE returns to SCCONT
# instead of SINCONT. Classifies |X| exactly as SINBGN does.
5340 	global		ssincos
5341 ssincos:
5342 #--SET ADJN TO 4
5343 	mov.l		&4,ADJN(%a6)
5344 
5345 	fmov.x		(%a0),%fp0		# LOAD INPUT
5346 	fmov.x		%fp0,X(%a6)		# save input at X
5347 
5348 	mov.l		(%a0),%d1		# exponent word into the high half
5349 	mov.w		4(%a0),%d1		# top 16 mantissa bits into the low half
5350 	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X
5351 
5352 	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
5353 	bge.b		SCOK1			# yes; go check the upper bound
5354 	bra.w		SCSM			# no; input is very small
5355 
5356 SCOK1:
5357 	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
5358 	blt.b		SCMAIN			# yes; usual case
5359 	bra.w		SREDUCEX		# no; input is very large
5360 
5361 
5362 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5363 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--Same reduction as SINMAIN: N = X*2/PI as an integer, R = (X-Y1)-Y2.
5364 SCMAIN:
5365 	fmov.x		%fp0,%fp1
5366 
5367 	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5368 
5369 	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32; a1 -> N = 0 entry
5370 
5371 	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
5372 
5373 	mov.l		INT(%a6),%d1
5374 	asl.l		&4,%d1			# N *= 16 (each table entry is 16 bytes)
5375 	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2
5376 
5377 	fsub.x		(%a1)+,%fp0		# X-Y1
5378 	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
5379 
# SCCONT: fp0 = R and INT(%a6) = N. Dispatches on the parity of N.
5380 SCCONT:
5381 #--continuation point from REDUCEX
5382 
5383 	mov.l		INT(%a6),%d1
5384 	ror.l		&1,%d1			# parity of N moves to the sign bit
5385 	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
5386 	bge.w		NEVEN			# even N; odd N falls into SNODD
5387 
# SNODD: sincos for odd N (step 4 of the SINCOS algorithm in the header).
#   fp1 accumulates the odd (sin) polynomial in S by Horner's rule,
#   fp2 accumulates the even (cos) polynomial.
# On entry fp0 = R and d1 = N rotated right by one (bit 31 = N bit 0,
# bit 0 = N bit 1). The integer instructions interleaved with the FP ops
# build the signs:
#   RPRIME  = R with sign flipped by (N bit 1) XOR (N bit 0)
#   POSNEG1 = +-1.0 and SPRIME = +-S, both signed by N bit 1
# The value returned in fp0 is POSNEG1 + S'*(B1+...); the value passed to
# sto_cos in fp1 is R' + R'*S*(A1+...).
5388 SNODD:
5389 #--FP2 IS SAVED BELOW; D2 IS SAVED AND RESTORED AROUND ITS USE.
5390 	fmovm.x		&0x04,-(%sp)		# save fp2
5391 
5392 	fmov.x		%fp0,RPRIME(%a6)
5393 	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
5394 	fmov.d		SINA7(%pc),%fp1		# A7
5395 	fmov.d		COSB8(%pc),%fp2		# B8
5396 	fmul.x		%fp0,%fp1		# SA7
5397 	fmul.x		%fp0,%fp2		# SB8
5398 
5399 	mov.l		%d2,-(%sp)		# save d2
5400 	mov.l		%d1,%d2
5401 	ror.l		&1,%d2			# d2 bit 31 = N bit 1
5402 	and.l		&0x80000000,%d2
5403 	eor.l		%d1,%d2			# d2 bit 31 = (N bit 1) xor (N bit 0)
5404 	and.l		&0x80000000,%d2
5405 
5406 	fadd.d		SINA6(%pc),%fp1		# A6+SA7
5407 	fadd.d		COSB7(%pc),%fp2		# B7+SB8
5408 
5409 	fmul.x		%fp0,%fp1		# S(A6+SA7)
5410 	eor.l		%d2,RPRIME(%a6)		# apply that sign to the stored R
5411 	mov.l		(%sp)+,%d2		# restore d2
5412 	fmul.x		%fp0,%fp2		# S(B7+SB8)
5413 	ror.l		&1,%d1			# d1 bit 31 = N bit 1
5414 	and.l		&0x80000000,%d1
5415 	mov.l		&0x3F800000,POSNEG1(%a6)
5416 	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = +-1.0 in single
5417 
5418 	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
5419 	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)
5420 
5421 	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
5422 	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
5423 	fmov.x		%fp0,SPRIME(%a6)
5424 
5425 	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
5426 	eor.l		%d1,SPRIME(%a6)		# S' = S with the same sign as POSNEG1
5427 	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))
5428 
5429 	fmul.x		%fp0,%fp1		# S(A4+...)
5430 	fmul.x		%fp0,%fp2		# S(B5+...)
5431 
5432 	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
5433 	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)
5434 
5435 	fmul.x		%fp0,%fp1		# S(A3+...)
5436 	fmul.x		%fp0,%fp2		# S(B4+...)
5437 
5438 	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
5439 	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)
5440 
5441 	fmul.x		%fp0,%fp1		# S(A2+...)
5442 	fmul.x		%fp0,%fp2		# S(B3+...)
5443 
5444 	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
5445 	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)
5446 
5447 	fmul.x		%fp0,%fp1		# S(A1+...)
5448 	fmul.x		%fp2,%fp0		# S(B2+...)
5449 
5450 	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
5451 	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
5452 	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))
5453 
5454 	fmovm.x		(%sp)+,&0x20		# restore fp2
5455 
5456 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5457 	fadd.x		RPRIME(%a6),%fp1	# COS(X)
5458 	bsr		sto_cos			# store cosine result
5459 	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
5460 	bra		t_inx2
5461 
# NEVEN: sincos for even N (step 5 of the SINCOS algorithm in the header).
#   fp1 accumulates the even (cos) polynomial in S by Horner's rule,
#   fp2 accumulates the odd (sin) polynomial.
# On entry fp0 = R and d1 = N rotated right by one. One more rotate puts
# N bit 1 in the sign position; that single sign is applied to R' (RPRIME),
# S' (SPRIME) and the +-1.0 constant POSNEG1.
# The value returned in fp0 is R' + R'*S*(A1+...); the value passed to
# sto_cos in fp1 is POSNEG1 + S'*(B1+...).
5462 NEVEN:
5463 #--FP2 IS SAVED BELOW; NO DATA REGISTER IS SAVED ON THIS PATH.
5464 	fmovm.x		&0x04,-(%sp)		# save fp2
5465 
5466 	fmov.x		%fp0,RPRIME(%a6)
5467 	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
5468 
5469 	fmov.d		COSB8(%pc),%fp1		# B8
5470 	fmov.d		SINA7(%pc),%fp2		# A7
5471 
5472 	fmul.x		%fp0,%fp1		# SB8
5473 	fmov.x		%fp0,SPRIME(%a6)
5474 	fmul.x		%fp0,%fp2		# SA7
5475 
5476 	ror.l		&1,%d1			# d1 bit 31 = N bit 1
5477 	and.l		&0x80000000,%d1
5478 
5479 	fadd.d		COSB7(%pc),%fp1		# B7+SB8
5480 	fadd.d		SINA6(%pc),%fp2		# A6+SA7
5481 
5482 	eor.l		%d1,RPRIME(%a6)		# R' = sgn*R
5483 	eor.l		%d1,SPRIME(%a6)		# S' = sgn*S
5484 
5485 	fmul.x		%fp0,%fp1		# S(B7+SB8)
5486 
5487 	or.l		&0x3F800000,%d1
5488 	mov.l		%d1,POSNEG1(%a6)	# POSNEG1 = sgn*1.0 in single
5489 
5490 	fmul.x		%fp0,%fp2		# S(A6+SA7)
5491 
5492 	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
5493 	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)
5494 
5495 	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
5496 	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))
5497 
5498 	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
5499 	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))
5500 
5501 	fmul.x		%fp0,%fp1		# S(B5+...)
5502 	fmul.x		%fp0,%fp2		# S(A4+...)
5503 
5504 	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
5505 	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)
5506 
5507 	fmul.x		%fp0,%fp1		# S(B4+...)
5508 	fmul.x		%fp0,%fp2		# S(A3+...)
5509 
5510 	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
5511 	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)
5512 
5513 	fmul.x		%fp0,%fp1		# S(B3+...)
5514 	fmul.x		%fp0,%fp2		# S(A2+...)
5515 
5516 	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
5517 	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)
5518 
5519 	fmul.x		%fp0,%fp1		# S(B2+...)
5520 	fmul.x		%fp2,%fp0		# S(A1+...)
5521 
5522 
5523 	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
5524 	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
5525 	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))
5526 
5527 	fmovm.x		(%sp)+,&0x20		# restore fp2
5528 
5529 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5530 	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
5531 	bsr		sto_cos			# store cosine result
5532 	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
5533 	bra		t_inx2
5534 
5535 ################################################
5536 
# SCBORS: sincos "big or small" test on the compact |X| in d1.
# NOTE(review): no branch to SCBORS appears in this part of the file; the
# small-input path in ssincos enters at SCSM directly. Confirm before removing.
5537 SCBORS:
5538 	cmp.l		%d1,&0x3FFF8000		# compact |X| > 1.0?
5539 	bgt.w		SREDUCEX		# yes; general argument reduction
5540 
5541 ################################################
5542 
# SCSM: sincos for |X| < 2**(-40): cos(X) = 1 (inexact), sin(X) = X.
# The cosine is formed as 1.0 minus single 0x00800000 (2**(-126)) under the
# user's rounding mode, then handed to sto_cos. d0 is reloaded from FPCR
# before the final move and the exit through t_catch.
5543 SCSM:
5544 #	mov.w		&0x0000,XDCARE(%a6)
5545 	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
5546 
5547 	fmov.l		%d0,%fpcr		# restore users round mode,prec
5548 	fsub.s		&0x00800000,%fp1	# cos: 1.0 - 2**(-126)
5549 	bsr		sto_cos			# store cosine result
5550 	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
5551 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5552 	fmov.x		X(%a6),%fp0		# sin: fp0 = X
5553 	bra		t_catch
5554 
5555 ##############################################
5556 
# ssincosd(): entry point for sincos of a denormalized input.
# Stores 1.0 as the cosine via sto_cos, with d0 preserved across the call,
# then lets t_extdnrm (defined outside this section) produce the sine.
5557 	global		ssincosd
5558 #--SIN AND COS OF X FOR DENORMALIZED X
5559 ssincosd:
5560 	mov.l		%d0,-(%sp)		# save d0
5561 	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0 = cos(X)
5562 	bsr		sto_cos			# store cosine result
5563 	mov.l		(%sp)+,%d0		# restore d0
5564 	bra		t_extdnrm
5565 
5566 ############################################
5567 
5568 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5569 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5570 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ON ENTRY: fp0 = X, d1 = compact form of |X|.
#--fp2-fp5 and d2 are pushed here and popped again at SRESTORE.
#--The running remainder is kept as a pair (fp0 = R, fp1 = r).
5571 SREDUCEX:
5572 	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
5573 	mov.l		%d2,-(%sp)		# save d2
5574 	fmov.s		&0x00000000,%fp1	# fp1 = 0
5575 
5576 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5577 #--there is a danger of unwanted overflow in first LOOP iteration.  In this
5578 #--case, reduce argument by one remainder step to make subsequent reduction
5579 #--safe.
5580 	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
5581 	bne.b		SLOOP			# no
5582 
5583 # yes; create 2**16383*PI/2
5584 	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
5585 	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
5586 	clr.l		FP_SCR0_LO(%a6)
5587 
5588 # create low half of 2**16383*PI/2 at FP_SCR1
5589 	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
5590 	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
5591 	clr.l		FP_SCR1_LO(%a6)
5592 
5593 	ftest.x		%fp0			# test sign of argument
5594 	fblt.w		sred_neg		# negative arg: add the constants as built
5595 
5596 	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg: negate both constants
5597 	or.b		&0x80,FP_SCR1_EX(%a6)	# so the adds below subtract
5598 sred_neg:
5599 	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
5600 	fmov.x		%fp0,%fp1		# save high result in fp1
5601 	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
5602 	fsub.x		%fp0,%fp1		# determine low component of result
5603 	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
5604 
5605 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5606 #--integer quotient will be stored in N
5607 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
#--Each pass takes K = unbiased exponent of R. While K > 28 the pass removes
#--a multiple of 2**L*(PI/2) with L = K-27 and loops again; once K <= 28 the
#--final pass uses L = 0 and sets ENDFLAG so the code exits to SRESTORE.
5608 SLOOP:
5609 	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
5610 	mov.w		INARG(%a6),%d1		# low word of d1 = sign/exponent word
5611 	mov.l		%d1,%a1			# save a copy of D1 (sign is used in SWORK)
5612 	and.l		&0x00007FFF,%d1		# keep the biased exponent only
5613 	sub.l		&0x00003FFF,%d1		# d1 = K
5614 	cmp.l		%d1,&28
5615 	ble.b		SLASTLOOP		# K <= 28: this is the final pass
5616 SCONTLOOP:
5617 	sub.l		&27,%d1			# d1 = L := K-27
5618 	mov.b		&0,ENDFLAG(%a6)
5619 	bra.b		SWORK
5620 SLASTLOOP:
5621 	clr.l		%d1			# d1 = L := 0
5622 	mov.b		&1,ENDFLAG(%a6)
5623 
5624 SWORK:
5625 #--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
5626 #--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
5627 
5628 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5629 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5630 
5631 	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
5632 	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
5633 
5634 	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
5635 	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
5636 	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
5637 
5638 	fmov.x		%fp0,%fp2
5639 	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
5640 
5641 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5642 #--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
5643 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5644 #--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
5645 #--US THE DESIRED VALUE IN FLOATING POINT.
5646 	mov.l		%a1,%d2			# recover the sign/exponent word saved at SLOOP
5647 	swap		%d2			# sign bit moves from bit 15 to bit 31
5648 	and.l		&0x80000000,%d2
5649 	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
5650 	mov.l		%d2,TWOTO63(%a6)
5651 	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
5652 	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
5653 #	fint.x		%fp2
5654 
5655 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5656 	mov.l		%d1,%d2			# d2 = L
5657 
5658 	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
5659 	mov.w		%d2,FP_SCR0_EX(%a6)
5660 	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
5661 	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
5662 
5663 	add.l		&0x00003FDD,%d1		# biased exp of 2**L * Piby2_2
5664 	mov.w		%d1,FP_SCR1_EX(%a6)
5665 	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
5666 	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
5667 
5668 	mov.b		ENDFLAG(%a6),%d1	# d1.b = last-pass flag, tested below
5669 
5670 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5671 #--P2 = 2**(L) * Piby2_2
5672 	fmov.x		%fp2,%fp4		# fp4 = N
5673 	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
5674 	fmov.x		%fp2,%fp5		# fp5 = N
5675 	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
5676 	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
5677 
5678 #--we want P+p = W+w  but  |p| <= half ulp of P
5679 #--Then, we need to compute  A := R-P   and  a := r-p
5680 	fadd.x		%fp5,%fp3		# fp3 = P
5681 	fsub.x		%fp3,%fp4		# fp4 = W-P
5682 
5683 	fsub.x		%fp3,%fp0		# fp0 = A := R - P
5684 	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
5685 
5686 	fmov.x		%fp0,%fp3		# fp3 = A
5687 	fsub.x		%fp4,%fp1		# fp1 = a := r - p
5688 
5689 #--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
5690 #--|r| <= half ulp of R.
5691 	fadd.x		%fp1,%fp0		# fp0 = R := A+a
5692 #--No need to calculate r if this is the last loop
5693 	cmp.b		%d1,&0
5694 	bgt.w		SRESTORE		# ENDFLAG set: reduction finished
5695 
5696 #--Need to calculate r
5697 	fsub.x		%fp0,%fp3		# fp3 = A-R
5698 	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
5699 	bra.w		SLOOP
5700 
# SRESTORE: end of the general reduction. Stores the quotient N (fp2) as an
# integer in INT(%a6), pops d2 and fp2-fp5 saved by SREDUCEX, then resumes
# the caller's path: ADJN < 4 (ssin/scos) -> SINCONT, otherwise -> SCCONT.
5701 SRESTORE:
5702 	fmov.l		%fp2,INT(%a6)		# INT = N as an integer
5703 	mov.l		(%sp)+,%d2		# restore d2
5704 	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
5705 
5706 	mov.l		ADJN(%a6),%d1
5707 	cmp.l		%d1,&4			# ADJN = 4 was set by ssincos
5708 
5709 	blt.w		SINCONT			# sin or cos requested
5710 	bra.w		SCCONT			# sincos requested
5711 
5712 #########################################################################
5713 # stan():  computes the tangent of a normalized input			#
5714 # stand(): computes the tangent of a denormalized input			#
5715 #									#
5716 # INPUT *************************************************************** #
5717 #	a0 = pointer to extended precision input			#
5718 #	d0 = round precision,mode					#
5719 #									#
5720 # OUTPUT ************************************************************** #
5721 #	fp0 = tan(X)							#
5722 #									#
5723 # ACCURACY and MONOTONICITY ******************************************* #
5724 #	The returned result is within 3 ulp in 64 significant bit, i.e. #
5725 #	within 0.5001 ulp to 53 bits if the result is subsequently	#
5726 #	rounded to double precision. The result is provably monotonic	#
5727 #	in double precision.						#
5728 #									#
5729 # ALGORITHM *********************************************************** #
5730 #									#
5731 #	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
5732 #									#
5733 #	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5734 #		k = N mod 2, so in particular, k = 0 or 1.		#
5735 #									#
5736 #	3. If k is odd, go to 5.					#
5737 #									#
5738 #	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
5739 #		rational function U/V where				#
5740 #		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
5741 #		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.	#
5742 #		Exit.							#
5743 #									#
5744 #	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5745 #		a rational function U/V where				#
5746 #		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
5747 #		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
5748 #		-Cot(r) = -V/U. Exit.					#
5749 #									#
5750 #	6. If |X| > 1, go to 8.						#
5751 #									#
5752 #	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
5753 #									#
5754 #	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back	#
5755 #		to 2.							#
5756 #									#
5757 #########################################################################
5758 
# Coefficients of the rational approximation tan(r) ~ U/V used by stan.
# Storage format, as implied by the operation size used in TANCONT:
#   TANQ4, TANP3               : double precision (fmov.d)
#   TANQ3                      : read as double (fadd.d); padded to 16 bytes
#   TANP2, TANQ2, TANP1, TANQ1 : extended precision (fadd.x), 16 bytes with pad
5759 TANQ4:
5760 	long		0x3EA0B759,0xF50F8688
5761 TANP3:
5762 	long		0xBEF2BAA5,0xA8924F04
5763 
5764 TANQ3:
5765 	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5766 
5767 TANP2:
5768 	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5769 
5770 TANQ2:
5771 	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5772 
5773 TANP1:
5774 	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5775 
5776 TANQ1:
5777 	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5778 
# NOTE(review): INVTWOPI, TWOPI1 and TWOPI2 are not referenced anywhere in
# this part of the file; confirm their users before changing or removing them.
5779 INVTWOPI:
5780 	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5781 
5782 TWOPI1:
5783 	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
5784 TWOPI2:
5785 	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5786 
5787 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5788 #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5789 #--MOST 69 BITS LONG.
#--Each entry is 16 bytes: three longwords of extended-precision leading
#--term (Y1) followed by one longword of single-precision trailing term (Y2).
#--Entries run from N = -32 (first) to N = +32 (last); the all-zero entry
#--for N = 0 sits at PITBL+0x200, which is the base address the lookup code
#--loads before adding N*16.
5790 #	global		PITBL
5791 PITBL:
5792 	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5793 	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5794 	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5795 	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5796 	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5797 	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5798 	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5799 	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5800 	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5801 	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
5802 	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5803 	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5804 	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5805 	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5806 	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5807 	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5808 	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5809 	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5810 	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5811 	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5812 	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5813 	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5814 	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5815 	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5816 	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5817 	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5818 	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5819 	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5820 	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5821 	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5822 	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5823 	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5824 	long		0x00000000,0x00000000,0x00000000,0x00000000
5825 	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5826 	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5827 	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5828 	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5829 	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5830 	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5831 	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5832 	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5833 	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5834 	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5835 	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5836 	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5837 	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5838 	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5839 	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5840 	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5841 	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5842 	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5843 	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5844 	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5845 	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5846 	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5847 	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
5848 	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5849 	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5850 	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5851 	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5852 	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5853 	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5854 	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5855 	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5856 	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5857 
#--Aliases for scratch storage addressed off %a6. INARG, TWOTO63, INT and
#--ENDFLAG are only used by the REDUCEX argument reduction of stan.
	set		INARG,FP_SCR0		# running remainder, stored to read its exponent

	set		TWOTO63,L_SCR1		# SIGN(INARG)*2**63 in single format
	set		INT,L_SCR1		# integer quotient N (same slot as TWOTO63)
	set		ENDFLAG,L_SCR2		# byte flag: 1 on the final pass, else 0

	global		stan
#--stan: tangent of the extended-precision operand addressed by a0.
#--	a0  = pointer to extended precision input
#--	d0  = value loaded into FPCR just before the last operation
#--	      (user's rounding mode and precision)
#--	fp0 = result; d1, a1 and fp1 are used as scratch.
#--The magnitude is classified with the "compact form" of |X|: biased
#--exponent in the upper word of d1, top 16 mantissa bits in the lower word.
stan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent word -> upper half of d1
	mov.w		4(%a0),%d1		# top 16 mantissa bits -> lower half of d1
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = compact |X|

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		TANOK1
	bra.w		TANSM			# no: tiny argument
TANOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		TANMAIN
	bra.w		REDUCEX			# no: general argument reduction

TANMAIN:
#--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

#--PITBL entries are 16 bytes each; the +0x200 bias skips 32 entries so
#--that a1 addresses the N = 0 entry and N may be negative.
	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,%d1		# CONVERT TO INTEGER: d1 = N

	asl.l		&4,%d1			# d1 = 16*N = byte offset of entry N
	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1 (Y1 = extended leading part)

	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2 (Y2 = single tail)

	ror.l		&5,%d1			# bit 0 of N (bit 4 of 16*N) -> bit 31
	and.l		&0x80000000,%d1		# d1 < 0 IFF N WAS ODD

TANCONT:
#--ENTRY (from TANMAIN or from REDUCEX): fp0 = R, the reduced argument;
#--d1 is negative iff the quotient N is odd.
#--N even: TAN(X) = U/V, N odd: TAN(X) = -V/U (see NODD), where
#--	U = R + R*S*(P1 + S*(P2 + S*P3))
#--	V = 1 + S*(Q1 + S*(Q2 + S*(Q3 + S*Q4))),  S = R*R
	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3

	cmp.l		%d1,&0
	blt.w		NODD			# N odd

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp1,%fp3		# SQ4
	fmul.x		%fp1,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
	fmul.x		%fp1,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp0		# U = R+RS(P1+S(P2+SP3))

	fadd.s		&0x3F800000,%fp1	# V = 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		%fp1,%fp0		# U/V: last inst - possible exception set
	bra		t_inx2

NODD:
#--N IS ODD. Same polynomials as above, but here fp0 holds S and fp1
#--holds R, and the quotient is inverted and negated: TAN(X) = V/(-U).
	fmov.x		%fp0,%fp1		# fp1 = R
	fmul.x		%fp0,%fp0		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp0,%fp3		# SQ4
	fmul.x		%fp0,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
	fmul.x		%fp0,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp1		# U = R+RS(P1+S(P2+SP3))
	fadd.s		&0x3F800000,%fp0	# V = 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.x		%fp1,-(%sp)		# push U as an extended operand
	eor.l		&0x80000000,(%sp)	# flip its sign bit in memory: -U

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		(%sp)+,%fp0		# V/(-U): last inst - possible exception set
	bra		t_inx2
5980 
TANBORS:
#--NOTE(review): no branch to TANBORS is visible in this part of the file
#--(stan tests the range itself and jumps straight to TANSM or REDUCEX);
#--it looks like a leftover entry point - confirm before removing.
#--With |X| in compact form in d1: anything above 1.0 goes to the general
#--argument reduction, everything else falls into the tiny-argument case.
	cmp.l		%d1,&0x3FFF8000
	bgt.b		REDUCEX

TANSM:
#--|X| < 2**(-40): TAN(X) IS RETURNED AS X.
#--X is bounced through the stack so that the reload is executed after
#--FPCR has been loaded from d0 and is the last FP instruction executed.
	fmov.x		%fp0,-(%sp)		# park X (extended) on the stack
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_catch
5993 
	global		stand
#--TAN(X) = X FOR DENORMALIZED X
#--Entry point for a denormalized operand; all work is delegated to
#--t_extdnrm, which is defined outside this part of the file.
stand:
	bra		t_extdnrm
5998 
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ENTRY: fp0 = X, d1 = |X| in compact form.
#--EXIT (to TANCONT): fp0 = reduced argument R, bit 31 of d1 = low bit of
#--the integer quotient N. d2 and fp2-fp5 are saved and restored here;
#--d1, a1, fp1, FP_SCR0, FP_SCR1, L_SCR1 and L_SCR2 are overwritten.
REDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0 (low part r of the remainder)

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		LOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		red_neg			# negative arg: add the positive constants

#--positive arg: set the sign bit of both constants so that the fadd's
#--below subtract them
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
red_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of R
	mov.l		%d1,%a1			# save a copy of d1 (sign is needed in WORK)
	and.l		&0x00007FFF,%d1		# biased exponent only
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		LASTLOOP		# K <= 28: this pass finishes the reduction
CONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)		# another pass will follow
	bra.b		WORK
LASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)		# mark the final pass

WORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# sign/exponent word saved in LOOP
	swap		%d2			# move it to the upper half of d2
	and.l		&0x80000000,%d2		# keep only the sign of INARG
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fintrz.x	%fp2,%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1		# BIASED EXP OF 2**L * Piby2_2
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1.b = last-pass flag, tested below

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		RESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		LOOP

RESTORE:
#--Reduction finished: fp0 = R, fp2 = N of the final pass.
	fmov.l		%fp2,INT(%a6)		# store N as a 32-bit integer
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1			# bit 0 of N -> bit 31: d1 < 0 iff N odd

	bra.w		TANCONT
6141 
6142 #########################################################################
6143 # satan():  computes the arctangent of a normalized number		#
6144 # satand(): computes the arctangent of a denormalized number		#
6145 #									#
6146 # INPUT	*************************************************************** #
6147 #	a0 = pointer to extended precision input			#
6148 #	d0 = round precision,mode					#
6149 #									#
6150 # OUTPUT ************************************************************** #
6151 #	fp0 = arctan(X)							#
6152 #									#
6153 # ACCURACY and MONOTONICITY ******************************************* #
6154 #	The returned result is within 2 ulps in	64 significant bit,	#
6155 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6156 #	rounded to double precision. The result is provably monotonic	#
6157 #	in double precision.						#
6158 #									#
6159 # ALGORITHM *********************************************************** #
6160 #	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
6161 #									#
6162 #	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
6163 #		Note that k = -4, -3,..., or 3.				#
6164 #		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
6165 #		significant bits of X with a bit-1 attached at the 6-th	#
6166 #		bit position. Define u to be u = (X-F) / (1 + X*F).	#
6167 #									#
6168 #	Step 3. Approximate arctan(u) by a polynomial poly.		#
6169 #									#
6170 #	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a	#
6171 #		table of values calculated beforehand. Exit.		#
6172 #									#
6173 #	Step 5. If |X| >= 16, go to Step 7.				#
6174 #									#
6175 #	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
6176 #									#
6177 #	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd	#
6178 #		polynomial in X'.					#
6179 #		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
6180 #									#
6181 #########################################################################
6182 
#--Double-precision coefficients (two longs each) of the ATANMAIN
#--polynomial  U + A1*U*V*(A2 + V*(A3 + V)),  V = U*U.
ATANA3:	long		0xBFF6687E,0x314987D8
ATANA2:	long		0x4002AC69,0x34A26DB3
ATANA1:	long		0xBFC2476F,0x4E1DA28E

#--Double-precision coefficients B1..B6 of the ATANSM polynomial
#--X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))),  Y = X*X.
ATANB6:	long		0x3FB34444,0x7F876989
ATANB5:	long		0xBFB744EE,0x7FAF45DB
ATANB4:	long		0x3FBC71C6,0x46940220
ATANB3:	long		0xBFC24924,0x921872F9
ATANB2:	long		0x3FC99999,0x99998FA9
ATANB1:	long		0xBFD55555,0x55555555

#--Double-precision coefficients C1..C5 of the ATANBIG polynomial
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))),  X' = -1/X, Y = X'*X'.
ATANC5:	long		0xBFB70BF3,0x98539E6A
ATANC4:	long		0x3FBC7187,0x962D1D7D
ATANC3:	long		0xBFC24924,0x827107B8
ATANC2:	long		0x3FC99999,0x9996263E
ATANC1:	long		0xBFD55555,0x55555536

#--+PI/2 and -PI/2 as extended-precision operands (fourth long is padding);
#--used by ATANBIG and ATANHUGE.
PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000

#--Extended operands with exponent field 0x0001 and only the leading
#--mantissa bit set, positive and negative; ATANHUGE adds the one of
#--opposite sign to +-PI/2 to produce SIGN(X)*(PI/2 - TINY).
PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000
6205 
#--ATANTBL: ATAN(|F|) FOR THE 128 VALUES |F| = 2^K * 1.BBBB1,
#--K = -4,...,3 (8 exponents times 16 settings of BBBB).
#--Each entry occupies 16 bytes: ATANMAIN copies the first three longs
#--(sign/exponent word plus 16 unused bits, then the two mantissa longs)
#--into ATANF; the fourth long is not read there.
#--ATANMAIN forms the byte offset of an entry as ((K+4)*16 + BBBB) * 16.
ATANTBL:
	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6335 
#--Scratch aliases used by satan (offsets follow the extended layout:
#--sign/exponent word, 16 unused bits, two mantissa longs).
	set		X,FP_SCR0		# copy of the input; overwritten with F or X'
	set		XDCARE,X+2		# the 16 unused bits after the exponent word
	set		XFRAC,X+4		# upper mantissa long
	set		XFRACLO,X+8		# lower mantissa long

	set		ATANF,FP_SCR1		# SIGN(F)*ATAN(|F|) built by ATANMAIN
	set		ATANFHI,ATANF+4
	set		ATANFLO,ATANF+8

	global		satan
#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--	a0  = pointer to extended precision input
#--	d0  = round precision,mode (loaded into FPCR before the last operation)
#--	fp0 = arctan(X); d1, a1 and fp1 are used as scratch.
satan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent word -> upper half of d1
	mov.w		4(%a0),%d1		# top 16 mantissa bits -> lower half of d1
	fmov.x		%fp0,X(%a6)		# keep a memory copy of X
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = compact |X|

	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
	bge.b		ATANOK1
	bra.w		ATANSM			# no: small argument

ATANOK1:
	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
	ble.b		ATANMAIN
	bra.w		ATANBIG			# no: large argument

#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
#--WILL INVOLVE A VERY LONG POLYNOMIAL.

#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

ATANMAIN:

#--Turn the memory copy of X into F by editing its mantissa in place.
	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F

	fmov.x		%fp0,%fp1		# FP1 IS X
	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
	fsub.x		X(%a6),%fp0		# FP0 IS X-F
	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)

#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
#--SAVE REGISTERS FP2.

#--Index arithmetic: d2 becomes (K+4)<<15 and d1 becomes BBBB<<11, so
#--after the shift by 7 d1 = ((K+4)*16 + BBBB)*16, the byte offset of a
#--16-byte ATANTBL entry.
	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
	sub.l		&0x3FFB0000,%d2		# K+4
	asr.l		&1,%d2
	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|)
	lea		ATANTBL(%pc),%a1
	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
	mov.l		(%a1)+,ATANF(%a6)
	mov.l		(%a1)+,ATANFHI(%a6)
	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
	and.l		&0x80000000,%d1		# SIGN(F)
	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
	mov.l		(%sp)+,%d2		# RESTORE d2

#--THAT'S ALL I HAVE TO DO FOR NOW,
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!

#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
#--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED

	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
	fmov.d		ATANA3(%pc),%fp2
	fadd.x		%fp1,%fp2		# A3+V
	fmul.x		%fp1,%fp2		# V*(A3+V)
	fmul.x		%fp0,%fp1		# U*V
	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		ATANF(%a6),%fp0		# ATAN(X) = ATAN(F) + ATAN(U)
	bra		t_inx2
6448 
ATANBORS:
#--NOTE(review): no branch to ATANBORS is visible in this part of the file
#--(satan tests the range itself and jumps straight to ATANSM or ATANBIG);
#--it looks like a leftover entry point - confirm before removing.
#--|X| IS IN d1 IN COMPACT FORM, FP0 IS X AND |X| < 1/16 OR |X| >= 16.
	cmp.l		%d1,&0x3FFF8000		# compare |X| with 1.0
	bgt.w		ATANBIG			# I.E. |X| >= 16

ATANSM:
#--|X| < 1/16
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
#--WHERE Y = X*X, AND Z = Y*Y.
#--X itself is re-read from the memory copy X(%a6) stored by satan.

	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
	blt.w		ATANTINY

#--COMPUTE POLYNOMIAL
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANB6(%pc),%fp2
	fmov.d		ATANB5(%pc),%fp3

	fmul.x		%fp1,%fp2		# Z*B6
	fmul.x		%fp1,%fp3		# Z*B5

	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5

	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)

	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)

	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
	fmul.x		X(%a6),%fp0		# X*Y

	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]

	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		X(%a6),%fp0		# X + X*Y*(...): final, user-rounded add
	bra		t_inx2
6500 
ATANTINY:
#--|X| < 2^(-40), ATAN(X) = X
#--X is reloaded from its memory copy after FPCR has been set from d0, so
#--the move is the last FP instruction executed before t_catch.

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set

	bra		t_catch
6509 
ATANBIG:
#--|X| >= 16. d1 = |X| in compact form, fp0 = X, a0 still points at X.
#--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
	cmp.l		%d1,&0x40638000		# |X| > 2^(100)?
	bgt.w		ATANHUGE

#--APPROXIMATE ATAN(-1/X) BY
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
#--THIS CAN BE RE-WRITTEN AS
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.s		&0xBF800000,%fp1	# LOAD -1
	fdiv.x		%fp0,%fp1		# FP1 IS -1/X

#--DIVIDE IS STILL CRANKING

	fmov.x		%fp1,%fp0		# FP0 IS X'
	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
	fmov.x		%fp1,X(%a6)		# X IS REALLY X'

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANC5(%pc),%fp3
	fmov.d		ATANC4(%pc),%fp2

	fmul.x		%fp1,%fp3		# Z*C5
	fmul.x		%fp1,%fp2		# Z*C4

	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4

	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)

	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
	fmul.x		X(%a6),%fp0		# X'*Y

	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]

	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
#					...	+[Y*(C2+Z*C4)])
	fadd.x		X(%a6),%fp0		# ATAN(X') = X' + X'*Y*(...)

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	tst.b		(%a0)			# sign byte of the original X
	bpl.b		pos_big

neg_big:
	fadd.x		NPIBY2(%pc),%fp0	# -PI/2 + ATAN(-1/X)
	bra		t_minx2

pos_big:
	fadd.x		PPIBY2(%pc),%fp0	# +PI/2 + ATAN(-1/X)
	bra		t_pinx2
6569 
ATANHUGE:
#--|X| > 2^(100)
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
#--The constant is loaded first; the add of the opposite-signed TINY is
#--executed after FPCR has been set from d0, so it is the operation that
#--is rounded with the user's mode and precision.
	tst.b		(%a0)			# sign byte of the original X
	bpl.b		pos_huge

neg_huge:
	fmov.x		NPIBY2(%pc),%fp0	# -PI/2
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		PTINY(%pc),%fp0		# -PI/2 + TINY
	bra		t_minx2

pos_huge:
	fmov.x		PPIBY2(%pc),%fp0	# +PI/2
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		NTINY(%pc),%fp0		# +PI/2 - TINY
	bra		t_pinx2
6586 
	global		satand
#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
#--All work is delegated to t_extdnrm (defined outside this part of the
#--file), the same handler stand and sasind branch to.
satand:
	bra		t_extdnrm
6591 
6592 #########################################################################
6593 # sasin():  computes the inverse sine of a normalized input		#
6594 # sasind(): computes the inverse sine of a denormalized input		#
6595 #									#
6596 # INPUT ***************************************************************	#
6597 #	a0 = pointer to extended precision input			#
6598 #	d0 = round precision,mode					#
6599 #									#
6600 # OUTPUT **************************************************************	#
6601 #	fp0 = arcsin(X)							#
6602 #									#
6603 # ACCURACY and MONOTONICITY *******************************************	#
6604 #	The returned result is within 3 ulps in	64 significant bit,	#
6605 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6606 #	rounded to double precision. The result is provably monotonic	#
6607 #	in double precision.						#
6608 #									#
6609 # ALGORITHM ***********************************************************	#
6610 #									#
6611 #	ASIN								#
6612 #	1. If |X| >= 1, go to 3.					#
6613 #									#
6614 #	2. (|X| < 1) Calculate asin(X) by				#
6615 #		z := sqrt( [1-X][1+X] )					#
6616 #		asin(X) = atan( x / z ).				#
6617 #		Exit.							#
6618 #									#
6619 #	3. If |X| > 1, go to 5.						#
6620 #									#
6621 #	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6622 #									#
6623 #	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
6624 #		Exit.							#
6625 #									#
6626 #########################################################################
6627 
	global		sasin
#--sasin: arcsine of the extended-precision operand addressed by a0.
#--	a0  = pointer to extended precision input
#--	d0  = round precision,mode
#--	fp0 = arcsin(X); d1 and fp1 are used as scratch.
#--Dispatch on |X| in compact form (d1): >= 1 -> ASINBIG,
#--< 2^(-40) -> ASINTINY, otherwise ASINMAIN.
sasin:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent word -> upper half of d1
	mov.w		4(%a0),%d1		# top 16 mantissa bits -> lower half of d1
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = compact |X|
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
	bge.b		ASINBIG

# This catch is added here for the '060 QSP. Originally, the call to
# satan() would handle this case by causing the exception which would
# not be caught until gen_except(). Now, with the exceptions being
# detected inside of satan(), the exception would have been handled there
# instead of inside sasin() as expected.
	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
	blt.w		ASINTINY

#--THIS IS THE USUAL CASE, |X| < 1
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )

ASINMAIN:
	fmov.s		&0x3F800000,%fp1
	fsub.x		%fp0,%fp1		# 1-X
	fmovm.x		&0x4,-(%sp)		# save fp2
	fmov.s		&0x3F800000,%fp2
	fadd.x		%fp0,%fp2		# 1+X
	fmul.x		%fp2,%fp1		# (1+X)(1-X)
	fmovm.x		(%sp)+,&0x20		# restore fp2
	fsqrt.x		%fp1			# SQRT([1-X][1+X])
	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
#--satan takes its operand through a0, so the quotient is passed in a
#--12-byte temporary on the stack; d0 is handed on unchanged.
	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
	bsr		satan
	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
	bra		t_inx2
6664 
ASINBIG:
#--|X| >= 1: operand error if |X| > 1, otherwise X is exactly +-1.
	fabs.x		%fp0			# |X|
	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ASIN(X) = +- PI/2.
#--PI/2 is multiplied by +-1 after FPCR is set from d0, so the multiply
#--both applies the sign and is the user-rounded final operation.
ASINONE:
	fmov.x		PIBY2(%pc),%fp0
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1		# SIGN BIT OF X
	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fmul.s		(%sp)+,%fp0		# SIGN(X) * PI/2
	bra		t_inx2
6680 
#--|X| < 2^(-40), ASIN(X) = X
#--X is reloaded from the caller's operand after FPCR has been set from
#--d0, so the move is the last FP instruction executed before t_catch.
ASINTINY:
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# last inst - possible exception
	bra		t_catch
6687 
	global		sasind
#--ASIN(X) = X FOR DENORMALIZED X
#--All work is delegated to t_extdnrm (defined outside this part of the
#--file), the same handler stand and satand branch to.
sasind:
	bra		t_extdnrm
6692 
6693 #########################################################################
6694 # sacos():  computes the inverse cosine of a normalized input		#
6695 # sacosd(): computes the inverse cosine of a denormalized input		#
6696 #									#
6697 # INPUT ***************************************************************	#
6698 #	a0 = pointer to extended precision input			#
6699 #	d0 = round precision,mode					#
6700 #									#
6701 # OUTPUT ************************************************************** #
6702 #	fp0 = arccos(X)							#
6703 #									#
6704 # ACCURACY and MONOTONICITY *******************************************	#
6705 #	The returned result is within 3 ulps in	64 significant bit,	#
6706 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6707 #	rounded to double precision. The result is provably monotonic	#
6708 #	in double precision.						#
6709 #									#
6710 # ALGORITHM *********************************************************** #
6711 #									#
6712 #	ACOS								#
6713 #	1. If |X| >= 1, go to 3.					#
6714 #									#
6715 #	2. (|X| < 1) Calculate acos(X) by				#
6716 #		z := (1-X) / (1+X)					#
6717 #		acos(X) = 2 * atan( sqrt(z) ).				#
6718 #		Exit.							#
6719 #									#
6720 #	3. If |X| > 1, go to 5.						#
6721 #									#
6722 #	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
6723 #									#
6724 #	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
6725 #		Exit.							#
6726 #									#
6727 #########################################################################
6728 
6729 	global		sacos
#--sacos: ACOS(X) for a normalized X.
#--  a0 = pointer to extended-precision X, d0 = user round precision,mode.
#--  The result is returned in fp0; d1 is used as scratch.
6730 sacos:
6731 	fmov.x		(%a0),%fp0		# LOAD INPUT
6732 
6733 	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
6734 	mov.w		4(%a0),%d1		# low word := word at offset 4 of X
6735 	and.l		&0x7FFFFFFF,%d1		# clear the sign bit -> compact |X|
6736 	cmp.l		%d1,&0x3FFF8000		# compare with compact form of 1.0
6737 	bge.b		ACOSBIG			# |X| >= 1 is handled at ACOSBIG
6738 
6739 #--THIS IS THE USUAL CASE, |X| < 1
6740 #--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) ) )
#--d0 is pushed and then cleared, so satan is entered with d0 = 0; the
#--pushed value is loaded into the FPCR just before the final doubling.
6741 
6742 ACOSMAIN:
6743 	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
6744 	fadd.x		%fp0,%fp1		# 1+X
6745 	fneg.x		%fp0			# -X
6746 	fadd.s		&0x3F800000,%fp0	# 1-X
6747 	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
6748 	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
6749 	mov.l		%d0,-(%sp)		# save original users fpcr
6750 	clr.l		%d0			# d0 = 0 for the call to satan
6751 	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
6752 	lea		(%sp),%a0		# pass ptr to sqrt
6753 	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
6754 	add.l		&0xc,%sp		# clear SQRT(...) from stack
6755 
6756 	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
6757 	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
6758 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
6759 
6760 ACOSBIG:
#--|X| >= 1. |X| > 1 branches to t_operr; otherwise X is exactly +1 or -1.
6761 	fabs.x		%fp0			# fp0 = |X|
6762 	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
6763 	fbgt		t_operr			# cause an operr exception
6764 
6765 #--|X| = 1, ACOS(X) = 0 OR PI
6766 	tst.b		(%a0)			# is X positive or negative?
6767 	bpl.b		ACOSP1			# X = +1
6768 
6769 #--X = -1
6770 #Returns PI and inexact exception
6771 ACOSM1:
6772 	fmov.x		PI(%pc),%fp0		# load PI
6773 	fmov.l		%d0,%fpcr		# load round mode,prec
6774 	fadd.s		&0x00800000,%fp0	# add a small value
6775 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
6776 
6777 ACOSP1:
6778 	bra		ld_pzero		# answer is positive zero
6779 
6780 	global		sacosd
6781 #--ACOS(X) = PI/2 FOR DENORMALIZED X
#--Entry point for a denormalized input. The FPCR is loaded from d0 first,
#--so the move of the PIBY2 constant into fp0 happens under the user's
#--rounding precision/mode.
6782 sacosd:
6783 	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
6784 	fmov.x		PIBY2(%pc),%fp0		# fp0 = PIBY2 constant
6785 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
6786 
6787 #########################################################################
6788 # setox():    computes the exponential for a normalized input		#
6789 # setoxd():   computes the exponential for a denormalized input		#
6790 # setoxm1():  computes the exponential minus 1 for a normalized input	#
6791 # setoxm1d(): computes the exponential minus 1 for a denormalized input	#
6792 #									#
6793 # INPUT	*************************************************************** #
6794 #	a0 = pointer to extended precision input			#
6795 #	d0 = round precision,mode					#
6796 #									#
6797 # OUTPUT ************************************************************** #
6798 #	fp0 = exp(X) or exp(X)-1					#
6799 #									#
6800 # ACCURACY and MONOTONICITY ******************************************* #
6801 #	The returned result is within 0.85 ulps in 64 significant bits,	#
6802 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6803 #	rounded to double precision. The result is provably monotonic	#
6804 #	in double precision.						#
6805 #									#
6806 # ALGORITHM and IMPLEMENTATION **************************************** #
6807 #									#
6808 #	setoxd								#
6809 #	------								#
6810 #	Step 1.	Set ans := 1.0						#
6811 #									#
6812 #	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
6813 #	Notes:	This will always generate one exception -- inexact.	#
6814 #									#
6815 #									#
6816 #	setox								#
6817 #	-----								#
6818 #									#
6819 #	Step 1.	Filter out extreme cases of input argument.		#
6820 #		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
6821 #		1.2	Go to Step 7.					#
6822 #		1.3	If |X| < 16380 log(2), go to Step 2.		#
6823 #		1.4	Go to Step 8.					#
6824 #	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
6825 #		To avoid the use of floating-point comparisons, a	#
6826 #		compact representation of |X| is used. This format is a	#
6827 #		32-bit integer, the upper (more significant) 16 bits	#
6828 #		are the sign and biased exponent field of |X|; the	#
6829 #		lower 16 bits are the 16 most significant fraction	#
6830 #		(including the explicit bit) bits of |X|. Consequently,	#
6831 #		the comparisons in Steps 1.1 and 1.3 can be performed	#
6832 #		by integer comparison. Note also that the constant	#
6833 #		16380 log(2) used in Step 1.3 is also in the compact	#
6834 #		form. Thus taking the branch to Step 2 guarantees	#
6835 #		|X| < 16380 log(2). There is no harm to have a small	#
6836 #		number of cases where |X| is less than,	but close to,	#
6837 #		16380 log(2) and the branch to Step 8 is taken.		#
6838 #									#
6839 #	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
6840 #		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6841 #			was taken)					#
6842 #		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
6843 #		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., #
6844 #			or 63.						#
6845 #		2.4	Calculate	M = (N - J)/64; so N = 64M + J.	#
6846 #		2.5	Calculate the address of the stored value of	#
6847 #			2^(J/64).					#
6848 #		2.6	Create the value Scale = 2^M.			#
6849 #	Notes:	The calculation in 2.2 is really performed by		#
6850 #			Z := X * constant				#
6851 #			N := round-to-nearest-integer(Z)		#
6852 #		where							#
6853 #			constant := single-precision( 64/log 2 ).	#
6854 #									#
6855 #		Using a single-precision constant avoids memory		#
6856 #		access. Another effect of using a single-precision	#
6857 #		"constant" is that the calculated value Z is		#
6858 #									#
6859 #			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
6860 #									#
6861 #		This error has to be considered later in Steps 3 and 4.	#
6862 #									#
6863 #	Step 3.	Calculate X - N*log2/64.				#
6864 #		3.1	R := X + N*L1,					#
6865 #				where L1 := single-precision(-log2/64).	#
6866 #		3.2	R := R + N*L2,					#
6867 #				L2 := extended-precision(-log2/64 - L1).#
6868 #	Notes:	a) The way L1 and L2 are chosen ensures L1+L2		#
6869 #		approximate the value -log2/64 to 88 bits of accuracy.	#
6870 #		b) N*L1 is exact because N is no longer than 22 bits	#
6871 #		and L1 is no longer than 24 bits.			#
6872 #		c) The calculation X+N*L1 is also exact due to		#
6873 #		cancellation. Thus, R is practically X+N(L1+L2) to full	#
6874 #		64 bits.						#
6875 #		d) It is important to estimate how large can |R| be	#
6876 #		after Step 3.2.						#
6877 #									#
6878 #		N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)	#
6879 #		X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5	#
6880 #		X*64/log2 - N	=	f - eps*X 64/log2		#
6881 #		X - N*log2/64	=	f*log2/64 - eps*X		#
6882 #									#
6883 #									#
6884 #		Now |X| <= 16446 log2, thus				#
6885 #									#
6886 #			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64	#
6887 #					<= 0.57 log2/64.		#
6888 #		 This bound will be used in Step 4.			#
6889 #									#
6890 #	Step 4.	Approximate exp(R)-1 by a polynomial			#
6891 #		p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))	#
6892 #	Notes:	a) In order to reduce memory access, the coefficients	#
6893 #		are made as "short" as possible: A1 (which is 1/2), A4	#
6894 #		and A5 are single precision; A2 and A3 are double	#
6895 #		precision.						#
6896 #		b) Even with the restrictions above,			#
6897 #		   |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.	#
6898 #		Note that 0.0062 is slightly bigger than 0.57 log2/64.	#
6899 #		c) To fully utilize the pipeline, p is separated into	#
6900 #		two independent pieces of roughly equal complexities	#
6901 #			p = [ R + R*S*(A2 + S*A4) ]	+		#
6902 #				[ S*(A1 + S*(A3 + S*A5)) ]		#
6903 #		where S = R*R.						#
6904 #									#
6905 #	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by		#
6906 #				ans := T + ( T*p + t)			#
6907 #		where T and t are the stored values for 2^(J/64).	#
6908 #	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
6909 #		2^(J/64) to roughly 85 bits; T is in extended precision	#
6910 #		and t is in single precision. Note also that T is	#
6911 #		rounded to 62 bits so that the last two bits of T are	#
6912 #		zero. The reason for such a special form is that T-1,	#
6913 #		T-2, and T-8 will all be exact --- a property that will	#
6914 #		give much more accurate computation of the function	#
6915 #		EXPM1.							#
6916 #									#
6917 #	Step 6.	Reconstruction of exp(X)				#
6918 #			exp(X) = 2^M * 2^(J/64) * exp(R).		#
6919 #		6.1	If AdjFlag = 0, go to 6.3			#
6920 #		6.2	ans := ans * AdjScale				#
6921 #		6.3	Restore the user FPCR				#
6922 #		6.4	Return ans := ans * Scale. Exit.		#
6923 #	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
6924 #		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
6925 #		neither overflow nor underflow. If AdjFlag = 1, that	#
6926 #		means that						#
6927 #			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
6928 #		Hence, exp(X) may overflow or underflow or neither.	#
6929 #		When that is the case, AdjScale = 2^(M1) where M1 is	#
6930 #		approximately M. Thus 6.2 will never cause		#
6931 #		over/underflow. Possible exception in 6.4 is overflow	#
6932 #		or underflow. The inexact exception is not generated in	#
6933 #		6.4. Although one can argue that the inexact flag	#
6934 #		should always be raised, simulating that exception	#
6935 #		costs more than the flag is worth in practical uses.	#
6936 #									#
6937 #	Step 7.	Return 1 + X.						#
6938 #		7.1	ans := X					#
6939 #		7.2	Restore user FPCR.				#
6940 #		7.3	Return ans := 1 + ans. Exit			#
6941 #	Notes:	For non-zero X, the inexact exception will always be	#
6942 #		raised by 7.3. That is the only exception raised by 7.3.#
6943 #		Note also that we use the FMOVEM instruction to move X	#
6944 #		in Step 7.1 to avoid unnecessary trapping. (Although	#
6945 #		the FMOVEM may not seem relevant since X is normalized,	#
6946 #		the precaution will be useful in the library version of	#
6947 #		this code where the separate entry for denormalized	#
6948 #		inputs will be done away with.)				#
6949 #									#
6950 #	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
6951 #		8.1	If |X| > 16480 log2, go to Step 9.		#
6952 #		(mimic 2.2 - 2.6)					#
6953 #		8.2	N := round-to-integer( X * 64/log2 )		#
6954 #		8.3	Calculate J = N mod 64, J = 0,1,...,63		#
6955 #		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1,	#
6956 #			AdjFlag := 1.					#
6957 #		8.5	Calculate the address of the stored value	#
6958 #			2^(J/64).					#
6959 #		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.	#
6960 #		8.7	Go to Step 3.					#
6961 #	Notes:	Refer to notes for 2.2 - 2.6.				#
6962 #									#
6963 #	Step 9.	Handle exp(X), |X| > 16480 log2.			#
6964 #		9.1	If X < 0, go to 9.3				#
6965 #		9.2	ans := Huge, go to 9.4				#
6966 #		9.3	ans := Tiny.					#
6967 #		9.4	Restore user FPCR.				#
6968 #		9.5	Return ans := ans * ans. Exit.			#
6969 #	Notes:	Exp(X) will surely overflow or underflow, depending on	#
6970 #		X's sign. "Huge" and "Tiny" are respectively large/tiny	#
6971 #		extended-precision numbers whose square over/underflow	#
6972 #		with an inexact result. Thus, 9.5 always raises the	#
6973 #		inexact together with either overflow or underflow.	#
6974 #									#
6975 #	setoxm1d							#
6976 #	--------							#
6977 #									#
6978 #	Step 1.	Set ans := 0						#
6979 #									#
6980 #	Step 2.	Return	ans := X + ans. Exit.				#
6981 #	Notes:	This will return X with the appropriate rounding	#
6982 #		 precision prescribed by the user FPCR.			#
6983 #									#
6984 #	setoxm1								#
6985 #	-------								#
6986 #									#
6987 #	Step 1.	Check |X|						#
6988 #		1.1	If |X| >= 1/4, go to Step 1.3.			#
6989 #		1.2	Go to Step 7.					#
6990 #		1.3	If |X| < 70 log(2), go to Step 2.		#
6991 #		1.4	Go to Step 10.					#
6992 #	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
6993 #		However, it is conceivable |X| can be small very often	#
6994 #		because EXPM1 is intended to evaluate exp(X)-1		#
6995 #		accurately when |X| is small. For further details on	#
6996 #		the comparisons, see the notes on Step 1 of setox.	#
6997 #									#
6998 #	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
6999 #		2.1	N := round-to-nearest-integer( X * 64/log2 ).	#
7000 #		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., #
7001 #			or 63.						#
7002 #		2.3	Calculate	M = (N - J)/64; so N = 64M + J.	#
7003 #		2.4	Calculate the address of the stored value of	#
7004 #			2^(J/64).					#
7005 #		2.5	Create the values Sc = 2^M and			#
7006 #			OnebySc := -2^(-M).				#
7007 #	Notes:	See the notes on Step 2 of setox.			#
7008 #									#
7009 #	Step 3.	Calculate X - N*log2/64.				#
7010 #		3.1	R := X + N*L1,					#
7011 #				where L1 := single-precision(-log2/64).	#
7012 #		3.2	R := R + N*L2,					#
7013 #				L2 := extended-precision(-log2/64 - L1).#
7014 #	Notes:	Applying the analysis of Step 3 of setox in this case	#
7015 #		shows that |R| <= 0.0055 (note that |X| <= 70 log2 in	#
7016 #		this case).						#
7017 #									#
7018 #	Step 4.	Approximate exp(R)-1 by a polynomial			#
7019 #			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))	#
7020 #	Notes:	a) In order to reduce memory access, the coefficients	#
7021 #		are made as "short" as possible: A1 (which is 1/2), A5	#
7022 #		and A6 are single precision; A2, A3 and A4 are double	#
7023 #		precision.						#
7024 #		b) Even with the restriction above,			#
7025 #			|p - (exp(R)-1)| <	|R| * 2^(-72.7)		#
7026 #		for all |R| <= 0.0055.					#
7027 #		c) To fully utilize the pipeline, p is separated into	#
7028 #		two independent pieces of roughly equal complexity	#
7029 #			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+	#
7030 #				[ R + S*(A1 + S*(A3 + S*A5)) ]		#
7031 #		where S = R*R.						#
7032 #									#
7033 #	Step 5.	Compute 2^(J/64)*p by					#
7034 #				p := T*p				#
7035 #		where T and t are the stored values for 2^(J/64).	#
7036 #	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
7037 #		2^(J/64) to roughly 85 bits; T is in extended precision	#
7038 #		and t is in single precision. Note also that T is	#
7039 #		rounded to 62 bits so that the last two bits of T are	#
7040 #		zero. The reason for such a special form is that T-1,	#
7041 #		T-2, and T-8 will all be exact --- a property that will	#
7042 #		be exploited in Step 6 below. The total relative error	#
7043 #		in p is no bigger than 2^(-67.7) compared to the final	#
7044 #		result.							#
7045 #									#
7046 #	Step 6.	Reconstruction of exp(X)-1				#
7047 #			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).	#
7048 #		6.1	If M <= 63, go to Step 6.3.			#
7049 #		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6	#
7050 #		6.3	If M >= -3, go to 6.5.				#
7051 #		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6	#
7052 #		6.5	ans := (T + OnebySc) + (p + t).			#
7053 #		6.6	Restore user FPCR.				#
7054 #		6.7	Return ans := Sc * ans. Exit.			#
7055 #	Notes:	The various arrangements of the expressions give	#
7056 #		accurate evaluations.					#
7057 #									#
7058 #	Step 7.	exp(X)-1 for |X| < 1/4.					#
7059 #		7.1	If |X| >= 2^(-65), go to Step 9.		#
7060 #		7.2	Go to Step 8.					#
7061 #									#
7062 #	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).			#
7063 #		8.1	If |X| < 2^(-16312), goto 8.3			#
7064 #		8.2	Restore FPCR; return ans := X - 2^(-16382).	#
7065 #			Exit.						#
7066 #		8.3	ans := X * 2^(140).				#
7067 #		8.4	ans := ans - 2^(-16382); restore FPCR.		#
7068 #		 Return ans := ans*2^(-140). Exit			#
7069 #	Notes:	The idea is to return "X - tiny" under the user		#
7070 #		precision and rounding modes. To avoid unnecessary	#
7071 #		inefficiency, we stay away from denormalized numbers	#
7072 #		the best we can. For |X| >= 2^(-16312), the		#
7073 #		straightforward 8.2 generates the inexact exception as	#
7074 #		the case warrants.					#
7075 #									#
7076 #	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial		#
7077 #			p = X + X*X*(B1 + X*(B2 + ... + X*B12))		#
7078 #	Notes:	a) In order to reduce memory access, the coefficients	#
7079 #		are made as "short" as possible: B1 (which is 1/2), B9	#
7080 #		to B12 are single precision; B3 to B8 are double	#
7081 #		precision; and B2 is double extended.			#
7082 #		b) Even with the restriction above,			#
7083 #			|p - (exp(X)-1)| < |X| 2^(-70.6)		#
7084 #		for all |X| <= 0.251.					#
7085 #		Note that 0.251 is slightly bigger than 1/4.		#
7086 #		c) To fully preserve accuracy, the polynomial is	#
7087 #		computed as						#
7088 #			X + ( S*B1 +	Q ) where S = X*X and		#
7089 #			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))	#
7090 #		d) To fully utilize the pipeline, Q is separated into	#
7091 #		two independent pieces of roughly equal complexity	#
7092 #			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +	#
7093 #				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]	#
7094 #									#
7095 #	Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.		#
7096 #		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all	#
7097 #		practical purposes. Therefore, go to Step 1 of setox.	#
7098 #		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical	#
7099 #		purposes.						#
7100 #		ans := -1						#
7101 #		Restore user FPCR					#
7102 #		Return ans := ans + 2^(-126). Exit.			#
7103 #	Notes:	10.2 will always create an inexact and return -1 + tiny	#
7104 #		in the user rounding precision and mode.		#
7105 #									#
7106 #########################################################################
7107 
#--Constants for setox/setoxm1. How each one is read is taken from the
#--instructions that reference it further down.
#--L2: read with fmul.x (extended precision) as the trailing part of -log2/64.
7108 L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7109 
#--setox polynomial coefficients A3 and A2, read with fadd.d (double).
7110 EEXPA3:	long		0x3FA55555,0x55554CC1
7111 EEXPA2:	long		0x3FC55555,0x55554A54
7112 
#--setoxm1 polynomial coefficients A4 and A3, read with fadd.d (double).
7113 EM1A4:	long		0x3F811111,0x11174385
7114 EM1A3:	long		0x3FA55555,0x55554F5A
7115 
#--setoxm1 coefficient A2; read with fadd.d, so only the first two
#--longwords are consumed.
7116 EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000
7117 
#--EM1POLY coefficients B8..B3, read with fadd.d (double).
7118 EM1B8:	long		0x3EC71DE3,0xA5774682
7119 EM1B7:	long		0x3EFA01A0,0x19D7CB68
7120 
7121 EM1B6:	long		0x3F2A01A0,0x1A019DF3
7122 EM1B5:	long		0x3F56C16C,0x16C170E2
7123 
7124 EM1B4:	long		0x3F811111,0x11111111
7125 EM1B3:	long		0x3FA55555,0x55555555
7126 
#--EM1POLY coefficient B2, read with fadd.x (extended precision).
7127 EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7128 	long		0x00000000
7129 
#--Scale factors 2^(140) and 2^(-140), read with fmul.d in EM12TINY.
7130 TWO140:	long		0x48B00000,0x00000000
7131 TWON140:
7132 	long		0x37300000,0x00000000
7133 
7134 EEXPTBL:
#--Table of 2^(J/64), J = 0..63: 64 entries of 16 bytes each, addressed as
#--EEXPTBL + 16*J. The first three longwords of an entry are read as an
#--extended-precision value (T) and the fourth as a single-precision
#--value (t).
7135 	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
7136 	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7137 	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7138 	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7139 	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7140 	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7141 	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7142 	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7143 	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7144 	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7145 	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7146 	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7147 	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7148 	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7149 	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7150 	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7151 	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7152 	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7153 	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7154 	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7155 	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7156 	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7157 	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7158 	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7159 	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7160 	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7161 	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7162 	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7163 	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7164 	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7165 	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7166 	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7167 	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7168 	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7169 	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7170 	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7171 	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7172 	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7173 	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7174 	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7175 	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7176 	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7177 	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7178 	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7179 	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7180 	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7181 	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7182 	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7183 	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7184 	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7185 	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7186 	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7187 	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7188 	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7189 	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7190 	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7191 	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7192 	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7193 	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7194 	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7195 	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7196 	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7197 	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7198 	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7199 
#--Scratch-area aliases used by setox and setoxm1 (all addressed off a6).
#--SCALE/SC share FP_SCR0 and ADJSCALE/ONEBYSC share FP_SCR1: the first name
#--of each pair is used by setox, the second by setoxm1.
7200 	set		ADJFLAG,L_SCR2
7201 	set		SCALE,FP_SCR0
7202 	set		ADJSCALE,FP_SCR1
7203 	set		SC,FP_SCR0
7204 	set		ONEBYSC,FP_SCR1
7205 
7206 	global		setox
7207 setox:
7208 #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
#--a0 = pointer to extended-precision X, d0 = user round precision,mode.
#--d1 is built up into the compact form of |X| (exponent field in the upper
#--word, word at offset 4 of X in the lower word) so that the range checks
#--are plain integer compares.
7209 
7210 #--Step 1.
7211 	mov.l		(%a0),%d1		# load part of input X
7212 	and.l		&0x7FFF0000,%d1		# biased expo. of X
7213 	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
7214 	bge.b		EXPC1			# normal case
7215 	bra		EXPSM			# |X| < 2^(-65): Step 7
7216 
7217 EXPC1:
7218 #--The case |X| >= 2^(-65)
#--Also entered from EM1BIG with d1 = first longword of a positive X.
7219 	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
7220 	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
7221 	blt.b		EXPMAIN			# normal case
7222 	bra		EEXPBIG			# |X| >= 16380 log2: Step 8
7223 
7224 EXPMAIN:
7225 #--Step 2.
7226 #--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
#--Falls through to EXPCONT1 with fp0 = N, fp1 = X, a1 -> table entry for
#--2^(J/64), d1.w = biased exponent of 2^M, ADJFLAG = 0, and fp2/fp3 saved
#--on the stack.
7227 	fmov.x		(%a0),%fp0		# load input from (a0)
7228 
7229 	fmov.x		%fp0,%fp1		# fp1 keeps X
7230 	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7231 	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
7232 	mov.l		&0,ADJFLAG(%a6)		# AdjFlag := 0
7233 	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7234 	lea		EEXPTBL(%pc),%a1	# a1 = base of the 2^(J/64) table
7235 	fmov.l		%d1,%fp0		# convert to floating-format
7236 
7237 	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7238 	and.l		&0x3F,%d1		# d1 is J = N mod 64
7239 	lsl.l		&4,%d1			# 16 bytes per table entry
7240 	add.l		%d1,%a1			# address of 2^(J/64)
7241 	mov.l		L_SCR1(%a6),%d1		# reload N
7242 	asr.l		&6,%d1			# d1 is M
7243 	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
7244 	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB; N in L_SCR1 is dead here
7245 
7246 EXPCONT1:
7247 #--Step 3.
7248 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7249 #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
#--Shared by EXPMAIN (ADJFLAG = 0) and EEXPBIG (ADJFLAG = 1, ADJSCALE set).
7250 	fmov.x		%fp0,%fp2		# fp2 = N
7251 	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
7252 	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
7253 	fadd.x		%fp1,%fp0		# X + N*L1
7254 	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
7255 
7256 #--Step 4.
7257 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7258 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7259 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7260 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
#--The integer stores that build SCALE are interleaved with the FP sequence.
7261 
7262 	fmov.x		%fp0,%fp1
7263 	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
7264 
7265 	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5
7266 
7267 	fmul.x		%fp1,%fp2		# fp2 IS S*A5
7268 	fmov.x		%fp1,%fp3
7269 	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4
7270 
7271 	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
7272 	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4
7273 
7274 	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
7275 	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
7276 	mov.l		&0x80000000,SCALE+4(%a6)
7277 	clr.l		SCALE+8(%a6)
7278 
7279 	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)
7280 
7281 	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
7282 	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)
7283 
7284 	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
7285 	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),
7286 
7287 	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64)
7288 	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1
7289 
7290 #--Step 5
7291 #--final reconstruction process
7292 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7293 
7294 	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
7295 	fmovm.x		(%sp)+,&0x30		# fp2 restored {%fp2/%fp3}
7296 	fadd.s		(%a1),%fp0		# accurate 2^(J/64)
7297 
7298 	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
7299 	mov.l		ADJFLAG(%a6),%d1	# d1 = AdjFlag
7300 
7301 #--Step 6
7302 	tst.l		%d1
7303 	beq.b		NORMAL			# AdjFlag = 0: skip the extra scaling
7304 ADJUST:
7305 	fmul.x		ADJSCALE(%a6),%fp0	# ans := ans * AdjScale
7306 NORMAL:
7307 	fmov.l		%d0,%fpcr		# restore user FPCR
7308 	mov.b		&FMUL_OP,%d1		# last inst is MUL
7309 	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
7310 	bra		t_catch			# exit through t_catch (defined elsewhere)
7311 
7312 EXPSM:
7313 #--Step 7
#--|X| < 2^(-65): the result is 1 + X, added after the FPCR is loaded from d0.
7314 	fmovm.x		(%a0),&0x80		# load X
7315 	fmov.l		%d0,%fpcr		# load FPCR from d0
7316 	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
7317 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
7318 
7319 EEXPBIG:
7320 #--Step 8
#--|X| >= 16380 log2; d1 still holds the compact form of |X|.
#--Same set-up as EXPMAIN, except that the exponent K is split into M1 and
#--M = K - M1, ADJSCALE is set to 2^(M1) and ADJFLAG to 1.
7321 	cmp.l		%d1,&0x400CB27C		# 16480 log2
7322 	bgt.b		EXP2BIG			# beyond that: Step 9
7323 #--Steps 8.2 -- 8.6
7324 	fmov.x		(%a0),%fp0		# load input from (a0)
7325 
7326 	fmov.x		%fp0,%fp1		# fp1 keeps X
7327 	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7328 	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
7329 	mov.l		&1,ADJFLAG(%a6)		# AdjFlag := 1
7330 	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7331 	lea		EEXPTBL(%pc),%a1	# a1 = base of the 2^(J/64) table
7332 	fmov.l		%d1,%fp0		# convert to floating-format
7333 	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7334 	and.l		&0x3F,%d1		# d1 is J = N mod 64
7335 	lsl.l		&4,%d1			# 16 bytes per table entry
7336 	add.l		%d1,%a1			# address of 2^(J/64)
7337 	mov.l		L_SCR1(%a6),%d1		# reload N
7338 	asr.l		&6,%d1			# d1 is K
7339 	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
7340 	asr.l		&1,%d1			# d1 is M1
7341 	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is now M = K - M1
7342 	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
7343 	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
7344 	mov.l		&0x80000000,ADJSCALE+4(%a6)
7345 	clr.l		ADJSCALE+8(%a6)
7346 	mov.l		L_SCR1(%a6),%d1		# d1 is M
7347 	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
7348 	bra.w		EXPCONT1		# go back to Step 3
7349 
7350 EXP2BIG:
7351 #--Step 9
#--|X| > 16480 log2: dispatch on the sign of X to the underflow or overflow
#--exit (t_unfl2 / t_ovfl2, both defined elsewhere).
7352 	tst.b		(%a0)			# is X positive or negative?
7353 	bmi		t_unfl2			# X < 0
7354 	bra		t_ovfl2			# X > 0
7355 
7356 	global		setoxd
7357 setoxd:
7358 #--entry point for EXP(X), X is denormalized
#--Returns 1 + sign(X)*2^(-126); the tiny addend is built on the stack in
#--single precision and added after the FPCR is loaded from d0.
7359 	mov.l		(%a0),-(%sp)		# push first longword of X
7360 	andi.l		&0x80000000,(%sp)	# keep only the sign bit
7361 	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)
7362 
7363 	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
7364 
7365 	fmov.l		%d0,%fpcr		# load FPCR from d0
7366 	fadd.s		(%sp)+,%fp0		# 1 + sign(X)*2^(-126); pops the temporary
7367 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
7368 
7369 	global		setoxm1
7370 setoxm1:
7371 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
#--a0 = pointer to extended-precision X, d0 = user round precision,mode.
#--d1 is used for integer range checks on the exponent / compact form of |X|.
7372 
7373 #--Step 1.
7374 #--Step 1.1
7375 	mov.l		(%a0),%d1		# load part of input X
7376 	and.l		&0x7FFF0000,%d1		# biased expo. of X
7377 	cmp.l		%d1,&0x3FFD0000		# 1/4
7378 	bge.b		EM1CON1			# |X| >= 1/4
7379 	bra		EM1SM			# |X| < 1/4: Step 7
7380 
7381 EM1CON1:
7382 #--Step 1.3
7383 #--The case |X| >= 1/4
7384 	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
7385 	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
7386 	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
7387 	bra		EM1BIG			# |X| > 70log2: Step 10
7388 
7389 EM1MAIN:
7390 #--Step 2.
7391 #--This is the case:	1/4 <= |X| <= 70 log2.
#--Builds SC = 2^M and ONEBYSC = -2^(-M) in the scratch area while the
#--polynomial is evaluated; M itself is kept in L_SCR1 for Step 6.
7392 	fmov.x		(%a0),%fp0		# load input from (a0)
7393 
7394 	fmov.x		%fp0,%fp1		# fp1 keeps X
7395 	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7396 	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
7397 	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7398 	lea		EEXPTBL(%pc),%a1	# a1 = base of the 2^(J/64) table
7399 	fmov.l		%d1,%fp0		# convert to floating-format
7400 
7401 	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7402 	and.l		&0x3F,%d1		# d1 is J = N mod 64
7403 	lsl.l		&4,%d1			# 16 bytes per table entry
7404 	add.l		%d1,%a1			# address of 2^(J/64)
7405 	mov.l		L_SCR1(%a6),%d1		# reload N
7406 	asr.l		&6,%d1			# d1 is M
7407 	mov.l		%d1,L_SCR1(%a6)		# save a copy of M
7408 
7409 #--Step 3.
7410 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7411 #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7412 	fmov.x		%fp0,%fp2		# fp2 = N
7413 	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
7414 	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
7415 	fadd.x		%fp1,%fp0		# X + N*L1
7416 	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
7417 	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M
7418 
7419 #--Step 4.
7420 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7421 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7422 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7423 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7424 
7425 	fmov.x		%fp0,%fp1
7426 	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
7427 
7428 	fmov.s		&0x3950097B,%fp2	# fp2 IS a6
7429 
7430 	fmul.x		%fp1,%fp2		# fp2 IS S*A6
7431 	fmov.x		%fp1,%fp3
7432 	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5
7433 
7434 	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
7435 	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
7436 	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
7437 	mov.l		&0x80000000,SC+4(%a6)
7438 	clr.l		SC+8(%a6)
7439 
7440 	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
7441 	mov.l		L_SCR1(%a6),%d1		# d1 is	M
7442 	neg.w		%d1			# d1 is -M
7443 	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
7444 	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
7445 	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
7446 	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)
7447 
7448 	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
7449 	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
7450 	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
7451 	mov.l		&0x80000000,ONEBYSC+4(%a6)
7452 	clr.l		ONEBYSC+8(%a6)
7453 	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))
7454 
7455 	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
7456 	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))
7457 
7458 	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1
7459 
7460 	fmovm.x		(%sp)+,&0x30		# fp2 restored {%fp2/%fp3}
7461 
7462 #--Step 5
7463 #--Compute 2^(J/64)*p
7464 
7465 	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)
7466 
7467 #--Step 6
7468 #--Step 6.1
7469 	mov.l		L_SCR1(%a6),%d1		# retrieve M
7470 	cmp.l		%d1,&63
7471 	ble.b		MLE63			# M <= 63: Step 6.3
7472 #--Step 6.2	M >= 64
7473 	fmov.s		12(%a1),%fp1		# fp1 is t
7474 	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
7475 	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
7476 	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
7477 	bra		EM1SCALE		# Step 6.6
7478 MLE63:
7479 #--Step 6.3	M <= 63
#--d1 = M, fp0 = p, a1 -> table entry (T at offset 0, t at offset 12).
7480 	cmp.l		%d1,&-3
7481 	bge.b		MGEN3			# M >= -3: Step 6.5
7482 MLTN3:
7483 #--Step 6.4	M <= -4
7484 	fadd.s		12(%a1),%fp0		# p+t
7485 	fadd.x		(%a1),%fp0		# T+(p+t)
7486 	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
7487 	bra		EM1SCALE		# Step 6.6
7488 MGEN3:
7489 #--Step 6.5	-3 <= M <= 63
#--Falls through to EM1SCALE.
7490 	fmov.x		(%a1)+,%fp1		# fp1 is T
7491 	fadd.s		(%a1),%fp0		# fp0 is p+t
7492 	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
7493 	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)
7494 
7495 EM1SCALE:
7496 #--Step 6.6
#--Common tail of Step 6: the final multiply by SC = 2^M is done after the
#--FPCR has been loaded from d0.
7497 	fmov.l		%d0,%fpcr		# load FPCR from d0
7498 	fmul.x		SC(%a6),%fp0		# ans := Sc * ans
7499 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
7500 
7501 EM1SM:
7502 #--Step 7	|X| < 1/4.
#--d1 still holds the biased exponent field of X from Step 1.1.
7503 	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
7504 	bge.b		EM1POLY			# |X| >= 2^(-65): Step 9
7505 
7506 EM1TINY:
7507 #--Step 8	|X| < 2^(-65)
#--NOTE(review): the constant below is biased exponent 0x0033, i.e.
#--2^(0x33 - 0x3FFF) = 2^(-16332), while the text says 2^(-16312) -- confirm
#--which one is intended.
7508 	cmp.l		%d1,&0x00330000		# 2^(-16312)
7509 	blt.b		EM12TINY		# very small X: Step 8.3
7510 #--Step 8.2
7511 	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
7512 	mov.l		&0x80000000,SC+4(%a6)
7513 	clr.l		SC+8(%a6)
7514 	fmov.x		(%a0),%fp0		# fp0 = X
7515 	fmov.l		%d0,%fpcr		# load FPCR from d0
7516 	mov.b		&FADD_OP,%d1		# last inst is ADD
7517 	fadd.x		SC(%a6),%fp0		# X - 2^(-16382)
7518 	bra		t_catch			# exit through t_catch (defined elsewhere)
7519 
7520 EM12TINY:
7521 #--Step 8.3
#--X is scaled up by 2^(140), 2^(-16382) is subtracted, the FPCR is loaded
#--from d0, and the result is scaled back down by 2^(-140) as the last op.
7522 	fmov.x		(%a0),%fp0		# fp0 = X
7523 	fmul.d		TWO140(%pc),%fp0	# X * 2^(140)
7524 	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
7525 	mov.l		&0x80000000,SC+4(%a6)
7526 	clr.l		SC+8(%a6)
7527 	fadd.x		SC(%a6),%fp0		# subtract 2^(-16382)
7528 	fmov.l		%d0,%fpcr		# load FPCR from d0
7529 	mov.b		&FMUL_OP,%d1		# last inst is MUL
7530 	fmul.d		TWON140(%pc),%fp0	# scale back by 2^(-140)
7531 	bra		t_catch			# exit through t_catch (defined elsewhere)
7532 
7533 EM1POLY:
7534 #--Step 9	exp(X)-1 by a simple polynomial
#--Two interleaved Horner chains in S = X*X: fp1 accumulates the even-index
#--coefficients (B12..B2) and fp2 the odd-index ones (B11..B3). fp2/fp3 are
#--saved on the stack around the evaluation, and the final add of X is done
#--after the FPCR has been loaded from d0.
7535 	fmov.x		(%a0),%fp0		# fp0 is X
7536 	fmul.x		%fp0,%fp0		# fp0 is S := X*X
7537 	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
7538 	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
7539 	fmul.x		%fp0,%fp1		# fp1 is S*B12
7540 	fmov.s		&0x310F8290,%fp2	# fp2 is B11
7541 	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12
7542 
7543 	fmul.x		%fp0,%fp2		# fp2 is S*B11
7544 	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...
7545 
7546 	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
7547 	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...
7548 
7549 	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
7550 	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...
7551 
7552 	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
7553 	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...
7554 
7555 	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
7556 	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...
7557 
7558 	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
7559 	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...
7560 
7561 	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
7562 	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...
7563 
7564 	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
7565 	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...
7566 
7567 	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
7568 	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...
7569 
7570 	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
7571 	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...
7572 
7573 	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
7574 	fadd.x		%fp2,%fp1		# fp1 is Q
7575 
7576 	fmovm.x		(%sp)+,&0x30		# fp2 restored {%fp2/%fp3}
7577 
7578 	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q
7579 
7580 	fmov.l		%d0,%fpcr		# load FPCR from d0
7581 	fadd.x		(%a0),%fp0		# X + (S*B1+Q)
7582 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
7583 
# EM1BIG: EXPM1 for large |X| (Step 10).
# The first longword of X (sign and exponent) is tested as a signed integer:
# a positive operand branches to EXPC1 (defined elsewhere -- presumably the
# exp() path; confirm). Otherwise the result is formed as -1 + 2^(-126),
# with the add executed under the FPCR value from d0, and the routine exits
# through t_minx2.
EM1BIG:
#--Step 10	|X| > 70 log2
	mov.l		(%a0),%d1
	cmp.l		%d1,&0
	bgt.w		EXPC1
#--Step 10.2
	fmov.s		&0xBF800000,%fp0	# fp0 is -1
	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
	bra		t_minx2
7594 
	global		setoxm1d
# setoxm1d: EXPM1 entry point for a denormalized operand. No arithmetic is
# done here; control goes straight to t_extdnrm (defined elsewhere).
setoxm1d:
#--entry point for EXPM1(X), here X is denormalized
#--Step 0.
	bra		t_extdnrm
7600 
7601 #########################################################################
7602 # sgetexp():  returns the exponent portion of the input argument.	#
7603 #	      The exponent bias is removed and the exponent value is	#
7604 #	      returned as an extended precision number in fp0.		#
7605 # sgetexpd(): handles denormalized numbers.				#
7606 #									#
7607 # sgetman():  extracts the mantissa of the input argument. The		#
7608 #	      mantissa is converted to an extended precision number w/	#
7609 #	      an exponent of $3fff and is returned in fp0. The range of #
7610 #	      the result is [1.0 - 2.0).				#
7611 # sgetmand(): handles denormalized numbers.				#
7612 #									#
7613 # INPUT *************************************************************** #
7614 #	a0  = pointer to extended precision input			#
7615 #									#
7616 # OUTPUT ************************************************************** #
7617 #	fp0 = exponent(X) or mantissa(X)				#
7618 #									#
7619 #########################################################################
7620 
7621 	global		sgetexp
7622 sgetexp:
7623 	mov.w		SRC_EX(%a0),%d0		# get the exponent
7624 	bclr		&0xf,%d0		# clear the sign bit
7625 	subi.w		&0x3fff,%d0		# subtract off the bias
7626 	fmov.w		%d0,%fp0		# return exp in fp0
7627 	blt.b		sgetexpn		# it's negative
7628 	rts
7629 
7630 sgetexpn:
7631 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7632 	rts
7633 
	global		sgetexpd
# sgetexpd: exponent extraction for a denormalized operand at (a0).
# Calls norm (defined elsewhere); per the comments below, d0 then holds the
# normalization shift amount. The exponent is formed as -(shift) - $3fff and
# returned in fp0. The 'N' condition-code byte is written unconditionally on
# this path.
sgetexpd:
	bsr.l		norm			# normalize
	neg.w		%d0			# new exp = -(shft amt)
	subi.w		&0x3fff,%d0		# subtract off the bias
	fmov.w		%d0,%fp0		# return exp in fp0
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
7642 
7643 	global		sgetman
7644 sgetman:
7645 	mov.w		SRC_EX(%a0),%d0		# get the exp
7646 	ori.w		&0x7fff,%d0		# clear old exp
7647 	bclr		&0xe,%d0		# make it the new exp +-3fff
7648 
7649 # here, we build the result in a tmp location so as not to disturb the input
7650 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7651 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7652 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
7653 	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
7654 	bmi.b		sgetmann		# it's negative
7655 	rts
7656 
7657 sgetmann:
7658 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7659 	rts
7660 
7661 #
7662 # For denormalized numbers, shift the mantissa until the j-bit = 1,
7663 # then load the exponent with +/1 $3fff.
7664 #
7665 	global		sgetmand
7666 sgetmand:
7667 	bsr.l		norm			# normalize exponent
7668 	bra.b		sgetman
7669 
7670 #########################################################################
7671 # scosh():  computes the hyperbolic cosine of a normalized input	#
7672 # scoshd(): computes the hyperbolic cosine of a denormalized input	#
7673 #									#
7674 # INPUT ***************************************************************	#
7675 #	a0 = pointer to extended precision input			#
7676 #	d0 = round precision,mode					#
7677 #									#
7678 # OUTPUT **************************************************************	#
7679 #	fp0 = cosh(X)							#
7680 #									#
7681 # ACCURACY and MONOTONICITY *******************************************	#
7682 #	The returned result is within 3 ulps in 64 significant bit,	#
7683 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
7684 #	rounded to double precision. The result is provably monotonic	#
7685 #	in double precision.						#
7686 #									#
7687 # ALGORITHM ***********************************************************	#
7688 #									#
7689 #	COSH								#
7690 #	1. If |X| > 16380 log2, go to 3.				#
7691 #									#
7692 #	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
7693 #		y = |X|, z = exp(Y), and				#
7694 #		cosh(X) = (1/2)*( z + 1/z ).				#
7695 #		Exit.							#
7696 #									#
7697 #	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
7698 #									#
7699 #	4. (16380 log2 < |X| <= 16480 log2)				#
#		cosh(X) = exp(|X|)/2.					#
#		However, invoking exp(|X|) may cause premature		#
#		overflow. Thus, we calculate cosh(X) as follows:	#
#		Y	:= |X|						#
#		Fact	:=	2**(16380)				#
#		Y'	:= Y - 16381 log2				#
#		cosh(X) := Fact * exp(Y').				#
#		Exit.							#
#									#
#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
7710 #		Huge*Huge to generate overflow and an infinity with	#
7711 #		the appropriate sign. Huge is the largest finite number	#
7712 #		in extended format. Exit.				#
7713 #									#
7714 #########################################################################
7715 
# TWO16380: extended-precision constant with biased exponent $7ffb
# ($3fff + 16380) and mantissa $80000000 00000000, i.e. 2^16380. It is the
# scale factor used by the COSHBIG path.
TWO16380:
	long		0x7FFB0000,0x80000000,0x00000000,0x00000000

	global		scosh
# scosh: cosh(X) for a normalized operand.
#	In:	a0 = pointer to X, d0 = user rounding precision/mode
#	Out:	fp0 = cosh(X), exit through t_catch
# d1 is built as a "compacted" operand: sign/exponent in the upper word and
# the top 16 mantissa bits in the lower word, so one integer compare against
# a threshold orders |X|.
scosh:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent into the upper word
	mov.w		4(%a0),%d1		# top 16 mantissa bits into the lower word
	and.l		&0x7FFFFFFF,%d1		# drop the sign: compacted |X|
	cmp.l		%d1,&0x400CB167		# threshold for 16380 log2
	bgt.b		COSHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )

	fabs.x		%fp0			# |X|

# d0 is preserved on the stack and cleared, so setox is called with d0 = 0
# rather than the user's rounding precision/mode.
	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save |X| to stack
	lea		(%sp),%a0		# pass ptr to |X|
	bsr		setox			# FP0 IS EXP(|X|)
	add.l		&0xc,%sp		# erase |X| from stack
	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
	mov.l		(%sp)+,%d0		# recover user rounding precision/mode

	fmov.s		&0x3E800000,%fp1	# (1/4)
	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))

# only the final add runs under the user's FPCR setting
	fmov.l		%d0,%fpcr
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		%fp1,%fp0
	bra		t_catch
7750 
# COSHBIG: cosh(X) for compacted |X| above the 16380 log2 threshold.
# Entered from scosh with fp0 = X, d1 = compacted |X|, d0 = user rounding
# precision/mode. If |X| is also above the second threshold the routine
# leaves through COSHHUGE; otherwise it computes
#	exp(|X| - 16381 log2) * 2^16380
# where 16381 log2 is subtracted in two double-precision pieces, T1 and T2
# (defined elsewhere). Only the final multiply runs under the user's FPCR
# setting.
COSHBIG:
	cmp.l		%d1,&0x400CB2B3
	bgt.b		COSHHUGE

	fabs.x		%fp0
	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE

# d0 is preserved on the stack and cleared, so setox is called with d0 = 0
	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
	lea		(%sp),%a0		# pass ptr to fp0
	bsr		setox
	add.l		&0xc,%sp		# clear fp0 from stack
	mov.l		(%sp)+,%d0		# recover user rounding precision/mode

	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		TWO16380(%pc),%fp0
	bra		t_catch

# COSHHUGE: |X| beyond the second threshold; the result is produced by
# t_ovfl2 (defined elsewhere).
COSHHUGE:
	bra		t_ovfl2
7774 
	global		scoshd
#--COSH(X) = 1 FOR DENORMALIZED X
# The result is formed as 1.0 + 2^(-126) (both single-precision immediates),
# with the add executed under the FPCR value from d0, and the routine exits
# through t_pinx2 (defined elsewhere).
scoshd:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0

	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0	# 1.0 + 2^(-126)
	bra		t_pinx2
7783 
7784 #########################################################################
7785 # ssinh():  computes the hyperbolic sine of a normalized input		#
7786 # ssinhd(): computes the hyperbolic sine of a denormalized input	#
7787 #									#
7788 # INPUT *************************************************************** #
7789 #	a0 = pointer to extended precision input			#
7790 #	d0 = round precision,mode					#
7791 #									#
7792 # OUTPUT ************************************************************** #
7793 #	fp0 = sinh(X)							#
7794 #									#
7795 # ACCURACY and MONOTONICITY *******************************************	#
7796 #	The returned result is within 3 ulps in 64 significant bit,	#
7797 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7798 #	rounded to double precision. The result is provably monotonic	#
7799 #	in double precision.						#
7800 #									#
7801 # ALGORITHM *********************************************************** #
7802 #									#
7803 #       SINH								#
7804 #       1. If |X| > 16380 log2, go to 3.				#
7805 #									#
7806 #       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula	#
7807 #               y = |X|, sgn = sign(X), and z = expm1(Y),		#
7808 #               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).			#
7809 #          Exit.							#
7810 #									#
7811 #       3. If |X| > 16480 log2, go to 5.				#
7812 #									#
7813 #       4. (16380 log2 < |X| <= 16480 log2)				#
7814 #               sinh(X) = sign(X) * exp(|X|)/2.				#
7815 #          However, invoking exp(|X|) may cause premature overflow.	#
7816 #          Thus, we calculate sinh(X) as follows:			#
7817 #             Y       := |X|						#
7818 #             sgn     := sign(X)					#
7819 #             sgnFact := sgn * 2**(16380)				#
7820 #             Y'      := Y - 16381 log2					#
7821 #             sinh(X) := sgnFact * exp(Y').				#
7822 #          Exit.							#
7823 #									#
7824 #       5. (|X| > 16480 log2) sinh(X) must overflow. Return		#
7825 #          sign(X)*Huge*Huge to generate overflow and an infinity with	#
7826 #          the appropriate sign. Huge is the largest finite number in	#
7827 #          extended format. Exit.					#
7828 #									#
7829 #########################################################################
7830 
	global		ssinh
# ssinh: sinh(X) for a normalized operand.
#	In:	a0 = pointer to X, d0 = user rounding precision/mode
#	Out:	fp0 = sinh(X), exit through t_catch
# d1 holds the "compacted" operand (sign/exponent in the upper word, top 16
# mantissa bits in the lower word); a copy with the sign still present is
# kept in a1 so the sign can be re-applied at the end.
ssinh:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	mov.l		%d1,%a1			# save (compacted) operand
	and.l		&0x7FFFFFFF,%d1		# compacted |X|
	cmp.l		%d1,&0x400CB167		# threshold for 16380 log2
	bgt.b		SINHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )

	fabs.x		%fp0			# Y = |X|

# a1 and d0 are preserved across the call; setoxm1 is called with d0 = 0
	movm.l		&0x8040,-(%sp)		# {a1/d0}
	fmovm.x		&0x01,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	clr.l		%d0
	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
	add.l		&0xc,%sp		# clear Y from stack
	fmov.l		&0,%fpcr		# intermediate steps run with FPCR = 0
	movm.l		(%sp)+,&0x0201		# {a1/d0}

	fmov.x		%fp0,%fp1
	fadd.s		&0x3F800000,%fp1	# 1+Z
	fmov.x		%fp0,-(%sp)		# park Z on the stack
	fdiv.x		%fp1,%fp0		# Z/(1+Z)
	mov.l		%a1,%d1
	and.l		&0x80000000,%d1		# sign of X
	or.l		&0x3F000000,%d1		# single-precision SIGN(X)*(1/2)
	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z), Z popped
	mov.l		%d1,-(%sp)		# push +-1/2 as the final multiplier

# only the final multiply runs under the user's FPCR setting
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
	bra		t_catch
7870 
# SINHBIG: sinh(X) for compacted |X| above the 16380 log2 threshold.
# Entered from ssinh with fp0 = X, d1 = compacted |X|, a1 = compacted X
# (sign included), d0 = user rounding precision/mode.
# Above the second threshold the routine leaves through t_ovfl (defined
# elsewhere). Otherwise it computes
#	sign(X) * 2^16380 * exp(|X| - 16381 log2)
# The factor sign(X)*2^16380 is built on the stack as a 12-byte extended
# value (exponent word $7ffb plus the sign, mantissa $80000000 00000000) and
# consumed by the final multiply, which is the only operation executed under
# the user's FPCR setting.
SINHBIG:
	cmp.l		%d1,&0x400CB2B3
	bgt		t_ovfl
	fabs.x		%fp0
	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
	mov.l		&0,-(%sp)		# sgnFact: low mantissa longword
	mov.l		&0x80000000,-(%sp)	# sgnFact: high mantissa longword
	mov.l		%a1,%d1
	and.l		&0x80000000,%d1		# sign of X
	or.l		&0x7FFB0000,%d1		# sign + exponent of 2^16380
	mov.l		%d1,-(%sp)		# EXTENDED FMT
	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE

# d0 is preserved on the stack and cleared, so setox is called with d0 = 0
	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
	lea		(%sp),%a0		# pass ptr to fp0
	bsr		setox
	add.l		&0xc,%sp		# clear fp0 from stack

	mov.l		(%sp)+,%d0		# recover user rounding precision/mode
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# possible exception
	bra		t_catch
7896 
	global		ssinhd
#--SINH(X) = X FOR DENORMALIZED X
# No arithmetic is done here; control goes straight to t_extdnrm (defined
# elsewhere).
ssinhd:
	bra		t_extdnrm
7901 
7902 #########################################################################
7903 # stanh():  computes the hyperbolic tangent of a normalized input	#
7904 # stanhd(): computes the hyperbolic tangent of a denormalized input	#
7905 #									#
7906 # INPUT ***************************************************************	#
7907 #	a0 = pointer to extended precision input			#
7908 #	d0 = round precision,mode					#
7909 #									#
7910 # OUTPUT **************************************************************	#
7911 #	fp0 = tanh(X)							#
7912 #									#
7913 # ACCURACY and MONOTONICITY *******************************************	#
7914 #	The returned result is within 3 ulps in 64 significant bit,	#
7915 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7916 #	rounded to double precision. The result is provably monotonic	#
7917 #	in double precision.						#
7918 #									#
7919 # ALGORITHM ***********************************************************	#
7920 #									#
7921 #	TANH								#
7922 #	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.		#
7923 #									#
7924 #	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by		#
7925 #		sgn := sign(X), y := 2|X|, z := expm1(Y), and		#
7926 #		tanh(X) = sgn*( z/(2+z) ).				#
7927 #		Exit.							#
7928 #									#
7929 #	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,		#
7930 #		go to 7.						#
7931 #									#
7932 #	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.		#
7933 #									#
7934 #	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by		#
7935 #		sgn := sign(X), y := 2|X|, z := exp(Y),			#
7936 #		tanh(X) = sgn - [ sgn*2/(1+z) ].			#
7937 #		Exit.							#
7938 #									#
7939 #	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we	#
7940 #		calculate Tanh(X) by					#
7941 #		sgn := sign(X), Tiny := 2**(-126),			#
7942 #		tanh(X) := sgn - sgn*Tiny.				#
7943 #		Exit.							#
7944 #									#
7945 #	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.			#
7946 #									#
7947 #########################################################################
7948 
	set		X,FP_SCR0		# working copy of the operand
	set		XFRAC,X+4		# offset 4 within X

	set		SGN,L_SCR3		# sign bit of X (0 or 0x80000000)

	set		V,FP_SCR0		# Z+2; shares storage with X

	global		stanh
# stanh: tanh(X) for a normalized operand.
#	In:	a0 = pointer to X, d0 = user rounding precision/mode
#	Out:	fp0 = tanh(X)
# d1 is the "compacted" operand: sign/exponent in the upper word and the top
# 16 mantissa bits in the lower word. Arguments with compacted |X| below
# 2^(-40) or above (5/2) log2 are handled at TANHBORS; everything else uses
#	tanh(X) = sign(X) * Z/(Z+2),  Z = expm1(2|X|).
stanh:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	fmov.x		%fp0,X(%a6)		# full copy of X in the scratch slot
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	mov.l		%d1,X(%a6)		# first longword of X := compacted form
	and.l		&0x7FFFFFFF,%d1		# compacted |X|
	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
	blt.w		TANHBORS		# yes
	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
	bgt.w		TANHBORS		# yes

#--THIS IS THE USUAL CASE
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

# Y is formed in place: the sign and low word of the first longword are
# cleared and the exponent field is incremented by one.
	mov.l		X(%a6),%d1
	mov.l		%d1,SGN(%a6)
	and.l		&0x7FFF0000,%d1
	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
	mov.l		%d1,X(%a6)
	and.l		&0x80000000,SGN(%a6)	# keep only the sign bit in SGN
	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|

# d0 is preserved on the stack and cleared, so setoxm1 is called with d0 = 0
	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x1,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
	add.l		&0xc,%sp		# clear Y from stack
	mov.l		(%sp)+,%d0		# recover user rounding precision/mode

	fmov.x		%fp0,%fp1
	fadd.s		&0x40000000,%fp1	# Z+2
	mov.l		SGN(%a6),%d1
	fmov.x		%fp1,V(%a6)		# V overwrites X, which is no longer needed
	eor.l		%d1,V(%a6)		# give the divisor the sign of X

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fdiv.x		V(%a6),%fp0		# Z / (SIGN(X)*(Z+2))
	bra		t_inx2
7998 
# TANHBORS: tanh(X) for arguments outside the usual range ("big or small").
# Entered from stanh with d1 = compacted |X|, X(a6) holding the operand with
# its first longword replaced by the compacted form, and d0 = user rounding
# precision/mode.
#	compacted |X| < $3FFF8000	-> TANHSM
#	compacted |X| > $40048AA1	-> TANHHUGE
#	otherwise			-> SGN - SGN*2/(exp(2|X|)+1), below
TANHBORS:
	cmp.l		%d1,&0x3FFF8000
	blt.w		TANHSM

	cmp.l		%d1,&0x40048AA1
	bgt.w		TANHHUGE

#-- (5/2) LOG2 < |X| < 50 LOG2,
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
#--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].

	mov.l		X(%a6),%d1
	mov.l		%d1,SGN(%a6)
	and.l		&0x7FFF0000,%d1
	add.l		&0x00010000,%d1		# EXPO OF 2|X|
	mov.l		%d1,X(%a6)		# Y = 2|X|
	and.l		&0x80000000,SGN(%a6)	# keep only the sign bit in SGN
# NOTE(review): d1 is loaded again from SGN after the setox call and is not
# read in between within this block, so the next load looks redundant.
	mov.l		SGN(%a6),%d1
	fmov.x		X(%a6),%fp0		# Y = 2|X|

# d0 is preserved on the stack and cleared, so setox is called with d0 = 0
	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	bsr		setox			# FP0 IS EXP(Y)
	add.l		&0xc,%sp		# clear Y from stack
	mov.l		(%sp)+,%d0		# recover user rounding precision/mode
	mov.l		SGN(%a6),%d1
	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1

# sign bit XOR $C0000000 gives single-precision -2.0 for X >= 0, +2.0 for X < 0
	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]

# sign bit OR $3F800000 gives single-precision +-1.0
	mov.l		SGN(%a6),%d1
	or.l		&0x3F800000,%d1		# SGN
	fmov.s		%d1,%fp0		# SGN IN SGL FMT

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		%fp1,%fp0
	bra		t_inx2
8041 
# TANHSM: small-argument exit for tanh. The operand saved in X(a6) is simply
# reloaded into fp0 under the user's FPCR setting (tanh(X) = X here) and the
# routine exits through t_catch.
TANHSM:
	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch
8047 
#---RETURN SGN(X) - SGN(X)EPS
# Both terms are built as single-precision bit patterns in d1:
#	sign | $3F800000			= SGN(X) * 1.0
#	(sign XOR $80000000) | $00800000	= -SGN(X) * 2^(-126)
# The add runs under the user's FPCR setting; exit is through t_inx2.
TANHHUGE:
	mov.l		X(%a6),%d1
	and.l		&0x80000000,%d1		# sign of X
	or.l		&0x3F800000,%d1		# SGN(X) * 1.0
	fmov.s		%d1,%fp0
	and.l		&0x80000000,%d1		# back to the bare sign bit
	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		%d1,%fp0
	bra		t_inx2
8060 
	global		stanhd
#--TANH(X) = X FOR DENORMALIZED X
# No arithmetic is done here; control goes straight to t_extdnrm (defined
# elsewhere).
stanhd:
	bra		t_extdnrm
8065 
8066 #########################################################################
8067 # slogn():    computes the natural logarithm of a normalized input	#
8068 # slognd():   computes the natural logarithm of a denormalized input	#
8069 # slognp1():  computes the log(1+X) of a normalized input		#
8070 # slognp1d(): computes the log(1+X) of a denormalized input		#
8071 #									#
8072 # INPUT ***************************************************************	#
8073 #	a0 = pointer to extended precision input			#
8074 #	d0 = round precision,mode					#
8075 #									#
8076 # OUTPUT **************************************************************	#
8077 #	fp0 = log(X) or log(1+X)					#
8078 #									#
8079 # ACCURACY and MONOTONICITY *******************************************	#
8080 #	The returned result is within 2 ulps in 64 significant bit,	#
8081 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8082 #	rounded to double precision. The result is provably monotonic	#
8083 #	in double precision.						#
8084 #									#
8085 # ALGORITHM ***********************************************************	#
8086 #	LOGN:								#
8087 #	Step 1. If |X-1| < 1/16, approximate log(X) by an odd		#
8088 #		polynomial in u, where u = 2(X-1)/(X+1). Otherwise,	#
8089 #		move on to Step 2.					#
8090 #									#
8091 #	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first	#
8092 #		seven significant bits of Y plus 2**(-7), i.e.		#
8093 #		F = 1.xxxxxx1 in base 2 where the six "x" match those	#
8094 #		of Y. Note that |Y-F| <= 2**(-7).			#
8095 #									#
8096 #	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a		#
8097 #		polynomial in u, log(1+u) = poly.			#
8098 #									#
8099 #	Step 4. Reconstruct						#
8100 #		log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)	#
8101 #		by k*log(2) + (log(F) + poly). The values of log(F) are	#
8102 #		calculated beforehand and stored in the program.	#
8103 #									#
8104 #	lognp1:								#
8105 #	Step 1: If |X| < 1/16, approximate log(1+X) by an odd		#
8106 #		polynomial in u where u = 2X/(2+X). Otherwise, move on	#
8107 #		to Step 2.						#
8108 #									#
8109 #	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done	#
8110 #		in Step 2 of the algorithm for LOGN and compute		#
8111 #		log(1+X) as k*log(2) + log(F) + poly where poly		#
8112 #		approximates log(1+u), u = (Y-F)/F.			#
8113 #									#
8114 #	Implementation Notes:						#
8115 #	Note 1. There are 64 different possible values for F, thus 64	#
8116 #		log(F)'s need to be tabulated. Moreover, the values of	#
8117 #		1/F are also tabulated so that the division in (Y-F)/F	#
8118 #		can be performed by a multiplication.			#
8119 #									#
#	Note 2. In Step 2 of lognp1, in order to preserve accuracy,	#
8121 #		the value Y-F has to be calculated carefully when	#
8122 #		1/2 <= X < 3/2.						#
8123 #									#
8124 #	Note 3. To fully exploit the pipeline, polynomials are usually	#
8125 #		separated into two parts evaluated independently before	#
8126 #		being added up.						#
8127 #									#
8128 #########################################################################
# Constants for the logarithm routines.
#
# LOGOF2: log(2) in extended precision (exponent $3ffe, 64-bit mantissa);
# multiplied by K in LP1CONT1.
LOGOF2:
	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

# Single-precision constants 1.0, 0.0, +infinity and -1.0.
one:
	long		0x3F800000
zero:
	long		0x00000000
infty:
	long		0x7F800000
negone:
	long		0xBF800000

# LOGA1..LOGA6: double-precision coefficients of the log(1+U) polynomial
# evaluated in LP1CONT1 (they are read there with fmul.d/fadd.d).
LOGA6:
	long		0x3FC2499A,0xB5E4040B
LOGA5:
	long		0xBFC555B5,0x848CB7DB

LOGA4:
	long		0x3FC99999,0x987D8730
LOGA3:
	long		0xBFCFFFFF,0xFF6F7E97

LOGA2:
	long		0x3FD55555,0x555555A4
LOGA1:
	long		0xBFE00000,0x00000008

# LOGB1..LOGB5: two longwords each. NOTE(review): presumably the
# double-precision B1..B5 coefficients of the near-1 polynomial described at
# LP1CONT2; their use is outside this view -- confirm.
LOGB5:
	long		0x3F175496,0xADD7DAD6
LOGB4:
	long		0x3F3C71C2,0xFE80C7E0

LOGB3:
	long		0x3F624924,0x928BCCFF
LOGB2:
	long		0x3F899999,0x999995EC

LOGB1:
	long		0x3FB55555,0x55555555
# TWO: the bit pattern of 2.0 in double precision.
TWO:
	long		0x40000000,0x00000000

# LTHOLD: extended-precision value with biased exponent $3f99 and mantissa
# $80000000 00000000, i.e. 2^(-102). Its use is outside this view.
LTHOLD:
	long		0x3f990000,0x80000000,0x00000000,0x00000000
8173 
# LOGTBL: lookup table used by LOGMAIN/LP1CONT1. Each row below is one
# 16-byte extended-precision slot. Rows come in pairs: 1/F followed by
# log(F). LOGMAIN indexes a pair with a byte offset of 32 * (the six mantissa
# bits of Y that follow the leading 1), and LP1CONT1 adds 16 to reach the
# log(F) slot, giving 64 pairs (128 rows) in total.
LOGTBL:
8175 	long		0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8176 	long		0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8177 	long		0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8178 	long		0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8179 	long		0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8180 	long		0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8181 	long		0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8182 	long		0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8183 	long		0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8184 	long		0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8185 	long		0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8186 	long		0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8187 	long		0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8188 	long		0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8189 	long		0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8190 	long		0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8191 	long		0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8192 	long		0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8193 	long		0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8194 	long		0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8195 	long		0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8196 	long		0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8197 	long		0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8198 	long		0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8199 	long		0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8200 	long		0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8201 	long		0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8202 	long		0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8203 	long		0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8204 	long		0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8205 	long		0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8206 	long		0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8207 	long		0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8208 	long		0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8209 	long		0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8210 	long		0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8211 	long		0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8212 	long		0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8213 	long		0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8214 	long		0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8215 	long		0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8216 	long		0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8217 	long		0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8218 	long		0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8219 	long		0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8220 	long		0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8221 	long		0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8222 	long		0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8223 	long		0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8224 	long		0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8225 	long		0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8226 	long		0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8227 	long		0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8228 	long		0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8229 	long		0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8230 	long		0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8231 	long		0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8232 	long		0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8233 	long		0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8234 	long		0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8235 	long		0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8236 	long		0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8237 	long		0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8238 	long		0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8239 	long		0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8240 	long		0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8241 	long		0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8242 	long		0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8243 	long		0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8244 	long		0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8245 	long		0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8246 	long		0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8247 	long		0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8248 	long		0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8249 	long		0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8250 	long		0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8251 	long		0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8252 	long		0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8253 	long		0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8254 	long		0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8255 	long		0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8256 	long		0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8257 	long		0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8258 	long		0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8259 	long		0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8260 	long		0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8261 	long		0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8262 	long		0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8263 	long		0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8264 	long		0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8265 	long		0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8266 	long		0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8267 	long		0x3FFE0000,0x94458094,0x45809446,0x00000000
8268 	long		0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8269 	long		0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8270 	long		0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8271 	long		0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8272 	long		0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8273 	long		0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8274 	long		0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8275 	long		0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8276 	long		0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8277 	long		0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8278 	long		0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8279 	long		0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8280 	long		0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8281 	long		0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8282 	long		0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8283 	long		0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8284 	long		0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8285 	long		0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8286 	long		0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8287 	long		0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8288 	long		0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8289 	long		0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8290 	long		0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8291 	long		0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8292 	long		0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8293 	long		0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8294 	long		0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8295 	long		0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8296 	long		0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8297 	long		0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8298 	long		0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8299 	long		0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8300 	long		0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8301 	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
8302 	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8303 
	set		ADJK,L_SCR1		# exponent adjustment added to K in LOGMAIN

	set		X,FP_SCR0		# working copy of the operand
	set		XDCARE,X+2		# offset 2 within X
	set		XFRAC,X+4		# offset 4 within X: high mantissa longword

	set		F,FP_SCR1		# the value F built in LOGMAIN
	set		FFRAC,F+4		# offset 4 within F: high mantissa longword

	set		KLOG2,FP_SCR0		# K*log2; shares storage with X

	set		SAVEU,FP_SCR0		# also shares storage with X
8316 
	global		slogn
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
# slogn: natural logarithm of a normalized operand.
#	In:	a0 = pointer to X, d0 = user rounding precision/mode
#	Out:	fp0 = log(X)
# ADJK is cleared because no exponent adjustment is needed on this entry.
slogn:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)

# LOGBGN: common start once fp0/(a0) hold the operand and ADJK is set.
# d1 becomes the "compacted" operand (sign/exponent in the upper word, top
# 16 mantissa bits in the lower word) and the operand is copied to X(a6).
# A negative operand leaves through LOGNEG (defined elsewhere); compacted
# values in [$3ffef07d, $3fff8841] go to LOGNEAR1; all others fall into
# LOGMAIN.
LOGBGN:
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1

	mov.l		(%a0),X(%a6)
	mov.l		4(%a0),X+4(%a6)
	mov.l		8(%a0),X+8(%a6)

	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
# X IS POSITIVE, CHECK IF X IS NEAR 1
	cmp.l		%d1,&0x3ffef07d		# IS X < 15/16?
	blt.b		LOGMAIN			# YES
	cmp.l		%d1,&0x3fff8841		# IS X > 17/16?
	ble.w		LOGNEAR1		# NO
8341 
# LOGMAIN: range reduction for log(X) when X is not close to 1.
# In:  d1 = compacted X, X(a6) = copy of the operand, ADJK(a6) = exponent
#      adjustment.
# Out (falls into LP1CONT1): fp0 = Y-F, fp1 = K, a0 = address of the 1/F
#      table slot, fp2/fp3 saved on the stack.
LOGMAIN:
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1

#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
#--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
#--LOG(1+U) CAN BE VERY EFFICIENT.
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

#--GET K, Y, F, AND ADDRESS OF 1/F.
	asr.l		&8,%d1
	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
	sub.l		&0x3FFF,%d1		# THIS IS K
	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT

#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
	mov.l		FFRAC(%a6),%d1	# READY TO GET ADDRESS OF 1/F
	and.l		&0x7E000000,%d1		# THE SIX BITS AFTER THE LEADING 1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT (32 * INDEX)
	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F

	fmov.x		X(%a6),%fp0
	mov.l		&0x3fff0000,F(%a6)	# F GETS EXPONENT $3FFF, SAME AS Y
	clr.l		F+8(%a6)		# LOW MANTISSA LONGWORD OF F IS ZERO
	fsub.x		F(%a6),%fp0		# Y-F
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
#--FP2 AND FP3 ARE SAVED ON THE STACK
8381 
# LP1CONT1: polynomial and reconstruction stage shared by LOGN and LOGNP1.
# In:  fp0 = Y-F, fp1 = K, a0 = address of the 1/F table slot (log(F) is the
#      next 16-byte slot), fp2/fp3 already saved on the stack, d0 = user
#      rounding precision/mode.
# Out: fp0 = K*log2 + (log(F) + log(1+U)), exit through t_inx2. Only the
#      final add runs under the user's FPCR setting.
LP1CONT1:
#--AN RE-ENTRY POINT FOR LOGNP1
	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
	fmov.x		%fp0,%fp2
	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1

#--LOG(1+U) IS APPROXIMATED BY
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]

	fmov.x		%fp2,%fp3		# FP3 KEEPS V FOR BOTH CHAINS
	fmov.x		%fp2,%fp1

	fmul.d		LOGA6(%pc),%fp1		# V*A6
	fmul.d		LOGA5(%pc),%fp2		# V*A5

	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5

	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
	fmul.x		%fp3,%fp2		# V*(A3+V*A5)

	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)

	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
	add.l		&16,%a0			# ADDRESS OF LOG(F)
	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))

	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))

	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)

	fmov.l		%d0,%fpcr
	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
	bra		t_inx2
8423 
8424 
8425 LOGNEAR1:
8426 
8427 # if the input is exactly equal to one, then exit through ld_pzero.
8428 # if these 2 lines weren't here, the correct answer would be returned
8429 # but the INEX2 bit would be set.
8430 	fcmp.b		%fp0,&0x1		# is it equal to one?
8431 	fbeq.l		ld_pzero		# yes
8432 
8433 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8434 	fmov.x		%fp0,%fp1
8435 	fsub.s		one(%pc),%fp1		# FP1 IS X-1
8436 	fadd.s		one(%pc),%fp0		# FP0 IS X+1
8437 	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
8438 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8439 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8440 
8441 LP1CONT2:
8442 #--THIS IS AN RE-ENTRY POINT FOR LOGNP1
8443 	fdiv.x		%fp0,%fp1		# FP1 IS U
8444 	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
8445 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8446 #--LET V=U*U, W=V*V, CALCULATE
8447 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8448 #--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
8449 	fmov.x		%fp1,%fp0
8450 	fmul.x		%fp0,%fp0		# FP0 IS V
8451 	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
8452 	fmov.x		%fp0,%fp1
8453 	fmul.x		%fp1,%fp1		# FP1 IS W
8454 
8455 	fmov.d		LOGB5(%pc),%fp3
8456 	fmov.d		LOGB4(%pc),%fp2
8457 
8458 	fmul.x		%fp1,%fp3		# W*B5
8459 	fmul.x		%fp1,%fp2		# W*B4
8460 
8461 	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
8462 	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4
8463 
8464 	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED
8465 
8466 	fmul.x		%fp0,%fp2		# V*(B2+W*B4)
8467 
8468 	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
8469 	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V
8470 
8471 	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8472 	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED
8473 
8474 	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8475 
8476 	fmov.l		%d0,%fpcr
8477 	fadd.x		SAVEU(%a6),%fp0
8478 	bra		t_inx2
8479 
8480 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8481 LOGNEG:
8482 	bra		t_operr
8483 
8484 	global		slognd
8485 slognd:
8486 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8487 
8488 	mov.l		&-100,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0
8489 
8490 #----normalize the input value by left shifting k bits (k to be determined
8491 #----below), adjusting exponent and storing -k to  ADJK
8492 #----the value TWOTO100 is no longer needed.
8493 #----Note that this code assumes the denormalized input is NON-ZERO.
8494 
8495 	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
8496 	mov.l		(%a0),%d3		# D3 is exponent of smallest norm. #
8497 	mov.l		4(%a0),%d4
8498 	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
8499 	clr.l		%d2			# D2 used for holding K
8500 
8501 	tst.l		%d4
8502 	bne.b		Hi_not0
8503 
8504 Hi_0:
8505 	mov.l		%d5,%d4
8506 	clr.l		%d5
8507 	mov.l		&32,%d2
8508 	clr.l		%d6
8509 	bfffo		%d4{&0:&32},%d6
8510 	lsl.l		%d6,%d4
8511 	add.l		%d6,%d2			# (D3,D4,D5) is normalized
8512 
8513 	mov.l		%d3,X(%a6)
8514 	mov.l		%d4,XFRAC(%a6)
8515 	mov.l		%d5,XFRAC+4(%a6)
8516 	neg.l		%d2
8517 	mov.l		%d2,ADJK(%a6)
8518 	fmov.x		X(%a6),%fp0
8519 	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
8520 	lea		X(%a6),%a0
8521 	bra.w		LOGBGN			# begin regular log(X)
8522 
8523 Hi_not0:
8524 	clr.l		%d6
8525 	bfffo		%d4{&0:&32},%d6		# find first 1
8526 	mov.l		%d6,%d2			# get k
8527 	lsl.l		%d6,%d4
8528 	mov.l		%d5,%d7			# a copy of D5
8529 	lsl.l		%d6,%d5
8530 	neg.l		%d6
8531 	add.l		&32,%d6
8532 	lsr.l		%d6,%d7
8533 	or.l		%d7,%d4			# (D3,D4,D5) normalized
8534 
8535 	mov.l		%d3,X(%a6)
8536 	mov.l		%d4,XFRAC(%a6)
8537 	mov.l		%d5,XFRAC+4(%a6)
8538 	neg.l		%d2
8539 	mov.l		%d2,ADJK(%a6)
8540 	fmov.x		X(%a6),%fp0
8541 	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
8542 	lea		X(%a6),%a0
8543 	bra.w		LOGBGN			# begin regular log(X)
8544 
8545 	global		slognp1
8546 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8547 slognp1:
8548 	fmov.x		(%a0),%fp0		# LOAD INPUT
8549 	fabs.x		%fp0			# test magnitude
8550 	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
8551 	fbgt.w		LP1REAL			# if greater, continue
8552 	fmov.l		%d0,%fpcr
8553 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
8554 	fmov.x		(%a0),%fp0		# return signed argument
8555 	bra		t_catch
8556 
8557 LP1REAL:
8558 	fmov.x		(%a0),%fp0		# LOAD INPUT
8559 	mov.l		&0x00000000,ADJK(%a6)
8560 	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
8561 	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
8562 	fmov.x		%fp0,X(%a6)
8563 	mov.w		XFRAC(%a6),XDCARE(%a6)
8564 	mov.l		X(%a6),%d1
8565 	cmp.l		%d1,&0
8566 	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
8567 	cmp.l		%d1,&0x3ffe8000		# IS BOUNDS [1/2,3/2]?
8568 	blt.w		LOGMAIN
8569 	cmp.l		%d1,&0x3fffc000
8570 	bgt.w		LOGMAIN
8571 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8572 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8573 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8574 
8575 LP1NEAR1:
8576 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8577 	cmp.l		%d1,&0x3ffef07d
8578 	blt.w		LP1CARE
8579 	cmp.l		%d1,&0x3fff8841
8580 	bgt.w		LP1CARE
8581 
8582 LP1ONE16:
8583 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8584 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8585 	fadd.x		%fp1,%fp1		# FP1 IS 2Z
8586 	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
8587 #--U = FP1/FP0
8588 	bra.w		LP1CONT2
8589 
8590 LP1CARE:
8591 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8592 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8593 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8594 #--THERE ARE ONLY TWO CASES.
8595 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8596 #--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
8597 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8598 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8599 
8600 	mov.l		XFRAC(%a6),FFRAC(%a6)
8601 	and.l		&0xFE000000,FFRAC(%a6)
8602 	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
8603 	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
8604 	bge.b		KISZERO
8605 
8606 KISNEG1:
8607 	fmov.s		TWO(%pc),%fp0
8608 	mov.l		&0x3fff0000,F(%a6)
8609 	clr.l		F+8(%a6)
8610 	fsub.x		F(%a6),%fp0		# 2-F
8611 	mov.l		FFRAC(%a6),%d1
8612 	and.l		&0x7E000000,%d1
8613 	asr.l		&8,%d1
8614 	asr.l		&8,%d1
8615 	asr.l		&4,%d1			# D0 CONTAINS DISPLACEMENT FOR 1/F
8616 	fadd.x		%fp1,%fp1		# GET 2Z
8617 	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
8618 	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
8619 	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
8620 	add.l		%d1,%a0
8621 	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
8622 	bra.w		LP1CONT1
8623 
8624 KISZERO:
8625 	fmov.s		one(%pc),%fp0
8626 	mov.l		&0x3fff0000,F(%a6)
8627 	clr.l		F+8(%a6)
8628 	fsub.x		F(%a6),%fp0		# 1-F
8629 	mov.l		FFRAC(%a6),%d1
8630 	and.l		&0x7E000000,%d1
8631 	asr.l		&8,%d1
8632 	asr.l		&8,%d1
8633 	asr.l		&4,%d1
8634 	fadd.x		%fp1,%fp0		# FP0 IS Y-F
8635 	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
8636 	lea		LOGTBL(%pc),%a0
8637 	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
8638 	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
8639 	bra.w		LP1CONT1
8640 
8641 LP1NEG0:
8642 #--FPCR SAVED. D0 IS X IN COMPACT FORM.
8643 	cmp.l		%d1,&0
8644 	blt.b		LP1NEG
8645 LP1ZERO:
8646 	fmov.s		negone(%pc),%fp0
8647 
8648 	fmov.l		%d0,%fpcr
8649 	bra		t_dz
8650 
8651 LP1NEG:
8652 	fmov.s		zero(%pc),%fp0
8653 
8654 	fmov.l		%d0,%fpcr
8655 	bra		t_operr
8656 
	global		slognp1d
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
# Simply return the denorm: no computation is done here, the routine
# only branches to t_extdnrm (defined elsewhere in this package).
# NOTE(review): t_extdnrm presumably returns the operand at (a0) along
# with any exception bookkeeping -- confirm at its definition.
slognp1d:
	bra		t_extdnrm
8662 
8663 #########################################################################
8664 # satanh():  computes the inverse hyperbolic tangent of a norm input	#
8665 # satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
8666 #									#
8667 # INPUT ***************************************************************	#
8668 #	a0 = pointer to extended precision input			#
8669 #	d0 = round precision,mode					#
8670 #									#
8671 # OUTPUT **************************************************************	#
8672 #	fp0 = arctanh(X)						#
8673 #									#
8674 # ACCURACY and MONOTONICITY *******************************************	#
8675 #	The returned result is within 3 ulps in	64 significant bit,	#
8676 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8677 #	rounded to double precision. The result is provably monotonic	#
8678 #	in double precision.						#
8679 #									#
8680 # ALGORITHM ***********************************************************	#
8681 #									#
8682 #	ATANH								#
8683 #	1. If |X| >= 1, go to 3.					#
8684 #									#
8685 #	2. (|X| < 1) Calculate atanh(X) by				#
8686 #		sgn := sign(X)						#
8687 #		y := |X|						#
8688 #		z := 2y/(1-y)						#
8689 #		atanh(X) := sgn * (1/2) * logp1(z)			#
8690 #		Exit.							#
8691 #									#
8692 #	3. If |X| > 1, go to 5.						#
8693 #									#
8694 #	4. (|X| = 1) Generate infinity with an appropriate sign and	#
8695 #		divide-by-zero by					#
8696 #		sgn := sign(X)						#
#		atanh(X) := sgn / (+0).					#
8698 #		Exit.							#
8699 #									#
8700 #	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
8701 #		Exit.							#
8702 #									#
8703 #########################################################################
8704 
8705 	global		satanh
8706 satanh:
8707 	mov.l		(%a0),%d1
8708 	mov.w		4(%a0),%d1
8709 	and.l		&0x7FFFFFFF,%d1
8710 	cmp.l		%d1,&0x3FFF8000
8711 	bge.b		ATANHBIG
8712 
8713 #--THIS IS THE USUAL CASE, |X| < 1
8714 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8715 
8716 	fabs.x		(%a0),%fp0		# Y = |X|
8717 	fmov.x		%fp0,%fp1
8718 	fneg.x		%fp1			# -Y
8719 	fadd.x		%fp0,%fp0		# 2Y
8720 	fadd.s		&0x3F800000,%fp1	# 1-Y
8721 	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
8722 	mov.l		(%a0),%d1
8723 	and.l		&0x80000000,%d1
8724 	or.l		&0x3F000000,%d1		# SIGN(X)*HALF
8725 	mov.l		%d1,-(%sp)
8726 
8727 	mov.l		%d0,-(%sp)		# save rnd prec,mode
8728 	clr.l		%d0			# pass ext prec,RN
8729 	fmovm.x		&0x01,-(%sp)		# save Z on stack
8730 	lea		(%sp),%a0		# pass ptr to Z
8731 	bsr		slognp1			# LOG1P(Z)
8732 	add.l		&0xc,%sp		# clear Z from stack
8733 
8734 	mov.l		(%sp)+,%d0		# fetch old prec,mode
8735 	fmov.l		%d0,%fpcr		# load it
8736 	mov.b		&FMUL_OP,%d1		# last inst is MUL
8737 	fmul.s		(%sp)+,%fp0
8738 	bra		t_catch
8739 
8740 ATANHBIG:
8741 	fabs.x		(%a0),%fp0		# |X|
8742 	fcmp.s		%fp0,&0x3F800000
8743 	fbgt		t_operr
8744 	bra		t_dz
8745 
	global		satanhd
#--ATANH(X) = X FOR DENORMALIZED X
#--No computation is done here; the routine only branches to t_extdnrm
#--(defined elsewhere in this package).
satanhd:
	bra		t_extdnrm
8750 
8751 #########################################################################
8752 # slog10():  computes the base-10 logarithm of a normalized input	#
8753 # slog10d(): computes the base-10 logarithm of a denormalized input	#
8754 # slog2():   computes the base-2 logarithm of a normalized input	#
8755 # slog2d():  computes the base-2 logarithm of a denormalized input	#
8756 #									#
8757 # INPUT *************************************************************** #
8758 #	a0 = pointer to extended precision input			#
8759 #	d0 = round precision,mode					#
8760 #									#
8761 # OUTPUT **************************************************************	#
8762 #	fp0 = log_10(X) or log_2(X)					#
8763 #									#
8764 # ACCURACY and MONOTONICITY *******************************************	#
8765 #	The returned result is within 1.7 ulps in 64 significant bit,	#
8766 #	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
8767 #	rounded to double precision. The result is provably monotonic	#
8768 #	in double precision.						#
8769 #									#
8770 # ALGORITHM ***********************************************************	#
8771 #									#
8772 #       slog10d:							#
8773 #									#
8774 #       Step 0.	If X < 0, create a NaN and raise the invalid operation	#
8775 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8776 #       Notes:  Default means round-to-nearest mode, no floating-point	#
8777 #               traps, and precision control = double extended.		#
8778 #									#
8779 #       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
8780 #       Notes:  Even if X is denormalized, log(X) is always normalized.	#
8781 #									#
8782 #       Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
8783 #            2.1 Restore the user FPCR					#
8784 #            2.2 Return ans := Y * INV_L10.				#
8785 #									#
8786 #       slog10:								#
8787 #									#
8788 #       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8789 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8790 #       Notes:  Default means round-to-nearest mode, no floating-point	#
8791 #               traps, and precision control = double extended.		#
8792 #									#
8793 #       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
8794 #									#
8795 #       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
8796 #            2.1  Restore the user FPCR					#
8797 #            2.2  Return ans := Y * INV_L10.				#
8798 #									#
8799 #       sLog2d:								#
8800 #									#
8801 #       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8802 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8803 #       Notes:  Default means round-to-nearest mode, no floating-point	#
8804 #               traps, and precision control = double extended.		#
8805 #									#
8806 #       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
8807 #       Notes:  Even if X is denormalized, log(X) is always normalized.	#
8808 #									#
#       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
8810 #            2.1  Restore the user FPCR					#
8811 #            2.2  Return ans := Y * INV_L2.				#
8812 #									#
8813 #       sLog2:								#
8814 #									#
8815 #       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8816 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8817 #       Notes:  Default means round-to-nearest mode, no floating-point	#
8818 #               traps, and precision control = double extended.		#
8819 #									#
8820 #       Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
8821 #               go to Step 3.						#
8822 #									#
8823 #       Step 2.   Return k.						#
8824 #            2.1  Get integer k, X = 2^k.				#
8825 #            2.2  Restore the user FPCR.				#
8826 #            2.3  Return ans := convert-to-double-extended(k).		#
8827 #									#
8828 #       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
8829 #									#
8830 #       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
8831 #            4.1  Restore the user FPCR					#
8832 #            4.2  Return ans := Y * INV_L2.				#
8833 #									#
8834 #########################################################################
8835 
# Scale factors applied with fmul.x to the natural log returned by
# slogn/slognd.  Each is read as an extended-precision operand; the
# fourth long pads the entry to 16 bytes.

# 1/log(10): converts log(X) to log_10(X) in slog10/slog10d.
INV_L10:
	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000

# 1/log(2): converts log(X) to log_2(X) in slog2/slog2d.
INV_L2:
	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8841 
8842 	global		slog10
8843 #--entry point for Log10(X), X is normalized
8844 slog10:
8845 	fmov.b		&0x1,%fp0
8846 	fcmp.x		%fp0,(%a0)		# if operand == 1,
8847 	fbeq.l		ld_pzero		# return an EXACT zero
8848 
8849 	mov.l		(%a0),%d1
8850 	blt.w		invalid
8851 	mov.l		%d0,-(%sp)
8852 	clr.l		%d0
8853 	bsr		slogn			# log(X), X normal.
8854 	fmov.l		(%sp)+,%fpcr
8855 	fmul.x		INV_L10(%pc),%fp0
8856 	bra		t_inx2
8857 
8858 	global		slog10d
8859 #--entry point for Log10(X), X is denormalized
8860 slog10d:
8861 	mov.l		(%a0),%d1
8862 	blt.w		invalid
8863 	mov.l		%d0,-(%sp)
8864 	clr.l		%d0
8865 	bsr		slognd			# log(X), X denorm.
8866 	fmov.l		(%sp)+,%fpcr
8867 	fmul.x		INV_L10(%pc),%fp0
8868 	bra		t_minx2
8869 
8870 	global		slog2
8871 #--entry point for Log2(X), X is normalized
8872 slog2:
8873 	mov.l		(%a0),%d1
8874 	blt.w		invalid
8875 
8876 	mov.l		8(%a0),%d1
8877 	bne.b		continue		# X is not 2^k
8878 
8879 	mov.l		4(%a0),%d1
8880 	and.l		&0x7FFFFFFF,%d1
8881 	bne.b		continue
8882 
8883 #--X = 2^k.
8884 	mov.w		(%a0),%d1
8885 	and.l		&0x00007FFF,%d1
8886 	sub.l		&0x3FFF,%d1
8887 	beq.l		ld_pzero
8888 	fmov.l		%d0,%fpcr
8889 	fmov.l		%d1,%fp0
8890 	bra		t_inx2
8891 
8892 continue:
8893 	mov.l		%d0,-(%sp)
8894 	clr.l		%d0
8895 	bsr		slogn			# log(X), X normal.
8896 	fmov.l		(%sp)+,%fpcr
8897 	fmul.x		INV_L2(%pc),%fp0
8898 	bra		t_inx2
8899 
8900 invalid:
8901 	bra		t_operr
8902 
8903 	global		slog2d
8904 #--entry point for Log2(X), X is denormalized
8905 slog2d:
8906 	mov.l		(%a0),%d1
8907 	blt.w		invalid
8908 	mov.l		%d0,-(%sp)
8909 	clr.l		%d0
8910 	bsr		slognd			# log(X), X denorm.
8911 	fmov.l		(%sp)+,%fpcr
8912 	fmul.x		INV_L2(%pc),%fp0
8913 	bra		t_minx2
8914 
8915 #########################################################################
8916 # stwotox():  computes 2**X for a normalized input			#
8917 # stwotoxd(): computes 2**X for a denormalized input			#
8918 # stentox():  computes 10**X for a normalized input			#
8919 # stentoxd(): computes 10**X for a denormalized input			#
8920 #									#
8921 # INPUT ***************************************************************	#
8922 #	a0 = pointer to extended precision input			#
8923 #	d0 = round precision,mode					#
8924 #									#
8925 # OUTPUT **************************************************************	#
8926 #	fp0 = 2**X or 10**X						#
8927 #									#
8928 # ACCURACY and MONOTONICITY *******************************************	#
8929 #	The returned result is within 2 ulps in 64 significant bit,	#
8930 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8931 #	rounded to double precision. The result is provably monotonic	#
8932 #	in double precision.						#
8933 #									#
8934 # ALGORITHM ***********************************************************	#
8935 #									#
8936 #	twotox								#
8937 #	1. If |X| > 16480, go to ExpBig.				#
8938 #									#
8939 #	2. If |X| < 2**(-70), go to ExpSm.				#
8940 #									#
8941 #	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
8942 #		decompose N as						#
8943 #		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
8944 #									#
8945 #	4. Overwrite r := r * log2. Then				#
8946 #		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
8947 #		Go to expr to compute that expression.			#
8948 #									#
8949 #	tentox								#
8950 #	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.	#
8951 #									#
8952 #	2. If |X| < 2**(-70), go to ExpSm.				#
8953 #									#
8954 #	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set		#
8955 #		N := round-to-int(y). Decompose N as			#
8956 #		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
8957 #									#
8958 #	4. Define r as							#
8959 #		r := ((X - N*L1)-N*L2) * L10				#
8960 #		where L1, L2 are the leading and trailing parts of	#
8961 #		log_10(2)/64 and L10 is the natural log of 10. Then	#
8962 #		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
8963 #		Go to expr to compute that expression.			#
8964 #									#
8965 #	expr								#
8966 #	1. Fetch 2**(j/64) from table as Fact1 and Fact2.		#
8967 #									#
8968 #	2. Overwrite Fact1 and Fact2 by					#
8969 #		Fact1 := 2**(M) * Fact1					#
8970 #		Fact2 := 2**(M) * Fact2					#
8971 #		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).		#
8972 #									#
8973 #	3. Calculate P where 1 + P approximates exp(r):			#
8974 #		P = r + r*r*(A1+r*(A2+...+r*A5)).			#
8975 #									#
8976 #	4. Let AdjFact := 2**(M'). Return				#
8977 #		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).		#
8978 #		Exit.							#
8979 #									#
8980 #	ExpBig								#
8981 #	1. Generate overflow by Huge * Huge if X > 0; otherwise,	#
8982 #	        generate underflow by Tiny * Tiny.			#
8983 #									#
8984 #	ExpSm								#
8985 #	1. Return 1 + X.						#
8986 #									#
8987 #########################################################################
8988 
# Constants for stwotox/stentox and the shared expr tail.

# Read with fmul.d in TENMAIN (double precision).
L2TEN64:
	long		0x406A934F,0x0979A371	# 64LOG10/LOG2
# Leading part of log_10(2)/64; read with fmul.d (double precision).
L10TWO1:
	long		0x3F734413,0x509F8000	# LOG2/64LOG10

# Trailing part of log_10(2)/64; read with fmul.x (extended precision).
L10TWO2:
	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000

# Natural log of 10; read with fmul.x in TENMAIN.
LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000

# Natural log of 2; read with fmul.x in TWOMAIN.
LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

# Polynomial coefficients A5..A1 used by expr.  All five are accessed as
# doubles (fmov.d/fadd.d), so only the first two longs of EXPA1 are read.
EXPA5:	long		0x3F56C16D,0x6F7BD0B2
EXPA4:	long		0x3F811112,0x302C712C
EXPA3:	long		0x3FA55555,0x55554CC1
EXPA2:	long		0x3FC55555,0x55554A54
EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000
9006 
# Table of 2^(J/64), J = 0..63, one 16-byte entry per J (the code indexes
# it with J shifted left by 4).
# Longs 1-3 of an entry are copied verbatim into FACT1 (the leading part,
# in extended-precision layout).  Long 4 is consumed as two 16-bit words:
# the first goes to the exponent word of FACT2, the second to the upper
# word of FACT2's mantissa; the rest of FACT2 is cleared by the code.
TEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9072 
# Stack-frame aliases used by stwotox/stentox/expr.
# NOTE: X, ADJFACT and FACT1 all name the same storage (FP_SCR0), so
# each one is only valid until the next one is written.

	set		INT,L_SCR1		# N, the rounded integer, as a longword

	set		X,FP_SCR0		# copy of the input operand
	set		XDCARE,X+2
	set		XFRAC,X+4		# upper mantissa long of X

	set		ADJFACT,FP_SCR0		# 2**(M'), built at the very end of expr

	set		FACT1,FP_SCR0		# leading part of 2**(M) * 2**(J/64)
	set		FACT1HI,FACT1+4
	set		FACT1LOW,FACT1+8

	set		FACT2,FP_SCR1		# trailing part of 2**(M) * 2**(J/64)
	set		FACT2HI,FACT2+4
	set		FACT2LOW,FACT2+8
9088 
9089 	global		stwotox
9090 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9091 stwotox:
9092 	fmovm.x		(%a0),&0x80		# LOAD INPUT
9093 
9094 	mov.l		(%a0),%d1
9095 	mov.w		4(%a0),%d1
9096 	fmov.x		%fp0,X(%a6)
9097 	and.l		&0x7FFFFFFF,%d1
9098 
9099 	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
9100 	bge.b		TWOOK1
9101 	bra.w		EXPBORS
9102 
9103 TWOOK1:
9104 	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
9105 	ble.b		TWOMAIN
9106 	bra.w		EXPBORS
9107 
9108 TWOMAIN:
9109 #--USUAL CASE, 2^(-70) <= |X| <= 16480
9110 
9111 	fmov.x		%fp0,%fp1
9112 	fmul.s		&0x42800000,%fp1	# 64 * X
9113 	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
9114 	mov.l		%d2,-(%sp)
9115 	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
9116 	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
9117 	mov.l		INT(%a6),%d1
9118 	mov.l		%d1,%d2
9119 	and.l		&0x3F,%d1		# D0 IS J
9120 	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
9121 	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
9122 	asr.l		&6,%d2			# d2 IS L, N = 64L + J
9123 	mov.l		%d2,%d1
9124 	asr.l		&1,%d1			# D0 IS M
9125 	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
9126 	add.l		&0x3FFF,%d2
9127 
9128 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129 #--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130 #--ADJFACT = 2^(M').
9131 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9132 
9133 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
9134 
9135 	fmul.s		&0x3C800000,%fp1	# (1/64)*N
9136 	mov.l		(%a1)+,FACT1(%a6)
9137 	mov.l		(%a1)+,FACT1HI(%a6)
9138 	mov.l		(%a1)+,FACT1LOW(%a6)
9139 	mov.w		(%a1)+,FACT2(%a6)
9140 
9141 	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)
9142 
9143 	mov.w		(%a1)+,FACT2HI(%a6)
9144 	clr.w		FACT2HI+2(%a6)
9145 	clr.l		FACT2LOW(%a6)
9146 	add.w		%d1,FACT1(%a6)
9147 	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
9148 	add.w		%d1,FACT2(%a6)
9149 
9150 	bra.w		expr
9151 
9152 EXPBORS:
9153 #--FPCR, D0 SAVED
9154 	cmp.l		%d1,&0x3FFF8000
9155 	bgt.b		TEXPBIG
9156 
9157 #--|X| IS SMALL, RETURN 1 + X
9158 
9159 	fmov.l		%d0,%fpcr		# restore users round prec,mode
9160 	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
9161 	bra		t_pinx2
9162 
9163 TEXPBIG:
9164 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9165 #--REGISTERS SAVE SO FAR ARE FPCR AND  D0
9166 	mov.l		X(%a6),%d1
9167 	cmp.l		%d1,&0
9168 	blt.b		EXPNEG
9169 
9170 	bra		t_ovfl2			# t_ovfl expects positive value
9171 
9172 EXPNEG:
9173 	bra		t_unfl2			# t_unfl expects positive value
9174 
9175 	global		stwotoxd
9176 stwotoxd:
9177 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9178 
9179 	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
9180 	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
9181 	mov.l		(%a0),%d1
9182 	or.l		&0x00800001,%d1
9183 	fadd.s		%d1,%fp0
9184 	bra		t_pinx2
9185 
9186 	global		stentox
9187 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9188 stentox:
9189 	fmovm.x		(%a0),&0x80		# LOAD INPUT
9190 
9191 	mov.l		(%a0),%d1
9192 	mov.w		4(%a0),%d1
9193 	fmov.x		%fp0,X(%a6)
9194 	and.l		&0x7FFFFFFF,%d1
9195 
9196 	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
9197 	bge.b		TENOK1
9198 	bra.w		EXPBORS
9199 
9200 TENOK1:
9201 	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
9202 	ble.b		TENMAIN
9203 	bra.w		EXPBORS
9204 
9205 TENMAIN:
9206 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9207 
9208 	fmov.x		%fp0,%fp1
9209 	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
9210 	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
9211 	mov.l		%d2,-(%sp)
9212 	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
9213 	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
9214 	mov.l		INT(%a6),%d1
9215 	mov.l		%d1,%d2
9216 	and.l		&0x3F,%d1		# D0 IS J
9217 	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
9218 	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
9219 	asr.l		&6,%d2			# d2 IS L, N = 64L + J
9220 	mov.l		%d2,%d1
9221 	asr.l		&1,%d1			# D0 IS M
9222 	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
9223 	add.l		&0x3FFF,%d2
9224 
9225 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9226 #--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9227 #--ADJFACT = 2^(M').
9228 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9229 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
9230 
9231 	fmov.x		%fp1,%fp2
9232 
9233 	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
9234 	mov.l		(%a1)+,FACT1(%a6)
9235 
9236 	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL
9237 
9238 	mov.l		(%a1)+,FACT1HI(%a6)
9239 	mov.l		(%a1)+,FACT1LOW(%a6)
9240 	fsub.x		%fp1,%fp0		# X - N L_LEAD
9241 	mov.w		(%a1)+,FACT2(%a6)
9242 
9243 	fsub.x		%fp2,%fp0		# X - N L_TRAIL
9244 
9245 	mov.w		(%a1)+,FACT2HI(%a6)
9246 	clr.w		FACT2HI+2(%a6)
9247 	clr.l		FACT2LOW(%a6)
9248 
9249 	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
9250 	add.w		%d1,FACT1(%a6)
9251 	add.w		%d1,FACT2(%a6)
9252 
9253 expr:
9254 #--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9255 #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9256 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9257 #--	2**(M'+M) * 2**(J/64) * EXP(R)
9258 
9259 	fmov.x		%fp0,%fp1
9260 	fmul.x		%fp1,%fp1		# FP1 IS S = R*R
9261 
9262 	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
9263 	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4
9264 
9265 	fmul.x		%fp1,%fp2		# FP2 IS S*A5
9266 	fmul.x		%fp1,%fp3		# FP3 IS S*A4
9267 
9268 	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
9269 	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4
9270 
9271 	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
9272 	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)
9273 
9274 	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
9275 	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)
9276 
9277 	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
9278 	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
9279 	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1
9280 
9281 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
9282 
9283 #--FINAL RECONSTRUCTION PROCESS
9284 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
9285 
9286 	fmul.x		FACT1(%a6),%fp0
9287 	fadd.x		FACT2(%a6),%fp0
9288 	fadd.x		FACT1(%a6),%fp0
9289 
9290 	fmov.l		%d0,%fpcr		# restore users round prec,mode
9291 	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT
9292 	mov.l		(%sp)+,%d2
9293 	mov.l		&0x80000000,ADJFACT+4(%a6)
9294 	clr.l		ADJFACT+8(%a6)
9295 	mov.b		&FMUL_OP,%d1		# last inst is MUL
9296 	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
9297 	bra		t_catch
9298 
9299 	global		stentoxd
9300 stentoxd:
9301 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9302 
9303 	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
9304 	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
9305 	mov.l		(%a0),%d1
9306 	or.l		&0x00800001,%d1
9307 	fadd.s		%d1,%fp0
9308 	bra		t_pinx2
9309 
9310 #########################################################################
9311 # smovcr(): returns the ROM constant at the offset specified in d1	#
9312 #	    rounded to the mode and precision specified in d0.		#
9313 #									#
9314 # INPUT	***************************************************************	#
9315 #	d0 = rnd prec,mode						#
9316 #	d1 = ROM offset							#
9317 #									#
9318 # OUTPUT **************************************************************	#
9319 #	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
9320 #									#
9321 #########################################################################
9322 
	global		smovcr
smovcr:
# d1 is borrowed as scratch while the control bits are repacked, so the
# ROM offset is parked on the stack for the moment.
	mov.l		%d1,-(%sp)		# save rom offset for a sec

# Repack d0: the rounding precision ends up in the upper word and the
# rounding mode in the lower word.
	lsr.b		&0x4,%d0		# shift ctrl bits to lo
	mov.l		%d0,%d1			# make a copy
	andi.w		&0x3,%d1		# extract rnd mode
	andi.w		&0xc,%d0		# extract rnd prec
	swap		%d0			# put rnd prec in hi
	mov.w		%d1,%d0			# put rnd mode in lo

	mov.l		(%sp)+,%d1		# get rom offset

#
# check range of offset
#
# Only the low byte of d1 is examined and the compares are signed (ble).
# An offset that passes none of the tests falls through into z_val.
#
	tst.b		%d1			# if zero, offset is to pi
	beq.b		pi_tbl			# it is pi
	cmpi.b		%d1,&0x0a		# check range $01 - $0a
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x0e		# check range $0b - $0e
	ble.b		sm_tbl			# valid constants in this range
	cmpi.b		%d1,&0x2f		# check range $0f - $2f
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x3f		# check range $30 - $3f
	ble.b		bg_tbl			# valid constants in this range

z_val:
	bra.l		ld_pzero		# return a zero

#
# the answer is PI rounded to the proper precision.
#
# fetch a pointer to the answer table relating to the proper rounding
# mode (the low byte of d0 holds the mode at this point).
#
pi_tbl:
	tst.b		%d0			# is rmode RN?
	bne.b		pi_not_rn		# no
pi_rn:
	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
	bra.w		set_finx
pi_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		pi_rp			# yes
pi_rzrm:
	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
	bra.b		set_finx
pi_rp:
	lea.l		PIRP(%pc),%a0		# load PI RP table addr
	bra.b		set_finx

#
# the answer is one of:
#	$0B	log10(2)	(inexact)
#	$0C	e		(inexact)
#	$0D	log2(e)		(inexact)
#	$0E	log10(e)	(exact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
sm_tbl:
	subi.b		&0xb,%d1		# make offset in 0-3 range
	tst.b		%d0			# is rmode RN?
	bne.b		sm_not_rn		# no
sm_rn:
	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
sm_tbl_cont:
	cmpi.b		%d1,&0x2		# is index <= 2 (anything but log10(e))?
	ble.b		set_finx		# yes; answer is inexact
	bra.b		no_finx			# no; log10(e) is flagged as exact
sm_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		sm_rp			# yes
sm_rzrm:
	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		sm_tbl_cont
sm_rp:
	lea.l		SMALRP(%pc),%a0		# load RP table addr
	bra.b		sm_tbl_cont

#
# the answer is one of:
#	$30	ln(2)		(inexact)
#	$31	ln(10)		(inexact)
#	$32	10^0		(exact)
#	$33	10^1		(exact)
#	$34	10^2		(exact)
#	$35	10^4		(exact)
#	$36	10^8		(exact)
#	$37	10^16		(exact)
#	$38	10^32		(inexact)
#	$39	10^64		(inexact)
#	$3A	10^128		(inexact)
#	$3B	10^256		(inexact)
#	$3C	10^512		(inexact)
#	$3D	10^1024		(inexact)
#	$3E	10^2048		(inexact)
#	$3F	10^4096		(inexact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
bg_tbl:
	subi.b		&0x30,%d1		# make offset in 0-f range
	tst.b		%d0			# is rmode RN?
	bne.b		bg_not_rn		# no
bg_rn:
	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
bg_tbl_cont:
	cmpi.b		%d1,&0x1		# is offset <= $31?
	ble.b		set_finx		# yes; answer is inexact
	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
	ble.b		no_finx			# yes; answer is exact
	bra.b		set_finx		# no; answer is inexact
bg_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		bg_rp			# yes
bg_rzrm:
	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		bg_tbl_cont
bg_rp:
	lea.l		BIGRP(%pc),%a0		# load RP table addr
	bra.b		bg_tbl_cont

# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
set_finx:
	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
# Common tail: a0 = selected table, d1 = entry index within that table.
no_finx:
	mulu.w		&0xc,%d1		# index * 12 = byte offset of the entry
	swap		%d0			# put rnd prec in lo word
	tst.b		%d0			# is precision extended?

	bne.b		not_ext			# no; single/double must be rounded

# Precision is extended: the table entry is returned as is.
	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
	rts

# Precision is single or double
not_ext:
	swap		%d0			# rnd prec in upper word

# call round() to round the answer to the proper precision.
# exponents out of range for single or double DO NOT cause underflow
# or overflow.
# The entry is copied to FP_SCR1 so the table itself is never modified.
	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # copy sign/exponent word
	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # copy hi mantissa longword
	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # copy lo mantissa longword
	mov.l		%d0,%d1			# d1 = rnd prec (hi word), rnd mode (lo word)
	clr.l		%d0			# clear g,r,s
	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
	clr.w		LOCAL_SGN(%a0)		# sign always positive
	bsr.l		_round			# round the mantissa

	fmovm.x		(%a0),&0x80		# return rounded result in fp0
	rts
9481 
	align		0x4

# Constant tables read by smovcr().
#
# Every entry is three longwords (12 bytes): the sign/exponent word in
# the upper half of the first longword, followed by the hi and lo
# mantissa longwords. smovcr() multiplies the entry index by 12.
#
# Each group comes in three versions, selected by rounding mode:
#	xxxRN	- round to nearest
#	xxxRZRM	- round to zero / round to minus infinity
#	xxxRP	- round to plus infinity
# The versions differ only in the low mantissa longword of the inexact
# constants.

# offset $00
PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi
PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi

# offsets $0b - $0e (index 0 - 3). The trailing 0.0 entry is not selected
# by the range checks visible in smovcr().
SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

SMALRZRM:
	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

# offsets $30 - $3f (index 0 - 15)
BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

BIGRZRM:
	long		0x3ffe0000,0xb17217f7,0xd1cf79ab	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac16	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096

BIGRP:
	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
9562 
9563 #########################################################################
9564 # sscale(): computes the destination operand scaled by the source	#
#	    operand. If the absolute value of the source operand is	#
9566 #	    >= 2^14, an overflow or underflow is returned.		#
9567 #									#
9568 # INPUT *************************************************************** #
9569 #	a0  = pointer to double-extended source operand X		#
9570 #	a1  = pointer to double-extended destination operand Y		#
9571 #									#
9572 # OUTPUT ************************************************************** #
9573 #	fp0 =  scale(X,Y)						#
9574 #									#
9575 #########################################################################
9576 
# SIGN aliases the L_SCR1 scratch cell; sscale() stores the dst sign flag
# there on entry.
set	SIGN,		L_SCR1

# sscale(): multiply the dst operand by 2^int(src).
#	a0 = pointer to src operand, a1 = pointer to dst operand
#	d0 = control bits; saved on the stack and loaded into the FPCR
#	     just before the final floating-point operation
	global		sscale
sscale:
	mov.l		%d0,-(%sp)		# store off ctrl bits for now

	mov.w		DST_EX(%a1),%d1		# get dst exponent
	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
	andi.l		&0x00007fff,%d1		# strip sign from dst exp

	mov.w		SRC_EX(%a0),%d0		# check src bounds
	andi.w		&0x7fff,%d0		# clr src sign bit
	cmpi.w		%d0,&0x3fff		# is src ~ ZERO? (biased exp < $3fff, |src| < 1)
	blt.w		src_small		# yes
	cmpi.w		%d0,&0x400c		# no; is src too big?
	bgt.w		src_out			# yes

#
# Source is within 2^14 range.
#
src_ok:
	fintrz.x	SRC(%a0),%fp0		# calc int of src
	fmov.l		%fp0,%d0		# int src to d0
# don't want any accrued bits from the fintrz showing up later since
# we may need to read the fpsr for the last fp op in t_catch2().
	fmov.l		&0x0,%fpsr

	tst.b		DST_HI(%a1)		# is dst denormalized?
	bmi.b		sok_norm		# no; top mantissa bit is set

# the dst is a DENORM. normalize the DENORM and add the adjustment to
# the src value. then, jump to the norm part of the routine.
sok_dnrm:
	mov.l		%d0,-(%sp)		# save src for now

	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
	bsr.l		norm			# normalize the DENORM
	neg.l		%d0			# negate the value norm left in d0
	add.l		(%sp)+,%d0		# add adjustment to src

	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM

	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
	bge.b		sok_norm2		# thank goodness no

# the multiply factor that we're trying to create should be a denorm
# for the multiply to work. Therefore, we're going to actually do a
# multiply with a denorm which will cause an unimplemented data type
# exception to be put into the machine which will be caught and corrected
# later. we don't do this with the DENORMs above because this method
# is slower. but, don't fret, I don't see it being used much either.
#
# The factor is built on the stack. Pushes go lo mantissa, hi mantissa,
# exponent longword, so the exponent longword ends up at the lowest address.
	fmov.l		(%sp)+,%fpcr		# restore user fpcr
	mov.l		&0x80000000,%d1		# load normalized mantissa
	subi.l		&-0x3fff,%d0		# how many should we shift?
	neg.l		%d0			# make it positive
	cmpi.b		%d0,&0x20		# is it >= 32?
	bge.b		sok_dnrm_32		# yes
	lsr.l		%d0,%d1			# no; bit stays in upper lw
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		%d1,-(%sp)		# insert new high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont
sok_dnrm_32:
	subi.b		&0x20,%d0		# get shift count
	lsr.l		%d0,%d1			# make low mantissa longword
	mov.l		%d1,-(%sp)		# insert new low mantissa
	clr.l		-(%sp)			# insert zero high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont

# the dst is normalized (or has just been normalized above). create an
# fp multiply by a power of two that will produce the result.
sok_norm:
	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
sok_norm2:
	fmov.l		(%sp)+,%fpcr		# restore user fpcr

	addi.w		&0x3fff,%d0		# turn src amt into exp value
	swap		%d0			# put exponent in high word
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
	mov.l		%d0,-(%sp)		# insert exponent longword

sok_norm_cont:
	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# do the multiply
	bra		t_catch2		# catch any exceptions

#
# Source is outside of 2^14 range.  Test the sign and branch
# to the appropriate exception handler.
#
src_out:
	mov.l		(%sp)+,%d0		# restore ctrl bits
	exg		%a0,%a1			# swap src,dst ptrs (a0 = dst, a1 = src)
	tst.b		SRC_EX(%a1)		# is src negative?
	bmi		t_unfl			# yes; underflow
	bra		t_ovfl_sc		# no; overflow

#
# The source input is below 1, so we check for denormalized numbers
# and set unfl.
#
src_small:
	tst.b		DST_HI(%a1)		# is dst denormalized?
	bpl.b		ssmall_done		# yes

	mov.l		(%sp)+,%d0
	fmov.l		%d0,%fpcr		# no; load control bits
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		DST(%a1),%fp0		# simply return dest
	bra		t_catch2
ssmall_done:
	mov.l		(%sp)+,%d0		# load control bits into d0
	mov.l		%a1,%a0			# pass ptr to dst
	bra		t_resdnrm
9698 
9699 #########################################################################
9700 # smod(): computes the fp MOD of the input values X,Y.			#
9701 # srem(): computes the fp (IEEE) REM of the input values X,Y.		#
9702 #									#
9703 # INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand Y		#
#	a1 = pointer to extended precision destination operand X	#
9706 #	d0 = round precision,mode					#
9707 #									#
9708 #	The input operands X and Y can be either normalized or		#
9709 #	denormalized.							#
9710 #									#
9711 # OUTPUT ************************************************************** #
9712 #      fp0 = FREM(X,Y) or FMOD(X,Y)					#
9713 #									#
9714 # ALGORITHM *********************************************************** #
9715 #									#
9716 #       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
9717 #                signY := sign(Y), X := |X|, Y := |Y|,			#
9718 #                signQ := signX EOR signY. Record whether MOD or REM	#
9719 #                is requested.						#
9720 #									#
9721 #       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
9722 #                If (L < 0) then					#
9723 #                   R := X, go to Step 4.				#
9724 #                else							#
9725 #                   R := 2^(-L)X, j := L.				#
9726 #                endif							#
9727 #									#
9728 #       Step 3.  Perform MOD(X,Y)					#
9729 #            3.1 If R = Y, go to Step 9.				#
9730 #            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
9731 #            3.3 If j = 0, go to Step 4.				#
9732 #            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
9733 #                Step 3.1.						#
9734 #									#
9735 #       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
9736 #                Last_Subtract := false (used in Step 7 below). If	#
9737 #                MOD is requested, go to Step 6.			#
9738 #									#
9739 #       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
9740 #            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
9741 #                Step 6.						#
9742 #            5.2 If R > Y/2, then { set Last_Subtract := true,		#
9743 #                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
9744 #            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
9745 #                then { Q := Q + 1, signX := -signX }.			#
9746 #									#
9747 #       Step 6.  R := signX*R.						#
9748 #									#
9749 #       Step 7.  If Last_Subtract = true, R := R - Y.			#
9750 #									#
9751 #       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
9752 #									#
9753 #       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
9754 #                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
9755 #                R := 0. Return signQ, last 7 bits of Q, and R.		#
9756 #									#
9757 #########################################################################
9758 
# Scratch cells used by smod()/srem().
	set		Mod_Flag,L_SCR3		# byte: 0 = MOD requested, 1 = REM requested
	set		Sc_Flag,L_SCR3+1	# byte: 1 = result still needs the Scale multiply

	set		SignY,L_SCR2		# word: sign/exponent word of Y
	set		SignX,L_SCR2+2		# word: sign/exponent word of X
	set		SignQ,L_SCR3+2		# word: sign of the quotient in bit 15

	set		Y,FP_SCR0		# |Y| in extended format
	set		Y_Hi,Y+4
	set		Y_Lo,Y+8

	set		R,FP_SCR1		# remainder R in extended format
	set		R_Hi,R+4
	set		R_Lo,R+8

# Multiplier applied in Restore when Sc_Flag is set. Its biased exponent
# is $0001 with mantissa 1.0, i.e. 2^(-$3FFE), which cancels the extra
# $3FFE exponent bias carried through the computation.
Scale:
	long		0x00010000,0x80000000,0x00000000,0x00000000

# smod(): entry point for MOD. Clears the quotient byte, saves the
# control bits and records "MOD" in Mod_Flag.
	global		smod
smod:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)		# save ctrl bits
	clr.b		Mod_Flag(%a6)
	bra.b		Mod_Rem

# srem(): entry point for REM. Same as smod() but records "REM".
	global		srem
srem:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)		# save ctrl bits
	mov.b		&0x1,Mod_Flag(%a6)

# Common body. Y is the operand at a0 (SRC), X is the operand at a1 (DST).
Mod_Rem:
#..Save sign of X and Y
	movm.l		&0x3f00,-(%sp)		# save data registers (popped again in Restore)
	mov.w		SRC_EX(%a0),%d3
	mov.w		%d3,SignY(%a6)
	and.l		&0x00007FFF,%d3		# Y := |Y|

#
	mov.l		SRC_HI(%a0),%d4
	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|

	tst.l		%d3			# zero exponent: Y is denormalized
	bne.b		Y_Normal

	mov.l		&0x00003FFE,%d3		# $3FFD + 1
	tst.l		%d4
	bne.b		HiY_not0

# Upper mantissa longword is zero: move the lower one up (32 positions)
# and then shift out its leading zeros.
HiY_0:
	mov.l		%d5,%d4
	clr.l		%d5
	sub.l		&32,%d3
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# d6 = offset of first set bit
	lsl.l		%d6,%d4
	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
#	                                        ...with bias $7FFD
	bra.b		Chk_X

# Upper mantissa longword is nonzero: shift the 64-bit mantissa left by
# the number of leading zeros, carrying bits from lo into hi.
HiY_not0:
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# d6 = offset of first set bit
	sub.l		%d6,%d3
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6			# d6 = 32 - shift count
	lsr.l		%d6,%d7			# bits of lo that move into hi
	or.l		%d7,%d4			# (D3,D4,D5) normalized
#                                       ...with bias $7FFD
	bra.b		Chk_X

Y_Normal:
	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
#                                       ...with bias $7FFD

# Same treatment for X; the sign of the quotient is derived on the way.
Chk_X:
	mov.w		DST_EX(%a1),%d0
	mov.w		%d0,SignX(%a6)
	mov.w		SignY(%a6),%d1
	eor.l		%d0,%d1
	and.l		&0x00008000,%d1		# keep only the sign bit of signX EOR signY
	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
	and.l		&0x00007FFF,%d0
	mov.l		DST_HI(%a1),%d1
	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
	tst.l		%d0			# zero exponent: X is denormalized
	bne.b		X_Normal
	mov.l		&0x00003FFE,%d0
	tst.l		%d1
	bne.b		HiX_not0

HiX_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#                                       ...with bias $7FFD
	bra.b		Init

HiX_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6			# d6 = 32 - shift count
	lsr.l		%d6,%d7			# bits of lo that move into hi
	or.l		%d7,%d1			# (D0,D1,D2) normalized
#                                       ...with bias $7FFD
	bra.b		Init

X_Normal:
	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
#                                       ...with bias $7FFD

Init:
#
	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
	mov.l		%d0,-(%sp)		# save biased exp(X)
	sub.l		%d3,%d0			# L := expo(X)-expo(Y)

	clr.l		%d6			# D6 := carry <- 0
	clr.l		%d3			# D3 is Q
	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0

#..(Carry,D1,D2) is R
	tst.l		%d0
	bge.b		Mod_Loop_pre

#..expo(X) < expo(Y). Thus X = mod(X,Y)
#
	mov.l		(%sp)+,%d0		# restore d0
	bra.w		Get_Mod

Mod_Loop_pre:
	addq.l		&0x4,%sp		# erase exp(X)
#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
#..D0 counts j down to zero; D6 is zero or $FF (written by scs below).
Mod_Loop:
	tst.l		%d6			# test carry bit
	bgt.b		R_GT_Y

#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
	bne.b		R_NE_Y
	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
	bne.b		R_NE_Y

#..At this point, R = Y
	bra.w		Rem_is_0

R_NE_Y:
#..use the borrow of the previous compare
	bcs.b		R_LT_Y			# borrow is set iff R < Y

R_GT_Y:
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
	sub.l		%d5,%d2			# lo(R) - lo(Y)
	subx.l		%d4,%d1			# hi(R) - hi(Y)
	clr.l		%d6			# clear carry
	addq.l		&1,%d3			# Q := Q + 1

R_LT_Y:
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
	tst.l		%d0			# see if j = 0.
	beq.b		PostLoop

	add.l		%d3,%d3			# Q := 2Q
	add.l		%d2,%d2			# lo(R) = 2lo(R)
	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
	scs		%d6			# set Carry if 2(R) overflows
	addq.l		&1,%a1			# k := k+1
	subq.l		&1,%d0			# j := j - 1
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.

	bra.b		Mod_Loop

PostLoop:
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.

#..normalize R.
	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
	tst.l		%d1
	bne.b		HiR_not0

HiR_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#                                       ...with bias $7FFD
	bra.b		Get_Mod

HiR_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# N is set when the top bit is already set
	bmi.b		Get_Mod			# already normalized
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6			# d6 = 32 - shift count
	lsr.l		%d6,%d7			# bits of lo that move into hi
	or.l		%d7,%d1			# (D0,D1,D2) normalized

#
# (D0,D1,D2) is R with the extra exponent bias. Store R and Y in memory
# and load R into fp0. Below the $41FE threshold the extra bias is kept
# and removed later by the Scale multiply (Sc_Flag = 1); otherwise $3FFE
# is subtracted from both exponents right here (Sc_Flag = 0).
Get_Mod:
	cmp.l		%d0,&0x000041FE
	bge.b		No_Scale
Do_Scale:
	mov.w		%d0,R(%a6)
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	mov.l		L_SCR1(%a6),%d6
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	fmov.x		R(%a6),%fp0		# no exception
	mov.b		&1,Sc_Flag(%a6)
	bra.b		ModOrRem
No_Scale:
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	sub.l		&0x3FFE,%d0		# remove the extra bias from expo(R)
	mov.w		%d0,R(%a6)
	mov.l		L_SCR1(%a6),%d6
	sub.l		&0x3FFE,%d6		# remove the extra bias from expo(Y)
	mov.l		%d6,L_SCR1(%a6)
	fmov.x		R(%a6),%fp0
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	clr.b		Sc_Flag(%a6)

#
# For REM only: compare R against Y/2 to decide whether one more
# subtraction of Y is needed.
ModOrRem:
	tst.b		Mod_Flag(%a6)
	beq.b		Fix_Sign		# MOD requested; R is the answer

	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
	subq.l		&1,%d6			# biased expo(Y/2)
	cmp.l		%d0,%d6
	blt.b		Fix_Sign		# R < Y/2
	bgt.b		Last_Sub		# R > Y/2

# Exponents of R and Y/2 are equal: compare the mantissas.
	cmp.l		%d1,%d4
	bne.b		Not_EQ
	cmp.l		%d2,%d5
	bne.b		Not_EQ
	bra.w		Tie_Case		# R = Y/2

Not_EQ:
	bcs.b		Fix_Sign		# borrow from the compare: R < Y/2

Last_Sub:
#
	fsub.x		Y(%a6),%fp0		# no exceptions
	addq.l		&1,%d3			# Q := Q + 1

#
Fix_Sign:
#..Get sign of X
	mov.w		SignX(%a6),%d6
	bge.b		Get_Q			# sign bit clear; keep fp0 as is
	fneg.x		%fp0

#..Get Q
#
Get_Q:
	clr.l		%d6
	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
	mov.l		&8,%d7
	lsr.l		%d7,%d6			# move the sign from bit 15 down to bit 7
	and.l		&0x0000007F,%d3		# 7 bits of Q
	or.l		%d6,%d3			# sign and bits of Q
#	swap		%d3
#	fmov.l		%fpsr,%d6
#	and.l		&0xFF00FFFF,%d6
#	or.l		%d3,%d6
#	fmov.l		%d6,%fpsr		# put Q in fpsr
	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr

#
Restore:
	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
	mov.l		(%sp)+,%d0		# ctrl bits saved at smod/srem entry
	fmov.l		%d0,%fpcr
	tst.b		Sc_Flag(%a6)
	beq.b		Finish
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		Scale(%pc),%fp0		# may cause underflow
	bra		t_catch2
# the '040 package did this apparently to see if the dst operand for the
# preceding fmul was a denorm. but, it better not have been since the
# algorithm just got done playing with fp0 and expected no exceptions
# as a result. trust me...
#	bra		t_avoid_unsupp		# check for denorm as a
#						;result of the scaling

Finish:
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		%fp0,%fp0		# capture exceptions & round
	bra		t_catch2

Rem_is_0:
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
	addq.l		&1,%d3
	cmp.l		%d0,&8			# D0 is j
	bge.b		Q_Big			# j >= 8: the low 7 bits of Q are all zero

	lsl.l		%d0,%d3
	bra.b		Set_R_0

Q_Big:
	clr.l		%d3

Set_R_0:
	fmov.s		&0x00000000,%fp0
	clr.b		Sc_Flag(%a6)		# a zero result needs no scaling
	bra.w		Fix_Sign

Tie_Case:
#..Check parity of Q
	mov.l		%d3,%d6
	and.l		&0x00000001,%d6
	tst.l		%d6
	beq.w		Fix_Sign		# Q is even

#..Q is odd, Q := Q + 1, signX := -signX
	addq.l		&1,%d3
	mov.w		SignX(%a6),%d6
	eor.l		&0x00008000,%d6		# flip the sign bit
	mov.w		%d6,SignX(%a6)
	bra.w		Fix_Sign
10105 
# Default NAN in extended-precision layout (sign/exponent longword, hi
# mantissa, lo mantissa); t_operr loads it into fp0.
qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff
10107 
10108 #########################################################################
10109 # XDEF ****************************************************************	#
10110 #	t_dz(): Handle DZ exception during transcendental emulation.	#
10111 #	        Sets N bit according to sign of source operand.		#
10112 #	t_dz2(): Handle DZ exception during transcendental emulation.	#
10113 #		 Sets N bit always.					#
10114 #									#
10115 # XREF ****************************************************************	#
10116 #	None								#
10117 #									#
10118 # INPUT ***************************************************************	#
10119 #	a0 = pointer to source operand					#
10120 #									#
10121 # OUTPUT **************************************************************	#
10122 #	fp0 = default result						#
10123 #									#
10124 # ALGORITHM ***********************************************************	#
10125 #	- Store properly signed INF into fp0.				#
10126 #	- Set FPSR exception status dz bit, ccode inf bit, and		#
10127 #	  accrued dz bit.						#
10128 #									#
10129 #########################################################################
10130 
10131 	global		t_dz
10132 t_dz:
10133 	tst.b		SRC_EX(%a0)		# no; is src negative?
10134 	bmi.b		t_dz2			# yes
10135 
10136 dz_pinf:
10137 	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
10138 	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10139 	rts
10140 
10141 	global		t_dz2
10142 t_dz2:
10143 	fmov.s		&0xff800000,%fp0	# return -INF in fp0
10144 	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10145 	rts
10146 
10147 #################################################################
10148 # OPERR exception:						#
10149 #	- set FPSR exception status operr bit, condition code	#
10150 #	  nan bit; Store default NAN into fp0			#
10151 #################################################################
10152 	global		t_operr
10153 t_operr:
10154 	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10155 	fmovm.x		qnan(%pc),&0x80		# return default NAN in fp0
10156 	rts
10157 
10158 #################################################################
10159 # Extended DENORM:						#
10160 #	- For all functions that have a denormalized input and	#
10161 #	  that f(x)=x, this is the entry point.			#
10162 #	- we only return the EXOP here if either underflow or	#
10163 #	  inexact is enabled.					#
10164 #################################################################
10165 
# Entry point for scale w/ extended denorm. The function does
# NOT set INEX2/AUNFL/AINEX.
#	a0 = pointer to the denormalized operand
#	d0 = rnd prec,mode
	global		t_resdnrm
t_resdnrm:
	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
	bra.b		xdnrm_con

# Entry point for the remaining f(x)=x cases. Same inputs as t_resdnrm,
# but INEX2/AUNFL/AINEX are set in addition to UNFL.
	global		t_extdnrm
t_extdnrm:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

# Common code: return the default result in fp0 and, if UNFL or INEX is
# enabled, the EXOP in fp1.
xdnrm_con:
	mov.l		%a0,%a1			# make copy of src ptr
	mov.l		%d0,%d1			# make copy of rnd prec,mode
	andi.b		&0xc0,%d1		# extended precision?
	bne.b		xdnrm_sd		# no

# result precision is extended: the operand itself is the result; only
# the 'N' condition code may have to be set.
	tst.b		LOCAL_EX(%a0)		# is denorm negative?
	bpl.b		xdnrm_exit		# no

	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
	bra.b		xdnrm_exit

# result precision is single or double: let unf_sub build the default
# underflow result. a1 is preserved across the call on the stack.
xdnrm_sd:
	mov.l		%a1,-(%sp)
	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
	smi.b		%d1			# d1 = $ff if negative, $00 if positive
	bsr.l		unf_sub
	mov.l		(%sp)+,%a1
xdnrm_exit:
	fmovm.x		(%a0),&0x80		# return default result in fp0

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		xdnrm_ena		# yes
	rts

################
# unfl enabled #
################
# we have a DENORM that needs to be converted into an EXOP.
# so, normalize the mantissa, add 0x6000 to the new exponent,
# and return the result in fp1.
#
# norm() hands back in d0 the number of positions the mantissa was
# shifted (sscale() negates that same value to get its exponent
# adjustment). The operand's exponent field was zero, so the exponent of
# the normalized value is -(shift count); it has to be negated before the
# 0x6000 bias is added. Adding the raw shift count would make the EXOP
# too large by a factor of 2^(2 * shift count).
xdnrm_ena:
	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize mantissa
	neg.l		%d0			# new exponent = -(shift count)
	addi.l		&0x6000,%d0		# add extra bias
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent

	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
10224 
10225 #################################################################
10226 # UNFL exception:						#
10227 #	- This routine is for cases where even an EXOP isn't	#
10228 #	  large enough to hold the range of this result.	#
10229 #	  In such a case, the EXOP equals zero.			#
10230 #	- Return the default result to the proper precision	#
10231 #	  with the sign of this result being the same as that	#
10232 #	  of the src operand.					#
10233 #	- t_unfl2() is provided to force the result sign to	#
10234 #	  positive which is the desired result for fetox().	#
10235 #################################################################
10236 	global		t_unfl
10237 t_unfl:
10238 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10239 
10240 	tst.b		(%a0)			# is result pos or neg?
10241 	smi.b		%d1			# set d1 accordingly
10242 	bsr.l		unf_sub			# calc default unfl result
10243 	fmovm.x		(%a0),&0x80		# return default result in fp0
10244 
10245 	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10246 	rts
10247 
10248 # t_unfl2 ALWAYS tells unf_sub to create a positive result (d1.b = 0)
10249 	global		t_unfl2
10250 t_unfl2:
10251 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10252 
10253 	sf.b		%d1			# clear d1.b to represent positive
10254 	bsr.l		unf_sub			# a0 = ptr to default unfl result; FPSR_CC updated
10255 	fmovm.x		(%a0),&0x80		# return default result in fp0
10256 
10257 	fmov.s		&0x0000000,%fp1		# return EXOP (zero) in fp1
10258 	rts
10259 
10260 #################################################################
10261 # OVFL exception:						#
10262 #	- This routine is for cases where even an EXOP isn't	#
10263 #	  large enough to hold the range of this result.	#
10264 #	- Return the default result to the proper precision	#
10265 #	  with the sign of this result being the same as that	#
10266 #	  of the src operand.					#
10267 #	- t_ovfl2() is provided to force the result sign to	#
10268 #	  positive which is the desired result for fcosh().	#
10269 #	- t_ovfl_sc() is provided for scale() which only sets	#
10270 #	  the inexact bits if the number is inexact for the	#
10271 #	  precision indicated.					#
10272 #################################################################
10273 
10274 	global		t_ovfl_sc		# in: d0 = rnd mode/prec, a0 = ptr to dst operand
10275 t_ovfl_sc:
10276 	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10277 
10278 	mov.b		%d0,%d1			# fetch rnd mode/prec
10279 	andi.b		&0xc0,%d1		# extract rnd prec
10280 	beq.b		ovfl_work		# prec is extended; INEX2 is not set on this path
10281 
10282 	tst.b		LOCAL_HI(%a0)		# is dst a DENORM (mantissa msb clear)?
10283 	bmi.b		ovfl_sc_norm		# no
10284 
10285 # dst op is a DENORM. we have to normalize the mantissa to see if the
10286 # result would be inexact for the given precision. make a copy of the
10287 # dst in FP_SCR0 so we don't modify the version passed to us.
10288 	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10289 	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10290 	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10291 	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
10292 	movm.l		&0xc080,-(%sp)		# save d0-d1/a0 (a0 already -> FP_SCR0)
10293 	bsr.l		norm			# normalize mantissa
10294 	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0
10295 
10296 ovfl_sc_norm:
10297 	cmpi.b		%d1,&0x40		# is prec sgl?
10298 	bne.b		ovfl_sc_dbl		# no; dbl
10299 ovfl_sc_sgl:
10300 	tst.l		LOCAL_LO(%a0)		# is lo lw of mantissa set?
10301 	bne.b		ovfl_sc_inx		# yes; inexact for sgl
10302 	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
10303 	bne.b		ovfl_sc_inx		# yes; inexact for sgl
10304 	bra.b		ovfl_work		# don't set INEX2
10305 ovfl_sc_dbl:
10306 	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
10307 	andi.l		&0x7ff,%d1		# dbl mantissa set?
10308 	beq.b		ovfl_work		# no; don't set INEX2
10309 ovfl_sc_inx:
10310 	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
10311 	bra.b		ovfl_work		# continue in t_ovfl's common tail
10312 
10313 	global		t_ovfl			# overflow w/ zero EXOP; result sign taken from (a0)
10314 t_ovfl:
10315 	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10316 
10317 ovfl_work:					# common tail; t_ovfl_sc also enters here
10318 	tst.b		LOCAL_EX(%a0)		# what is the sign?
10319 	smi.b		%d1			# d1.b = 0xff if negative, 0x00 if positive
10320 	bsr.l		ovf_res			# calc default ovfl result
10321 	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes (d0 as left by ovf_res)
10322 	fmovm.x		(%a0),&0x80		# return default result in fp0
10323 
10324 	fmov.s		&0x00000000,%fp1	# return EXOP (zero) in fp1
10325 	rts
10326 
10327 # t_ovfl2 ALWAYS tells ovf_res to create a positive result (d1.b = 0)
10328 	global		t_ovfl2
10329 t_ovfl2:
10330 	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10331 
10332 	sf.b		%d1			# clear d1.b to represent positive
10333 	bsr.l		ovf_res			# calc default ovfl result
10334 	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes (d0 as left by ovf_res)
10335 	fmovm.x		(%a0),&0x80		# return default result in fp0
10336 
10337 	fmov.s		&0x00000000,%fp1	# return EXOP (zero) in fp1
10338 	rts
10339 
10340 #################################################################
10341 # t_catch():							#
10342 #	- the last operation of a transcendental emulation	#
10343 #	  routine may have caused an underflow or overflow.	#
10344 #	  we find out if this occurred by doing an fsave and	#
10345 #	  checking the exception bit. if one did occur, then we	#
10346 #	  jump to fgen_except() which creates the default	#
10347 #	  result and EXOP for us.				#
10348 #################################################################
10349 	global		t_catch
10350 t_catch:
10351 
10352 	fsave		-(%sp)			# push the FPU state frame
10353 	tst.b		0x2(%sp)		# is an exception pending in the frame?
10354 	bmi.b		catch			# yes; let fgen_except build result and EXOP
10355 	add.l		&0xc,%sp		# no; drop the 0xc-byte frame and fall into t_inx2
10356 
10357 #################################################################
10358 # INEX2 exception:						#
10359 #	- The inex2 and ainex bits are set.			#
10360 #################################################################
10361 	global		t_inx2
10362 t_inx2:
10363 	fblt.w		t_minx2			# FP ccodes "less than": also set N
10364 	fbeq.w		inx2_zero		# FP ccodes "equal": also set Z
10365 
10366 	global		t_pinx2
10367 t_pinx2:
10368 	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX (low word of USER_FPSR)
10369 	rts
10370 
10371 	global		t_minx2			# inexact result that is negative
10372 t_minx2:
10373 	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10374 	rts
10375 
10376 inx2_zero:					# inexact result that is zero (from t_inx2)
10377 	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes = 'Z' only
10378 	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10379 	rts
10380 
10381 # an underflow or overflow exception occurred (fsave frame is still on the stack).
10382 # we must set INEX/AINEX since the fmul/fadd/fmov emulation may not!
10383 catch:
10384 	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10385 catch2:						# t_catch2 enters here: no forced INEX2/AINEX
10386 	bsr.l		fgen_except		# emulate the op: fp0 = result, fp1 = EXOP
10387 	add.l		&0xc,%sp		# drop the 0xc-byte fsave frame
10388 	rts
10389 
10390 	global		t_catch2		# like t_catch, but merges the live FPSR instead of forcing INEX2
10391 t_catch2:
10392 
10393 	fsave		-(%sp)			# push the FPU state frame
10394 
10395 	tst.b		0x2(%sp)		# is an exception pending in the frame?
10396 	bmi.b		catch2			# yes; let fgen_except build result and EXOP
10397 	add.l		&0xc,%sp		# no; drop the 0xc-byte frame
10398 
10399 	fmov.l		%fpsr,%d0		# fetch status of the last FP operation
10400 	or.l		%d0,USER_FPSR(%a6)	# merge it into the user's FPSR image
10401 
10402 	rts
10403 
10404 #########################################################################
10405 
10406 #########################################################################
10407 # unf_sub(): underflow default result calculation for transcendentals	#
10408 #									#
10409 # INPUT:								#
10410 #	d0   : rnd mode,precision					#
10411 #	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
10412 # OUTPUT:								#
10413 #	a0   : points to result (an entry of tbl_unf_result)		#
10414 #########################################################################
10415 unf_sub:
10416 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10417 
10418 	andi.w		&0x10,%d1		# d1 = 0x10 if negative, else 0 (index bit 4)
10419 
10420 	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
10421 	andi.b		&0xf,%d0		# keep only the 4 prec/mode bits
10422 	or.b		%d1,%d0			# index = {sgn, prec, mode}
10423 
10424 	mov.l		%d0,%d1			# make a copy
10425 	lsl.b		&0x1,%d1		# index*2; with *8 scaling below = 16 bytes/entry
10426 
10427 	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10428 	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10429 	rts
10430 
10431 tbl_unf_cc:					# ccode byte per unf_sub index; 0x4 pairs with ZERO results
10432 	byte		0x4, 0x4, 0x4, 0x0	# (+); ext
10433 	byte		0x4, 0x4, 0x4, 0x0	# (+); sgl
10434 	byte		0x4, 0x4, 0x4, 0x0	# (+); dbl
10435 	byte		0x0, 0x0, 0x0, 0x0	# (+); fourth prec encoding (all-zero entries)
10436 	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-); ext
10437 	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-); sgl
10438 	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-); dbl
10439 
10440 tbl_unf_result:					# 16-byte entries, indexed {sgn, prec, mode} by unf_sub
10441 	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10442 	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10443 	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10444 	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10445 
10446 	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10447 	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10448 	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10449 	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10450 
10451 	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10452 	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10453 	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10454 	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10455 
10456 	long		0x0,0x0,0x0,0x0		# fourth prec encoding: all-zero filler
10457 	long		0x0,0x0,0x0,0x0		# (keeps the negative entries at index 16)
10458 	long		0x0,0x0,0x0,0x0
10459 	long		0x0,0x0,0x0,0x0
10460 
10461 	long		0x80000000, 0x00000000, 0x00000000, 0x0 # -ZERO;ext
10462 	long		0x80000000, 0x00000000, 0x00000000, 0x0 # -ZERO;ext
10463 	long		0x80000000, 0x00000000, 0x00000001, 0x0 # -MIN; ext
10464 	long		0x80000000, 0x00000000, 0x00000000, 0x0 # -ZERO;ext
10465 
10466 	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # -ZERO;sgl
10467 	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # -ZERO;sgl
10468 	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # -MIN; sgl
10469 	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # -ZERO;sgl
10470 
10471 	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # -ZERO;dbl
10472 	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # -ZERO;dbl
10473 	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # -MIN; dbl
10474 	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # -ZERO;dbl
10475 
10476 ############################################################
10477 
10478 #########################################################################
10479 # src_zero(): Return signed zero according to sign of src operand.	#
10480 #########################################################################
10481 	global		src_zero
10482 src_zero:
10483 	tst.b		SRC_EX(%a0)		# get sign of src operand
10484 	bmi.b		ld_mzero		# if neg, load neg zero; else fall into ld_pzero
10485 
10486 #
10487 # ld_pzero(): return a positive zero in fp0 and set only the 'Z' ccode.
10488 #	      (src_zero falls through to here for a non-negative src.)
10489 	global		ld_pzero
10490 ld_pzero:
10491 	fmov.s		&0x00000000,%fp0	# load +0
10492 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10493 	rts
10494 
10495 # ld_mzero(): return a negative zero in fp0 and set the 'N' and 'Z' ccodes.
10496 	global		ld_mzero
10497 ld_mzero:
10498 	fmov.s		&0x80000000,%fp0	# load -0
10499 	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10500 	rts
10501 
10502 #########################################################################
10503 # dst_zero(): Return signed zero according to sign of dst operand.	#
10504 #########################################################################
10505 	global		dst_zero
10506 dst_zero:
10507 	tst.b		DST_EX(%a1)		# get sign of dst operand (a1 = ptr to dst)
10508 	bmi.b		ld_mzero		# if neg, load neg zero
10509 	bra.b		ld_pzero		# else load positive zero
10510 
10511 #########################################################################
10512 # src_inf(): Return signed inf according to sign of src operand.	#
10513 #########################################################################
10514 	global		src_inf
10515 src_inf:
10516 	tst.b		SRC_EX(%a0)		# get sign of src operand
10517 	bmi.b		ld_minf			# if negative branch; else fall into ld_pinf
10518 
10519 #
10520 # ld_pinf(): return a positive infinity in fp0 and set only the 'INF' ccode.
10521 #	     (src_inf falls through to here for a non-negative src.)
10522 	global		ld_pinf
10523 ld_pinf:
10524 	fmov.s		&0x7f800000,%fp0	# load +INF
10525 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
10526 	rts
10527 
10528 #
10529 # ld_minf(): return a negative infinity in fp0 and set the 'N','INF' ccodes.
10530 #
10531 	global		ld_minf
10532 ld_minf:
10533 	fmov.s		&0xff800000,%fp0	# load -INF
10534 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10535 	rts
10536 
10537 #########################################################################
10538 # dst_inf(): Return signed inf according to sign of dst operand.	#
10539 #########################################################################
10540 	global		dst_inf
10541 dst_inf:
10542 	tst.b		DST_EX(%a1)		# get sign of dst operand (a1 = ptr to dst)
10543 	bmi.b		ld_minf			# if negative branch
10544 	bra.b		ld_pinf			# else load positive infinity
10545 
10546 	global		szr_inf
10547 #################################################################
10548 # szr_inf(): Return +ZERO for a negative src operand or		#
10549 #	            +INF for a positive src operand.		#
10550 #	     Routine used for fetox, ftwotox, and ftentox.	#
10551 #################################################################
10552 szr_inf:
10553 	tst.b		SRC_EX(%a0)		# check sign of source
10554 	bmi.b		ld_pzero		# negative src: return +ZERO
10555 	bra.b		ld_pinf			# positive src: return +INF
10556 
10557 #########################################################################
10558 # sopr_inf(): Return +INF for a positive src operand or			#
10559 #	      jump to operand error routine for a negative src operand.	#
10560 #	      Routine used for flogn, flognp1, flog10, and flog2.	#
10561 #########################################################################
10562 	global		sopr_inf
10563 sopr_inf:
10564 	tst.b		SRC_EX(%a0)		# check sign of source
10565 	bmi.w		t_operr			# negative src: operand error
10566 	bra.b		ld_pinf			# positive src: return +INF
10567 
10568 #################################################################
10569 # setoxm1i(): Return minus one for a negative src operand or	#
10570 #	      positive infinity for a positive src operand.	#
10571 #	      Routine used for fetoxm1.				#
10572 #################################################################
10573 	global		setoxm1i
10574 setoxm1i:
10575 	tst.b		SRC_EX(%a0)		# check sign of source
10576 	bmi.b		ld_mone			# negative src: return -1
10577 	bra.b		ld_pinf			# positive src: return +INF
10578 
10579 #########################################################################
10580 # src_one(): Return signed one according to sign of src operand.	#
10581 #########################################################################
10582 	global		src_one
10583 src_one:
10584 	tst.b		SRC_EX(%a0)		# check sign of source
10585 	bmi.b		ld_mone			# negative src: return -1; else fall into ld_pone
10586 
10587 #
10588 # ld_pone(): return positive one in fp0 with all ccode bits clear.
10589 #	     (src_one falls through to here for a non-negative src.)
10590 	global		ld_pone
10591 ld_pone:
10592 	fmov.s		&0x3f800000,%fp0	# load +1
10593 	clr.b		FPSR_CC(%a6)		# no ccode bits set
10594 	rts
10595 
10596 #
10597 # ld_mone(): return negative one in fp0 and set only the 'N' ccode.
10598 #
10599 	global		ld_mone
10600 ld_mone:
10601 	fmov.s		&0xbf800000,%fp0	# load -1
10602 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10603 	rts
10604 
10605 ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235	# +pi/2; read with fmov.x by ld_ppi2
10606 mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235	# -pi/2; read with fmov.x by ld_mpi2
10607 
10608 #################################################################
10609 # spi_2(): Return signed PI/2 according to sign of src operand.	#
10610 #################################################################
10611 	global		spi_2
10612 spi_2:
10613 	tst.b		SRC_EX(%a0)		# check sign of source
10614 	bmi.b		ld_mpi2			# negative src: return -pi/2; else fall into ld_ppi2
10615 
10616 #
10617 # ld_ppi2(): return positive PI/2 in fp0, then set INEX2/AINEX via t_pinx2.
10618 #	     (spi_2 falls through to here for a non-negative src.)
10619 	global		ld_ppi2
10620 ld_ppi2:
10621 	fmov.l		%d0,%fpcr		# load d0 into FPCR (presumably user rnd mode/prec)
10622 	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
10623 	bra.w		t_pinx2			# set INEX2
10624 
10625 #
10626 # ld_mpi2(): return negative PI/2 in fp0, then set N/INEX2/AINEX via t_minx2.
10627 #
10628 	global		ld_mpi2
10629 ld_mpi2:
10630 	fmov.l		%d0,%fpcr		# load d0 into FPCR (presumably user rnd mode/prec)
10631 	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
10632 	bra.w		t_minx2			# set INEX2
10633 
10634 ####################################################
10635 # The following routines give support for fsincos. #
10636 ####################################################
10637 
10638 #
10639 # ssincosz(): When the src operand is ZERO, store a one in the
10640 #	      cosine register and return a ZERO in fp0 w/ the same sign
10641 #	      as the src operand. Returns to the caller through sto_cos.
10642 #
10643 	global		ssincosz
10644 ssincosz:
10645 	fmov.s		&0x3f800000,%fp1	# cosine result = +1
10646 	tst.b		SRC_EX(%a0)		# test sign
10647 	bpl.b		sincoszp		# src is positive
10648 	fmov.s		&0x80000000,%fp0	# return sin result (-0) in fp0
10649 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10650 	bra.b		sto_cos			# store cosine result
10651 sincoszp:
10652 	fmov.s		&0x00000000,%fp0	# return sin result (+0) in fp0
10653 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10654 	bra.b		sto_cos			# store cosine result
10655 
10656 #
10657 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10658 #	      register and jump to the operand error routine (taken for
10659 #	      either sign of the src operand).
10660 #
10661 	global		ssincosi
10662 ssincosi:
10663 	fmov.x		qnan(%pc),%fp1		# load NAN
10664 	bsr.l		sto_cos			# store cosine result
10665 	bra.w		t_operr			# sine result comes from the operr routine
10666 
10667 #
10668 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10669 #		 register and branch to the src QNAN routine.
10670 #
10671 	global		ssincosqnan
10672 ssincosqnan:
10673 	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand
10674 	bsr.l		sto_cos			# store cosine result
10675 	bra.w		src_qnan		# sine result: the src QNAN in fp0
10676 
10677 #
10678 # ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10679 #		 in the cosine register and branch to the src SNAN routine.
10680 #
10681 	global		ssincossnan
10682 ssincossnan:
10683 	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand
10684 	bsr.l		sto_cos			# store cosine result
10685 	bra.w		src_snan		# sine result: the src SNAN in fp0
10686 
10687 ########################################################################
10688 
10689 #########################################################################
10690 # sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
10691 #	     fp1 holds the result of the cosine portion of ssincos().	#
10692 #	     the value in fp1 will not take any exceptions when moved.	#
10693 # INPUT:								#
10694 #	fp1 : fp value to store						#
10695 # MODIFIED:								#
10696 #	d0								#
10697 #########################################################################
10698 	global		sto_cos
10699 sto_cos:
10700 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch lo byte of CMDREG
10701 	andi.w		&0x7,%d0		# isolate 3-bit fpreg number
10702 	mov.w		(tbl_sto_cos.b,%pc,%d0.w*2),%d0 # fetch offset of the store stub
10703 	jmp		(tbl_sto_cos.b,%pc,%d0.w*1) # jump to it; the stub does the rts
10704 
10705 tbl_sto_cos:					# word offsets relative to tbl_sto_cos
10706 	short		sto_cos_0 - tbl_sto_cos
10707 	short		sto_cos_1 - tbl_sto_cos
10708 	short		sto_cos_2 - tbl_sto_cos
10709 	short		sto_cos_3 - tbl_sto_cos
10710 	short		sto_cos_4 - tbl_sto_cos
10711 	short		sto_cos_5 - tbl_sto_cos
10712 	short		sto_cos_6 - tbl_sto_cos
10713 	short		sto_cos_7 - tbl_sto_cos
10714 
10715 sto_cos_0:
10716 	fmovm.x		&0x40,EXC_FP0(%a6)	# write fp1 to the EXC_FP0 slot in the frame
10717 	rts
10718 sto_cos_1:
10719 	fmovm.x		&0x40,EXC_FP1(%a6)	# write fp1 to the EXC_FP1 slot in the frame
10720 	rts
10721 sto_cos_2:
10722 	fmov.x		%fp1,%fp2		# fp2-fp7 are written directly
10723 	rts
10724 sto_cos_3:
10725 	fmov.x		%fp1,%fp3
10726 	rts
10727 sto_cos_4:
10728 	fmov.x		%fp1,%fp4
10729 	rts
10730 sto_cos_5:
10731 	fmov.x		%fp1,%fp5
10732 	rts
10733 sto_cos_6:
10734 	fmov.x		%fp1,%fp6
10735 	rts
10736 sto_cos_7:
10737 	fmov.x		%fp1,%fp7
10738 	rts
10739 
10740 ##################################################################
10741 	global		smod_sdnrm		# fmod: src tag was DENORM
10742 	global		smod_snorm		# fmod: src tag was zero
10743 smod_sdnrm:
10744 smod_snorm:
10745 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10746 	beq.l		smod			# tag == 0: go to smod
10747 	cmpi.b		%d1,&ZERO
10748 	beq.w		smod_zro		# dst ZERO: return zero w/ sign of dst
10749 	cmpi.b		%d1,&INF
10750 	beq.l		t_operr			# dst INF: operand error
10751 	cmpi.b		%d1,&DENORM
10752 	beq.l		smod			# dst DENORM: go to smod
10753 	cmpi.b		%d1,&SNAN
10754 	beq.l		dst_snan		# dst SNAN
10755 	bra.l		dst_qnan		# any other tag is handled as QNAN
10756 
10757 	global		smod_szero		# fmod: src tag was ZERO
10758 smod_szero:
10759 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10760 	beq.l		t_operr			# tag == 0: operand error
10761 	cmpi.b		%d1,&ZERO
10762 	beq.l		t_operr			# dst ZERO: operand error
10763 	cmpi.b		%d1,&INF
10764 	beq.l		t_operr			# dst INF: operand error
10765 	cmpi.b		%d1,&DENORM
10766 	beq.l		t_operr			# dst DENORM: operand error
10767 	cmpi.b		%d1,&QNAN
10768 	beq.l		dst_qnan		# dst QNAN: return it
10769 	bra.l		dst_snan		# any other tag is handled as SNAN
10770 
10771 	global		smod_sinf		# fmod: src tag was INF
10772 smod_sinf:
10773 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10774 	beq.l		smod_fpn		# tag == 0: result is the dst operand
10775 	cmpi.b		%d1,&ZERO
10776 	beq.l		smod_zro		# dst ZERO: return zero w/ sign of dst
10777 	cmpi.b		%d1,&INF
10778 	beq.l		t_operr			# dst INF: operand error
10779 	cmpi.b		%d1,&DENORM
10780 	beq.l		smod_fpn		# dst DENORM: result is the dst operand
10781 	cmpi.b		%d1,&QNAN
10782 	beq.l		dst_qnan		# dst QNAN: return it
10783 	bra.l		dst_snan		# any other tag is handled as SNAN
10784 
10785 smod_zro:					# dst is ZERO: store quotient sign, return zero
10786 srem_zro:					# (shared by fmod and frem)
10787 	mov.b		SRC_EX(%a0),%d1		# get src sign
10788 	mov.b		DST_EX(%a1),%d0		# get dst sign
10789 	eor.b		%d0,%d1			# get qbyte sign
10790 	andi.b		&0x80,%d1		# keep only the sign bit
10791 	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit only
10792 	tst.b		%d0			# result takes the sign of dst
10793 	bpl.w		ld_pzero		# dst positive: return +0
10794 	bra.w		ld_mzero		# dst negative: return -0
10795 
10796 smod_fpn:					# src is INF, dst tag is zero or DENORM: result is dst
10797 srem_fpn:					# (shared by fmod and frem)
10798 	clr.b		FPSR_QBYTE(%a6)		# (overwritten by the store of d1 below)
10799 	mov.l		%d0,-(%sp)		# save d0; d0.b is used as scratch below
10800 	mov.b		SRC_EX(%a0),%d1		# get src sign
10801 	mov.b		DST_EX(%a1),%d0		# get dst sign
10802 	eor.b		%d0,%d1			# get qbyte sign
10803 	andi.b		&0x80,%d1		# keep only the sign bit
10804 	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit only
10805 	cmpi.b		DTAG(%a6),&DENORM	# is dst a DENORM?
10806 	bne.b		smod_nrm		# no
10807 	lea		DST(%a1),%a0		# pass ptr to dst in a0
10808 	mov.l		(%sp)+,%d0		# restore saved d0
10809 	bra		t_resdnrm		# let t_resdnrm return the DENORM
10810 smod_nrm:
10811 	fmov.l		(%sp)+,%fpcr		# load saved d0 into FPCR
10812 	fmov.x		DST(%a1),%fp0		# return dst operand in fp0
10813 	tst.b		DST_EX(%a1)		# is dst negative?
10814 	bmi.b		smod_nrm_neg		# yes; set 'N'
10815 	rts
10816 
10817 smod_nrm_neg:
10818 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
10819 	rts
10820 
10821 #########################################################################
10822 	global		srem_snorm		# frem: src tag was zero
10823 	global		srem_sdnrm		# frem: src tag was DENORM
10824 srem_sdnrm:
10825 srem_snorm:
10826 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10827 	beq.l		srem			# tag == 0: go to srem
10828 	cmpi.b		%d1,&ZERO
10829 	beq.w		srem_zro		# dst ZERO: return zero w/ sign of dst
10830 	cmpi.b		%d1,&INF
10831 	beq.l		t_operr			# dst INF: operand error
10832 	cmpi.b		%d1,&DENORM
10833 	beq.l		srem			# dst DENORM: go to srem
10834 	cmpi.b		%d1,&QNAN
10835 	beq.l		dst_qnan		# dst QNAN: return it
10836 	bra.l		dst_snan		# any other tag is handled as SNAN
10837 
10838 	global		srem_szero		# frem: src tag was ZERO
10839 srem_szero:
10840 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10841 	beq.l		t_operr			# tag == 0: operand error
10842 	cmpi.b		%d1,&ZERO
10843 	beq.l		t_operr			# dst ZERO: operand error
10844 	cmpi.b		%d1,&INF
10845 	beq.l		t_operr			# dst INF: operand error
10846 	cmpi.b		%d1,&DENORM
10847 	beq.l		t_operr			# dst DENORM: operand error
10848 	cmpi.b		%d1,&QNAN
10849 	beq.l		dst_qnan		# dst QNAN: return it
10850 	bra.l		dst_snan		# any other tag is handled as SNAN
10851 
10852 	global		srem_sinf		# frem: src tag was INF
10853 srem_sinf:
10854 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10855 	beq.w		srem_fpn		# tag == 0: result is the dst operand
10856 	cmpi.b		%d1,&ZERO
10857 	beq.w		srem_zro		# dst ZERO: return zero w/ sign of dst
10858 	cmpi.b		%d1,&INF
10859 	beq.l		t_operr			# dst INF: operand error
10860 	cmpi.b		%d1,&DENORM
10861 	beq.l		srem_fpn		# dst DENORM: result is the dst operand
10862 	cmpi.b		%d1,&QNAN
10863 	beq.l		dst_qnan		# dst QNAN: return it
10864 	bra.l		dst_snan		# any other tag is handled as SNAN
10865 
10866 #########################################################################
10867 	global		sscale_snorm		# fscale: src tag was zero
10868 	global		sscale_sdnrm		# fscale: src tag was DENORM
10869 sscale_snorm:
10870 sscale_sdnrm:
10871 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10872 	beq.l		sscale			# tag == 0: go to sscale
10873 	cmpi.b		%d1,&ZERO
10874 	beq.l		dst_zero		# dst ZERO: return zero w/ sign of dst
10875 	cmpi.b		%d1,&INF
10876 	beq.l		dst_inf			# dst INF: return inf w/ sign of dst
10877 	cmpi.b		%d1,&DENORM
10878 	beq.l		sscale			# dst DENORM: go to sscale
10879 	cmpi.b		%d1,&QNAN
10880 	beq.l		dst_qnan		# dst QNAN: return it
10881 	bra.l		dst_snan		# any other tag is handled as SNAN
10882 
10883 	global		sscale_szero		# fscale: src tag was ZERO (same table as sscale_snorm)
10884 sscale_szero:
10885 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10886 	beq.l		sscale			# tag == 0: go to sscale
10887 	cmpi.b		%d1,&ZERO
10888 	beq.l		dst_zero		# dst ZERO: return zero w/ sign of dst
10889 	cmpi.b		%d1,&INF
10890 	beq.l		dst_inf			# dst INF: return inf w/ sign of dst
10891 	cmpi.b		%d1,&DENORM
10892 	beq.l		sscale			# dst DENORM: go to sscale
10893 	cmpi.b		%d1,&QNAN
10894 	beq.l		dst_qnan		# dst QNAN: return it
10895 	bra.l		dst_snan		# any other tag is handled as SNAN
10896 
10897 	global		sscale_sinf		# fscale: src tag was INF
10898 sscale_sinf:
10899 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10900 	beq.l		t_operr			# tag == 0: operand error
10901 	cmpi.b		%d1,&QNAN
10902 	beq.l		dst_qnan		# dst QNAN: return it
10903 	cmpi.b		%d1,&SNAN
10904 	beq.l		dst_snan		# dst SNAN
10905 	bra.l		t_operr			# every other dst tag: operand error
10906 
10907 ########################################################################
10908 
10909 #
10910 # sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10911 #	       A NAN in the dst takes priority over the src QNAN.
10912 	global		sop_sqnan
10913 sop_sqnan:
10914 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10915 	cmpi.b		%d1,&QNAN
10916 	beq.b		dst_qnan		# dst QNAN: return the dst
10917 	cmpi.b		%d1,&SNAN
10918 	beq.b		dst_snan		# dst SNAN: return the dst
10919 	bra.b		src_qnan		# dst is not a NAN: return the src QNAN
10920 
10921 #
10922 # sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10923 #	       A NAN in the dst takes priority over the src SNAN.
10924 	global		sop_ssnan
10925 sop_ssnan:
10926 	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
10927 	cmpi.b		%d1,&QNAN
10928 	beq.b		dst_qnan_src_snan	# dst QNAN: return dst, but flag the src SNAN
10929 	cmpi.b		%d1,&SNAN
10930 	beq.b		dst_snan		# dst SNAN: return the dst
10931 	bra.b		src_snan		# dst is not a NAN: return the src SNAN
10932 
10933 dst_qnan_src_snan:
10934 	ori.l		&snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10935 	bra.b		dst_qnan		# then return the dst QNAN
10936 
10937 #
10938 # dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10939 #
10940 	global		dst_snan
10941 dst_snan:
10942 	fmov.x		DST(%a1),%fp0		# the fmove sets the SNAN bit
10943 	fmov.l		%fpsr,%d0		# catch resulting status
10944 	or.l		%d0,USER_FPSR(%a6)	# merge status into the user's FPSR image
10945 	rts
10946 
10947 #
10948 # dst_qnan(): Return the dst QNAN in fp0; ccodes = NAN plus N if it is negative.
10949 #
10950 	global		dst_qnan
10951 dst_qnan:
10952 	fmov.x		DST(%a1),%fp0		# return the non-signalling nan
10953 	tst.b		DST_EX(%a1)		# set ccodes according to QNAN sign
10954 	bmi.b		dst_qnan_m		# negative QNAN
10955 dst_qnan_p:
10956 	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
10957 	rts
10958 dst_qnan_m:					# also the branch target of src_qnan's bmi
10959 	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6) # set 'N','NAN' ccode bits
10960 	rts
10961 
10962 #
10963 # src_snan(): Return the src SNAN w/ the SNAN bit set.
10964 #
10965 	global		src_snan
10966 src_snan:
10967 	fmov.x		SRC(%a0),%fp0		# the fmove sets the SNAN bit
10968 	fmov.l		%fpsr,%d0		# catch resulting status
10969 	or.l		%d0,USER_FPSR(%a6)	# merge status into the user's FPSR image
10970 	rts
10971 
10972 #
10973 # src_qnan(): Return the src QNAN in fp0; ccodes = NAN plus N if it is negative.
10974 #
10975 	global		src_qnan
10976 src_qnan:
10977 	fmov.x		SRC(%a0),%fp0		# return the non-signalling nan
10978 	tst.b		SRC_EX(%a0)		# set ccodes according to QNAN sign
10979 	bmi.b		src_qnan_m		# negative QNAN (use our own tail, not dst_qnan's)
10980 src_qnan_p:
10981 	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
10982 	rts
10983 src_qnan_m:
10984 	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6) # set 'N','NAN' ccode bits
10985 	rts
10986 
10987 #
10988 # fkern2.s:
10989 #	These entry points are used by the exception handler
10990 # routines where an instruction is selected by an index into
10991 # a large jump table corresponding to a given instruction which
10992 # has been decoded. Flow continues here where we now decode
10993 # further according to the source operand type.
10994 #
10995 
10996 	global		fsinh
10997 fsinh:
10998 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
10999 	beq.l		ssinh			# tag == 0: go to ssinh
11000 	cmpi.b		%d1,&ZERO
11001 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11002 	cmpi.b		%d1,&INF
11003 	beq.l		src_inf			# INF: return inf w/ sign of src
11004 	cmpi.b		%d1,&DENORM
11005 	beq.l		ssinhd			# DENORM: go to ssinhd
11006 	cmpi.b		%d1,&QNAN
11007 	beq.l		src_qnan		# QNAN: return the src QNAN
11008 	bra.l		src_snan		# any other tag is handled as SNAN
11009 
11010 	global		flognp1
11011 flognp1:
11012 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11013 	beq.l		slognp1			# tag == 0: go to slognp1
11014 	cmpi.b		%d1,&ZERO
11015 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11016 	cmpi.b		%d1,&INF
11017 	beq.l		sopr_inf		# INF: +INF if positive, operand error if negative
11018 	cmpi.b		%d1,&DENORM
11019 	beq.l		slognp1d		# DENORM: go to slognp1d
11020 	cmpi.b		%d1,&QNAN
11021 	beq.l		src_qnan		# QNAN: return the src QNAN
11022 	bra.l		src_snan		# any other tag is handled as SNAN
11023 
11024 	global		fetoxm1
11025 fetoxm1:
11026 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11027 	beq.l		setoxm1			# tag == 0: go to setoxm1
11028 	cmpi.b		%d1,&ZERO
11029 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11030 	cmpi.b		%d1,&INF
11031 	beq.l		setoxm1i		# INF: +INF if positive, -1 if negative
11032 	cmpi.b		%d1,&DENORM
11033 	beq.l		setoxm1d		# DENORM: go to setoxm1d
11034 	cmpi.b		%d1,&QNAN
11035 	beq.l		src_qnan		# QNAN: return the src QNAN
11036 	bra.l		src_snan		# any other tag is handled as SNAN
11037 
11038 	global		ftanh
11039 ftanh:
11040 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11041 	beq.l		stanh			# tag == 0: go to stanh
11042 	cmpi.b		%d1,&ZERO
11043 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11044 	cmpi.b		%d1,&INF
11045 	beq.l		src_one			# INF: return one w/ sign of src
11046 	cmpi.b		%d1,&DENORM
11047 	beq.l		stanhd			# DENORM: go to stanhd
11048 	cmpi.b		%d1,&QNAN
11049 	beq.l		src_qnan		# QNAN: return the src QNAN
11050 	bra.l		src_snan		# any other tag is handled as SNAN
11051 
11052 	global		fatan
11053 fatan:
11054 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11055 	beq.l		satan			# tag == 0: go to satan
11056 	cmpi.b		%d1,&ZERO
11057 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11058 	cmpi.b		%d1,&INF
11059 	beq.l		spi_2			# INF: return pi/2 w/ sign of src
11060 	cmpi.b		%d1,&DENORM
11061 	beq.l		satand			# DENORM: go to satand
11062 	cmpi.b		%d1,&QNAN
11063 	beq.l		src_qnan		# QNAN: return the src QNAN
11064 	bra.l		src_snan		# any other tag is handled as SNAN
11065 
11066 	global		fasin
11067 fasin:
11068 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11069 	beq.l		sasin			# tag == 0: go to sasin
11070 	cmpi.b		%d1,&ZERO
11071 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11072 	cmpi.b		%d1,&INF
11073 	beq.l		t_operr			# INF: operand error
11074 	cmpi.b		%d1,&DENORM
11075 	beq.l		sasind			# DENORM: go to sasind
11076 	cmpi.b		%d1,&QNAN
11077 	beq.l		src_qnan		# QNAN: return the src QNAN
11078 	bra.l		src_snan		# any other tag is handled as SNAN
11079 
11080 	global		fatanh
11081 fatanh:
11082 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11083 	beq.l		satanh			# tag == 0: go to satanh
11084 	cmpi.b		%d1,&ZERO
11085 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11086 	cmpi.b		%d1,&INF
11087 	beq.l		t_operr			# INF: operand error
11088 	cmpi.b		%d1,&DENORM
11089 	beq.l		satanhd			# DENORM: go to satanhd
11090 	cmpi.b		%d1,&QNAN
11091 	beq.l		src_qnan		# QNAN: return the src QNAN
11092 	bra.l		src_snan		# any other tag is handled as SNAN
11093 
11094 	global		fsine
11095 fsine:
11096 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11097 	beq.l		ssin			# tag == 0: go to ssin
11098 	cmpi.b		%d1,&ZERO
11099 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11100 	cmpi.b		%d1,&INF
11101 	beq.l		t_operr			# INF: operand error
11102 	cmpi.b		%d1,&DENORM
11103 	beq.l		ssind			# DENORM: go to ssind
11104 	cmpi.b		%d1,&QNAN
11105 	beq.l		src_qnan		# QNAN: return the src QNAN
11106 	bra.l		src_snan		# any other tag is handled as SNAN
11107 
11108 	global		ftan
11109 ftan:
11110 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11111 	beq.l		stan			# tag == 0: go to stan
11112 	cmpi.b		%d1,&ZERO
11113 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11114 	cmpi.b		%d1,&INF
11115 	beq.l		t_operr			# INF: operand error
11116 	cmpi.b		%d1,&DENORM
11117 	beq.l		stand			# DENORM: go to stand
11118 	cmpi.b		%d1,&QNAN
11119 	beq.l		src_qnan		# QNAN: return the src QNAN
11120 	bra.l		src_snan		# any other tag is handled as SNAN
11121 
11122 	global		fetox
11123 fetox:
11124 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11125 	beq.l		setox			# tag == 0: go to setox
11126 	cmpi.b		%d1,&ZERO
11127 	beq.l		ld_pone			# ZERO: return +1
11128 	cmpi.b		%d1,&INF
11129 	beq.l		szr_inf			# INF: +INF if positive, +ZERO if negative
11130 	cmpi.b		%d1,&DENORM
11131 	beq.l		setoxd			# DENORM: go to setoxd
11132 	cmpi.b		%d1,&QNAN
11133 	beq.l		src_qnan		# QNAN: return the src QNAN
11134 	bra.l		src_snan		# any other tag is handled as SNAN
11135 
11136 	global		ftwotox
11137 ftwotox:
11138 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11139 	beq.l		stwotox			# tag == 0: go to stwotox
11140 	cmpi.b		%d1,&ZERO
11141 	beq.l		ld_pone			# ZERO: return +1
11142 	cmpi.b		%d1,&INF
11143 	beq.l		szr_inf			# INF: +INF if positive, +ZERO if negative
11144 	cmpi.b		%d1,&DENORM
11145 	beq.l		stwotoxd		# DENORM: go to stwotoxd
11146 	cmpi.b		%d1,&QNAN
11147 	beq.l		src_qnan		# QNAN: return the src QNAN
11148 	bra.l		src_snan		# any other tag is handled as SNAN
11149 
11150 	global		ftentox
11151 ftentox:
11152 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11153 	beq.l		stentox			# tag == 0: go to stentox
11154 	cmpi.b		%d1,&ZERO
11155 	beq.l		ld_pone			# ZERO: return +1
11156 	cmpi.b		%d1,&INF
11157 	beq.l		szr_inf			# INF: +INF if positive, +ZERO if negative
11158 	cmpi.b		%d1,&DENORM
11159 	beq.l		stentoxd		# DENORM: go to stentoxd
11160 	cmpi.b		%d1,&QNAN
11161 	beq.l		src_qnan		# QNAN: return the src QNAN
11162 	bra.l		src_snan		# any other tag is handled as SNAN
11163 
11164 	global		flogn
11165 flogn:
11166 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11167 	beq.l		slogn			# tag == 0: go to slogn
11168 	cmpi.b		%d1,&ZERO
11169 	beq.l		t_dz2			# ZERO: go to t_dz2
11170 	cmpi.b		%d1,&INF
11171 	beq.l		sopr_inf		# INF: +INF if positive, operand error if negative
11172 	cmpi.b		%d1,&DENORM
11173 	beq.l		slognd			# DENORM: go to slognd
11174 	cmpi.b		%d1,&QNAN
11175 	beq.l		src_qnan		# QNAN: return the src QNAN
11176 	bra.l		src_snan		# any other tag is handled as SNAN
11177 
11178 	global		flog10
11179 flog10:
11180 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11181 	beq.l		slog10			# tag == 0: go to slog10
11182 	cmpi.b		%d1,&ZERO
11183 	beq.l		t_dz2			# ZERO: go to t_dz2
11184 	cmpi.b		%d1,&INF
11185 	beq.l		sopr_inf		# INF: +INF if positive, operand error if negative
11186 	cmpi.b		%d1,&DENORM
11187 	beq.l		slog10d			# DENORM: go to slog10d
11188 	cmpi.b		%d1,&QNAN
11189 	beq.l		src_qnan		# QNAN: return the src QNAN
11190 	bra.l		src_snan		# any other tag is handled as SNAN
11191 
11192 	global		flog2
11193 flog2:
11194 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11195 	beq.l		slog2			# tag == 0: go to slog2
11196 	cmpi.b		%d1,&ZERO
11197 	beq.l		t_dz2			# ZERO: go to t_dz2
11198 	cmpi.b		%d1,&INF
11199 	beq.l		sopr_inf		# INF: +INF if positive, operand error if negative
11200 	cmpi.b		%d1,&DENORM
11201 	beq.l		slog2d			# DENORM: go to slog2d
11202 	cmpi.b		%d1,&QNAN
11203 	beq.l		src_qnan		# QNAN: return the src QNAN
11204 	bra.l		src_snan		# any other tag is handled as SNAN
11205 
11206 	global		fcosh
11207 fcosh:
11208 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11209 	beq.l		scosh			# tag == 0: go to scosh
11210 	cmpi.b		%d1,&ZERO
11211 	beq.l		ld_pone			# ZERO: return +1
11212 	cmpi.b		%d1,&INF
11213 	beq.l		ld_pinf			# INF: return +INF for either sign
11214 	cmpi.b		%d1,&DENORM
11215 	beq.l		scoshd			# DENORM: go to scoshd
11216 	cmpi.b		%d1,&QNAN
11217 	beq.l		src_qnan		# QNAN: return the src QNAN
11218 	bra.l		src_snan		# any other tag is handled as SNAN
11219 
11220 	global		facos
11221 facos:
11222 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11223 	beq.l		sacos			# tag == 0: go to sacos
11224 	cmpi.b		%d1,&ZERO
11225 	beq.l		ld_ppi2			# ZERO: return +pi/2
11226 	cmpi.b		%d1,&INF
11227 	beq.l		t_operr			# INF: operand error
11228 	cmpi.b		%d1,&DENORM
11229 	beq.l		sacosd			# DENORM: go to sacosd
11230 	cmpi.b		%d1,&QNAN
11231 	beq.l		src_qnan		# QNAN: return the src QNAN
11232 	bra.l		src_snan		# any other tag is handled as SNAN
11233 
11234 	global		fcos
11235 fcos:
11236 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11237 	beq.l		scos			# tag == 0: go to scos
11238 	cmpi.b		%d1,&ZERO
11239 	beq.l		ld_pone			# ZERO: return +1
11240 	cmpi.b		%d1,&INF
11241 	beq.l		t_operr			# INF: operand error
11242 	cmpi.b		%d1,&DENORM
11243 	beq.l		scosd			# DENORM: go to scosd
11244 	cmpi.b		%d1,&QNAN
11245 	beq.l		src_qnan		# QNAN: return the src QNAN
11246 	bra.l		src_snan		# any other tag is handled as SNAN
11247 
11248 	global		fgetexp
11249 fgetexp:
11250 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11251 	beq.l		sgetexp			# tag == 0: go to sgetexp
11252 	cmpi.b		%d1,&ZERO
11253 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11254 	cmpi.b		%d1,&INF
11255 	beq.l		t_operr			# INF: operand error
11256 	cmpi.b		%d1,&DENORM
11257 	beq.l		sgetexpd		# DENORM: go to sgetexpd
11258 	cmpi.b		%d1,&QNAN
11259 	beq.l		src_qnan		# QNAN: return the src QNAN
11260 	bra.l		src_snan		# any other tag is handled as SNAN
11261 
11262 	global		fgetman
11263 fgetman:
11264 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11265 	beq.l		sgetman			# tag == 0: go to sgetman
11266 	cmpi.b		%d1,&ZERO
11267 	beq.l		src_zero		# ZERO: return zero w/ sign of src
11268 	cmpi.b		%d1,&INF
11269 	beq.l		t_operr			# INF: operand error
11270 	cmpi.b		%d1,&DENORM
11271 	beq.l		sgetmand		# DENORM: go to sgetmand
11272 	cmpi.b		%d1,&QNAN
11273 	beq.l		src_qnan		# QNAN: return the src QNAN
11274 	bra.l		src_snan		# any other tag is handled as SNAN
11275 
11276 	global		fsincos
11277 fsincos:
11278 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11279 	beq.l		ssincos			# tag == 0: go to ssincos
11280 	cmpi.b		%d1,&ZERO
11281 	beq.l		ssincosz		# ZERO: cos = 1, sin = zero w/ sign of src
11282 	cmpi.b		%d1,&INF
11283 	beq.l		ssincosi		# INF: cos = QNAN, then operand error
11284 	cmpi.b		%d1,&DENORM
11285 	beq.l		ssincosd		# DENORM: go to ssincosd
11286 	cmpi.b		%d1,&QNAN
11287 	beq.l		ssincosqnan		# QNAN: store it as cos, then return it
11288 	bra.l		ssincossnan		# any other tag is handled as SNAN
11289 
11290 	global		fmod
11291 fmod:
11292 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11293 	beq.l		smod_snorm		# tag == 0: dispatch on dst tag
11294 	cmpi.b		%d1,&ZERO
11295 	beq.l		smod_szero		# src ZERO: dispatch on dst tag
11296 	cmpi.b		%d1,&INF
11297 	beq.l		smod_sinf		# src INF: dispatch on dst tag
11298 	cmpi.b		%d1,&DENORM
11299 	beq.l		smod_sdnrm		# src DENORM: dispatch on dst tag
11300 	cmpi.b		%d1,&QNAN
11301 	beq.l		sop_sqnan		# src QNAN: NAN priority handling
11302 	bra.l		sop_ssnan		# any other tag is handled as src SNAN
11303 
11304 	global		frem
11305 frem:
11306 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11307 	beq.l		srem_snorm		# tag == 0: dispatch on dst tag
11308 	cmpi.b		%d1,&ZERO
11309 	beq.l		srem_szero		# src ZERO: dispatch on dst tag
11310 	cmpi.b		%d1,&INF
11311 	beq.l		srem_sinf		# src INF: dispatch on dst tag
11312 	cmpi.b		%d1,&DENORM
11313 	beq.l		srem_sdnrm		# src DENORM: dispatch on dst tag
11314 	cmpi.b		%d1,&QNAN
11315 	beq.l		sop_sqnan		# src QNAN: NAN priority handling
11316 	bra.l		sop_ssnan		# any other tag is handled as src SNAN
11317 
11318 	global		fscale
11319 fscale:
11320 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11321 	beq.l		sscale_snorm		# tag == 0: dispatch on dst tag
11322 	cmpi.b		%d1,&ZERO
11323 	beq.l		sscale_szero		# src ZERO: dispatch on dst tag
11324 	cmpi.b		%d1,&INF
11325 	beq.l		sscale_sinf		# src INF: dispatch on dst tag
11326 	cmpi.b		%d1,&DENORM
11327 	beq.l		sscale_sdnrm		# src DENORM: dispatch on dst tag
11328 	cmpi.b		%d1,&QNAN
11329 	beq.l		sop_sqnan		# src QNAN: NAN priority handling
11330 	bra.l		sop_ssnan		# any other tag is handled as src SNAN
11331 
11332 #########################################################################
11333 # XDEF ****************************************************************	#
11334 #	fgen_except(): catch an exception during transcendental		#
11335 #		       emulation					#
11336 #									#
11337 # XREF ****************************************************************	#
11338 #	fmul() - emulate a multiply instruction				#
11339 #	fadd() - emulate an add instruction				#
11340 #	fin() - emulate an fmove instruction				#
11341 #									#
11342 # INPUT ***************************************************************	#
11343 #	fp0 = destination operand					#
11344 #	d0  = type of instruction that took exception			#
11345 #	fsave frame = source operand					#
11346 #									#
11347 # OUTPUT **************************************************************	#
11348 #	fp0 = result							#
11349 #	fp1 = EXOP							#
11350 #									#
11351 # ALGORITHM ***********************************************************	#
11352 #	An exception occurred on the last instruction of the		#
11353 # transcendental emulation. hopefully, this won't be happening much	#
11354 # because it will be VERY slow.						#
11355 #	The only exceptions capable of passing through here are		#
11356 # Overflow, Underflow, and Unsupported Data Type.			#
11357 #									#
11358 #########################################################################
11359 
11360 	global		fgen_except
11361 fgen_except:				# re-run the faulting fmov/fadd/fmul through fin/fadd/fmul
11362 	cmpi.b		0x3(%sp),&0x7		# is exception UNSUPP? NOTE(review): src op ptr below is 0x4(%sp); confirm 0x3(%sp) is the intended frame byte
11363 	beq.b		fge_unsupp		# yes
11364 
11365 	mov.b		&NORM,STAG(%a6)		# not UNSUPP: tag the src operand as NORM
11366 
11367 fge_cont:
11368 	mov.b		&NORM,DTAG(%a6)		# the dst operand is always tagged NORM here
11369 
11370 # ok, I have a problem with putting the dst op at FP_DST. the emulation
11371 # routines aren't supposed to alter the operands but we've just squashed
11372 # FP_DST here...
11373 
11374 # 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11375 # than a potential bug. to begin with, only the dyadic functions
11376 # frem, fmod, and fscale would get the dst trashed here. But, for
11377 # the 060SP, the FP_DST is never used again anyways.
11378 	fmovm.x		&0x80,FP_DST(%a6)	# dst op is in fp0
11379 
11380 	lea		0x4(%sp),%a0		# pass: ptr to src op
11381 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
11382 
11383 	cmpi.b		%d1,&FMOV_OP		# d1 selects which instruction to re-run
11384 	beq.b		fge_fin			# it was an "fmov"
11385 	cmpi.b		%d1,&FADD_OP
11386 	beq.b		fge_fadd		# it was an "fadd"
11387 fge_fmul:				# neither fmov nor fadd: handled as an "fmul"
11388 	bsr.l		fmul
11389 	rts
11390 fge_fadd:
11391 	bsr.l		fadd
11392 	rts
11393 fge_fin:
11394 	bsr.l		fin
11395 	rts
11396 
11397 fge_unsupp:
11398 	mov.b		&DENORM,STAG(%a6)	# UNSUPP: tag the src operand as DENORM
11399 	bra.b		fge_cont		# rejoin the common path
11400 
11401 #
11402 # This table holds the offsets of the emulation routines for each individual
11403 # math operation relative to the address of this table. Included are
11404 # routines like fadd/fmul/fabs as well as the transcendentals.
11405 # The location within the table is determined by the extension bits of the
11406 # operation longword.
11407 #
11408 
11409 	swbeg		&109			# the table below holds 109 (0x6d) long entries, 0x00-0x6c
11410 tbl_unsupp:				# entry = routine - tbl_unsupp; offset 0 marks a slot with no routine
11411 	long		fin		- tbl_unsupp	# 00: fmove
11412 	long		fint		- tbl_unsupp	# 01: fint
11413 	long		fsinh		- tbl_unsupp	# 02: fsinh
11414 	long		fintrz		- tbl_unsupp	# 03: fintrz
11415 	long		fsqrt		- tbl_unsupp	# 04: fsqrt
11416 	long		tbl_unsupp	- tbl_unsupp	# 05: no routine (offset 0)
11417 	long		flognp1		- tbl_unsupp	# 06: flognp1
11418 	long		tbl_unsupp	- tbl_unsupp	# 07: no routine (offset 0)
11419 	long		fetoxm1		- tbl_unsupp	# 08: fetoxm1
11420 	long		ftanh		- tbl_unsupp	# 09: ftanh
11421 	long		fatan		- tbl_unsupp	# 0a: fatan
11422 	long		tbl_unsupp	- tbl_unsupp	# 0b: no routine (offset 0)
11423 	long		fasin		- tbl_unsupp	# 0c: fasin
11424 	long		fatanh		- tbl_unsupp	# 0d: fatanh
11425 	long		fsine		- tbl_unsupp	# 0e: fsin (the sine routine is named fsine)
11426 	long		ftan		- tbl_unsupp	# 0f: ftan
11427 	long		fetox		- tbl_unsupp	# 10: fetox
11428 	long		ftwotox		- tbl_unsupp	# 11: ftwotox
11429 	long		ftentox		- tbl_unsupp	# 12: ftentox
11430 	long		tbl_unsupp	- tbl_unsupp	# 13: no routine (offset 0)
11431 	long		flogn		- tbl_unsupp	# 14: flogn
11432 	long		flog10		- tbl_unsupp	# 15: flog10
11433 	long		flog2		- tbl_unsupp	# 16: flog2
11434 	long		tbl_unsupp	- tbl_unsupp	# 17: no routine (offset 0)
11435 	long		fabs		- tbl_unsupp	# 18: fabs
11436 	long		fcosh		- tbl_unsupp	# 19: fcosh
11437 	long		fneg		- tbl_unsupp	# 1a: fneg
11438 	long		tbl_unsupp	- tbl_unsupp	# 1b: no routine (offset 0)
11439 	long		facos		- tbl_unsupp	# 1c: facos
11440 	long		fcos		- tbl_unsupp	# 1d: fcos
11441 	long		fgetexp		- tbl_unsupp	# 1e: fgetexp
11442 	long		fgetman		- tbl_unsupp	# 1f: fgetman
11443 	long		fdiv		- tbl_unsupp	# 20: fdiv
11444 	long		fmod		- tbl_unsupp	# 21: fmod
11445 	long		fadd		- tbl_unsupp	# 22: fadd
11446 	long		fmul		- tbl_unsupp	# 23: fmul
11447 	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
11448 	long		frem		- tbl_unsupp	# 25: frem
11449 	long		fscale		- tbl_unsupp	# 26: fscale
11450 	long		fsglmul		- tbl_unsupp	# 27: fsglmul
11451 	long		fsub		- tbl_unsupp	# 28: fsub
11452 	long		tbl_unsupp	- tbl_unsupp	# 29: no routine (offset 0)
11453 	long		tbl_unsupp	- tbl_unsupp	# 2a: no routine (offset 0)
11454 	long		tbl_unsupp	- tbl_unsupp	# 2b: no routine (offset 0)
11455 	long		tbl_unsupp	- tbl_unsupp	# 2c: no routine (offset 0)
11456 	long		tbl_unsupp	- tbl_unsupp	# 2d: no routine (offset 0)
11457 	long		tbl_unsupp	- tbl_unsupp	# 2e: no routine (offset 0)
11458 	long		tbl_unsupp	- tbl_unsupp	# 2f: no routine (offset 0)
11459 	long		fsincos		- tbl_unsupp	# 30: fsincos
11460 	long		fsincos		- tbl_unsupp	# 31: fsincos
11461 	long		fsincos		- tbl_unsupp	# 32: fsincos
11462 	long		fsincos		- tbl_unsupp	# 33: fsincos
11463 	long		fsincos		- tbl_unsupp	# 34: fsincos
11464 	long		fsincos		- tbl_unsupp	# 35: fsincos
11465 	long		fsincos		- tbl_unsupp	# 36: fsincos
11466 	long		fsincos		- tbl_unsupp	# 37: fsincos
11467 	long		fcmp		- tbl_unsupp	# 38: fcmp
11468 	long		tbl_unsupp	- tbl_unsupp	# 39: no routine (offset 0)
11469 	long		ftst		- tbl_unsupp	# 3a: ftst
11470 	long		tbl_unsupp	- tbl_unsupp	# 3b: no routine (offset 0)
11471 	long		tbl_unsupp	- tbl_unsupp	# 3c: no routine (offset 0)
11472 	long		tbl_unsupp	- tbl_unsupp	# 3d: no routine (offset 0)
11473 	long		tbl_unsupp	- tbl_unsupp	# 3e: no routine (offset 0)
11474 	long		tbl_unsupp	- tbl_unsupp	# 3f: no routine (offset 0)
11475 	long		fsin		- tbl_unsupp	# 40: fsmove (fsin is the fsmove emulation entry)
11476 	long		fssqrt		- tbl_unsupp	# 41: fssqrt
11477 	long		tbl_unsupp	- tbl_unsupp	# 42: no routine (offset 0)
11478 	long		tbl_unsupp	- tbl_unsupp	# 43: no routine (offset 0)
11479 	long		fdin		- tbl_unsupp	# 44: fdmove
11480 	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
11481 	long		tbl_unsupp	- tbl_unsupp	# 46: no routine (offset 0)
11482 	long		tbl_unsupp	- tbl_unsupp	# 47: no routine (offset 0)
11483 	long		tbl_unsupp	- tbl_unsupp	# 48: no routine (offset 0)
11484 	long		tbl_unsupp	- tbl_unsupp	# 49: no routine (offset 0)
11485 	long		tbl_unsupp	- tbl_unsupp	# 4a: no routine (offset 0)
11486 	long		tbl_unsupp	- tbl_unsupp	# 4b: no routine (offset 0)
11487 	long		tbl_unsupp	- tbl_unsupp	# 4c: no routine (offset 0)
11488 	long		tbl_unsupp	- tbl_unsupp	# 4d: no routine (offset 0)
11489 	long		tbl_unsupp	- tbl_unsupp	# 4e: no routine (offset 0)
11490 	long		tbl_unsupp	- tbl_unsupp	# 4f: no routine (offset 0)
11491 	long		tbl_unsupp	- tbl_unsupp	# 50: no routine (offset 0)
11492 	long		tbl_unsupp	- tbl_unsupp	# 51: no routine (offset 0)
11493 	long		tbl_unsupp	- tbl_unsupp	# 52: no routine (offset 0)
11494 	long		tbl_unsupp	- tbl_unsupp	# 53: no routine (offset 0)
11495 	long		tbl_unsupp	- tbl_unsupp	# 54: no routine (offset 0)
11496 	long		tbl_unsupp	- tbl_unsupp	# 55: no routine (offset 0)
11497 	long		tbl_unsupp	- tbl_unsupp	# 56: no routine (offset 0)
11498 	long		tbl_unsupp	- tbl_unsupp	# 57: no routine (offset 0)
11499 	long		fsabs		- tbl_unsupp	# 58: fsabs
11500 	long		tbl_unsupp	- tbl_unsupp	# 59: no routine (offset 0)
11501 	long		fsneg		- tbl_unsupp	# 5a: fsneg
11502 	long		tbl_unsupp	- tbl_unsupp	# 5b: no routine (offset 0)
11503 	long		fdabs		- tbl_unsupp	# 5c: fdabs
11504 	long		tbl_unsupp	- tbl_unsupp	# 5d: no routine (offset 0)
11505 	long		fdneg		- tbl_unsupp	# 5e: fdneg
11506 	long		tbl_unsupp	- tbl_unsupp	# 5f: no routine (offset 0)
11507 	long		fsdiv		- tbl_unsupp	# 60: fsdiv
11508 	long		tbl_unsupp	- tbl_unsupp	# 61: no routine (offset 0)
11509 	long		fsadd		- tbl_unsupp	# 62: fsadd
11510 	long		fsmul		- tbl_unsupp	# 63: fsmul
11511 	long		fddiv		- tbl_unsupp	# 64: fddiv
11512 	long		tbl_unsupp	- tbl_unsupp	# 65: no routine (offset 0)
11513 	long		fdadd		- tbl_unsupp	# 66: fdadd
11514 	long		fdmul		- tbl_unsupp	# 67: fdmul
11515 	long		fssub		- tbl_unsupp	# 68: fssub
11516 	long		tbl_unsupp	- tbl_unsupp	# 69: no routine (offset 0)
11517 	long		tbl_unsupp	- tbl_unsupp	# 6a: no routine (offset 0)
11518 	long		tbl_unsupp	- tbl_unsupp	# 6b: no routine (offset 0)
11519 	long		fdsub		- tbl_unsupp	# 6c: fdsub
11520 
11521 #########################################################################
11522 # XDEF ****************************************************************	#
11523 #	fmul(): emulates the fmul instruction				#
11524 #	fsmul(): emulates the fsmul instruction				#
11525 #	fdmul(): emulates the fdmul instruction				#
11526 #									#
11527 # XREF ****************************************************************	#
11528 #	scale_to_zero_src() - scale src exponent to zero		#
11529 #	scale_to_zero_dst() - scale dst exponent to zero		#
11530 #	unf_res() - return default underflow result			#
11531 #	ovf_res() - return default overflow result			#
11532 #	res_qnan() - return QNAN result					#
11533 #	res_snan() - return SNAN result					#
11534 #									#
11535 # INPUT ***************************************************************	#
11536 #	a0 = pointer to extended precision source operand		#
11537 #	a1 = pointer to extended precision destination operand		#
11538 #	d0 = rnd prec,mode						#
11539 #									#
11540 # OUTPUT **************************************************************	#
11541 #	fp0 = result							#
11542 #	fp1 = EXOP (if exception occurred)				#
11543 #									#
11544 # ALGORITHM ***********************************************************	#
11545 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
11546 # norms/denorms into ext/sgl/dbl precision.				#
11547 #	For norms/denorms, scale the exponents such that a multiply	#
11548 # instruction won't cause an exception. Use the regular fmul to		#
11549 # compute a result. Check if the regular operands would have taken	#
11550 # an exception. If so, return the default overflow/underflow result	#
11551 # and return the EXOP if exceptions are enabled. Else, scale the	#
11552 # result operand to the proper exponent.				#
11553 #									#
11554 #########################################################################
11555 
11556 	align		0x10
11557 tbl_fmul_ovfl:				# overflow thresholds for the combined scale factor, indexed by rnd prec
11558 	long		0x3fff - 0x7ffe		# ext_max
11559 	long		0x3fff - 0x407e		# sgl_max
11560 	long		0x3fff - 0x43fe		# dbl_max
11561 tbl_fmul_unfl:				# underflow thresholds for the combined scale factor, indexed by rnd prec
11562 	long		0x3fff + 0x0001		# ext_unfl
11563 	long		0x3fff - 0x3f80		# sgl_unfl
11564 	long		0x3fff - 0x3c00		# dbl_unfl
11565 
11566 	global		fsmul
11567 fsmul:					# fsmul entry: force single rounding precision, then use fmul
11568 	andi.b		&0x30,%d0		# clear rnd prec (only bits 4-5 survive the mask)
11569 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11570 	bra.b		fmul			# join the common multiply code
11571 
11572 	global		fdmul
11573 fdmul:					# fdmul entry: force double rounding precision
11574 	andi.b		&0x30,%d0		# clear rnd prec (only bits 4-5 survive the mask)
11575 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; execution falls through into fmul
11576 
11577 	global		fmul
11578 fmul:					# a0 = src operand ptr, a1 = dst operand ptr, d0 = rnd prec,mode
11579 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11580 
11581 	clr.w		%d1			# zero the word that becomes the tbl_fmul_op index
11582 	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
11583 	lsl.b		&0x3,%d1		# dst tag selects the group of 8 table entries
11584 	or.b		STAG(%a6),%d1		# combine dst and src tags
11585 	bne.w		fmul_not_norm		# optimize on non-norm input
11586 
11587 fmul_norm:
11588 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand into FP_SCR1
11589 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11590 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11591 
11592 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand into FP_SCR0
11593 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11594 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11595 
11596 	bsr.l		scale_to_zero_src	# scale src exponent
11597 	mov.l		%d0,-(%sp)		# save scale factor 1
11598 
11599 	bsr.l		scale_to_zero_dst	# scale dst exponent
11600 
11601 	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
11602 
11603 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
11604 	lsr.b		&0x6,%d1		# shift to lo bits
11605 	mov.l		(%sp)+,%d0		# load S.F.
11606 	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11607 	beq.w		fmul_may_ovfl		# result may rnd to overflow
11608 	blt.w		fmul_ovfl		# result will overflow
11609 
11610 	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11611 	beq.w		fmul_may_unfl		# result may rnd to no unfl
11612 	bgt.w		fmul_unfl		# result will underflow
11613 
11614 #
11615 # NORMAL:
11616 # - the result of the multiply operation will neither overflow nor underflow.
11617 # - do the multiply to the proper precision and rounding mode.
11618 # - scale the result exponent using the scale factor. if both operands were
11619 # normalized then we really don't need to go through this scaling. but for now,
11620 # this will do.
11621 #
11622 fmul_normal:				# d0 = combined scale factor on entry
11623 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11624 
11625 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11626 	fmov.l		&0x0,%fpsr		# clear FPSR
11627 
11628 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11629 
11630 	fmov.l		%fpsr,%d1		# save status
11631 	fmov.l		&0x0,%fpcr		# clear FPCR
11632 
11633 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11634 
11635 fmul_normal_exit:			# also entered from fmul_may_ovfl/fmul_may_unfl with the result in fp0
11636 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11637 	mov.l		%d2,-(%sp)		# save d2
11638 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
11639 	mov.l		%d1,%d2			# make a copy
11640 	andi.l		&0x7fff,%d1		# strip sign
11641 	andi.w		&0x8000,%d2		# keep old sign
11642 	sub.l		%d0,%d1			# apply scale factor (exp -= d0)
11643 	or.w		%d2,%d1			# concat old sign,new exp
11644 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11645 	mov.l		(%sp)+,%d2		# restore d2
11646 	fmovm.x		FP_SCR0(%a6),&0x80	# return rescaled result in fp0
11647 	rts
11648 
11649 #
11650 # OVERFLOW:
11651 # - the result of the multiply operation is an overflow.
11652 # - do the multiply to the proper precision and rounding mode in order to
11653 # set the inexact bits.
11654 # - calculate the default result and return it in fp0.
11655 # - if overflow or inexact is enabled, we need a multiply result rounded to
11656 # extended precision. if the original operation was extended, then we have this
11657 # result. if the original operation was single or double, we have to do another
11658 # multiply using extended precision and the correct rounding mode. the result
11659 # of this operation then has its exponent scaled by -0x6000 to create the
11660 # exceptional operand.
11661 #
11662 fmul_ovfl:				# d0 = combined scale factor on entry
11663 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11664 
11665 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11666 	fmov.l		&0x0,%fpsr		# clear FPSR
11667 
11668 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11669 
11670 	fmov.l		%fpsr,%d1		# save status
11671 	fmov.l		&0x0,%fpcr		# clear FPCR
11672 
11673 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11674 
11675 # save setting this until now because this is where fmul_may_ovfl may jump in
11676 fmul_ovfl_tst:
11677 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11678 
11679 	mov.b		FPCR_ENABLE(%a6),%d1
11680 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11681 	bne.b		fmul_ovfl_ena		# yes
11682 
11683 # calculate the default result
11684 fmul_ovfl_dis:				# also reached from fmul_ovfl_ena once the EXOP is in fp1
11685 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11686 	sne		%d1			# set sign param accordingly
11687 	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
11688 	bsr.l		ovf_res			# calculate default result
11689 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11690 	fmovm.x		(%a0),&0x80		# return default result in fp0 (presumably a0 is set by ovf_res -- confirm)
11691 	rts
11692 
11693 #
11694 # OVFL is enabled; Create EXOP:
11695 # - if precision is extended, then we have the EXOP. simply bias the exponent
11696 # with an extra -0x6000. if the precision is single or double, we need to
11697 # calculate a result rounded to extended precision.
11698 #
11699 fmul_ovfl_ena:				# build the EXOP in fp1, then produce the default result
11700 	mov.l		L_SCR3(%a6),%d1
11701 	andi.b		&0xc0,%d1		# test the rnd prec
11702 	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
11703 
11704 fmul_ovfl_ena_cont:			# fp0 = product rounded to extended precision
11705 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
11706 
11707 	mov.l		%d2,-(%sp)		# save d2
11708 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11709 	mov.w		%d1,%d2			# make a copy
11710 	andi.l		&0x7fff,%d1		# strip sign
11711 	sub.l		%d0,%d1			# apply scale factor (exp -= d0)
11712 	subi.l		&0x6000,%d1		# subtract bias
11713 	andi.w		&0x7fff,%d1		# clear sign bit
11714 	andi.w		&0x8000,%d2		# keep old sign
11715 	or.w		%d2,%d1			# concat old sign,new exp
11716 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11717 	mov.l		(%sp)+,%d2		# restore d2
11718 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11719 	bra.b		fmul_ovfl_dis		# now calculate the default result for fp0
11720 
11721 fmul_ovfl_ena_sd:			# sgl/dbl: redo the multiply with only the rnd mode set
11722 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11723 
11724 	mov.l		L_SCR3(%a6),%d1
11725 	andi.b		&0x30,%d1		# keep rnd mode only
11726 	fmov.l		%d1,%fpcr		# set FPCR
11727 
11728 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11729 
11730 	fmov.l		&0x0,%fpcr		# clear FPCR
11731 	bra.b		fmul_ovfl_ena_cont	# go scale the exponent into the EXOP
11732 
11733 #
11734 # may OVERFLOW:
11735 # - the result of the multiply operation MAY overflow.
11736 # - do the multiply to the proper precision and rounding mode in order to
11737 # set the inexact bits.
11738 # - calculate the default result and return it in fp0.
11739 #
11740 fmul_may_ovfl:				# scale factor equals the overflow threshold: rounding decides
11741 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11742 
11743 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11744 	fmov.l		&0x0,%fpsr		# clear FPSR
11745 
11746 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11747 
11748 	fmov.l		%fpsr,%d1		# save status
11749 	fmov.l		&0x0,%fpcr		# clear FPCR
11750 
11751 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11752 
11753 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
11754 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
11755 	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
11756 
11757 # no, it didn't overflow; we have correct result
11758 	bra.w		fmul_normal_exit	# rescale the exponent and return
11759 
11760 #
11761 # UNDERFLOW:
11762 # - the result of the multiply operation is an underflow.
11763 # - do the multiply to the proper precision and rounding mode in order to
11764 # set the inexact bits.
11765 # - calculate the default result and return it in fp0.
11766 # - if underflow or inexact is enabled, we need a multiply result rounded to
11767 # extended precision. if the original operation was extended, then we have this
11768 # result. if the original operation was single or double, we have to do another
11769 # multiply using extended precision and the correct rounding mode. the result
11770 # of this operation then has its exponent scaled by +0x6000 to create the
11771 # exceptional operand.
11772 #
11773 fmul_unfl:				# d0 = combined scale factor on entry
11774 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11775 
11776 # for fun, let's use only extended precision, round to zero. then, let
11777 # the unf_res() routine figure out all the rest.
11778 # will we get the correct answer?
11779 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11780 
11781 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11782 	fmov.l		&0x0,%fpsr		# clear FPSR
11783 
11784 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11785 
11786 	fmov.l		%fpsr,%d1		# save status
11787 	fmov.l		&0x0,%fpcr		# clear FPCR
11788 
11789 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11790 
11791 	mov.b		FPCR_ENABLE(%a6),%d1
11792 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11793 	bne.b		fmul_unfl_ena		# yes
11794 
11795 fmul_unfl_dis:				# also reached from fmul_unfl_ena once the EXOP is in fp1
11796 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11797 
11798 	lea		FP_SCR0(%a6),%a0	# pass: result addr
11799 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11800 	bsr.l		unf_res			# calculate default result
11801 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
11802 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11803 	rts
11804 
11805 #
11806 # UNFL is enabled.
11807 #
11808 fmul_unfl_ena:				# build the EXOP in fp1; fp0 still holds the RZ product
11809 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11810 
11811 	mov.l		L_SCR3(%a6),%d1
11812 	andi.b		&0xc0,%d1		# is precision extended?
11813 	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
11814 
11815 # if the rnd mode is anything but RZ, then we have to re-do the above
11816 # multiplication because we used RZ for all.
11817 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11818 
11819 fmul_unfl_ena_cont:
11820 	fmov.l		&0x0,%fpsr		# clear FPSR
11821 
11822 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
11823 
11824 	fmov.l		&0x0,%fpcr		# clear FPCR
11825 
11826 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11827 	mov.l		%d2,-(%sp)		# save d2
11828 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11829 	mov.l		%d1,%d2			# make a copy
11830 	andi.l		&0x7fff,%d1		# strip sign
11831 	andi.w		&0x8000,%d2		# keep old sign
11832 	sub.l		%d0,%d1			# apply scale factor (exp -= d0)
11833 	addi.l		&0x6000,%d1		# add bias
11834 	andi.w		&0x7fff,%d1		# clear sign bit
11835 	or.w		%d2,%d1			# concat old sign,new exp
11836 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11837 	mov.l		(%sp)+,%d2		# restore d2
11838 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11839 	bra.w		fmul_unfl_dis		# now calculate the default result from fp0
11840 
11841 fmul_unfl_ena_sd:			# sgl/dbl: multiply with only the rnd mode set
11842 	mov.l		L_SCR3(%a6),%d1
11843 	andi.b		&0x30,%d1		# use only rnd mode
11844 	fmov.l		%d1,%fpcr		# set FPCR
11845 
11846 	bra.b		fmul_unfl_ena_cont
11847 
11848 # MAY UNDERFLOW:
11849 # -use the correct rounding mode and precision. this code favors operations
11850 # that do not underflow.
11851 fmul_may_unfl:				# scale factor equals the underflow threshold: rounding decides
11852 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11853 
11854 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11855 	fmov.l		&0x0,%fpsr		# clear FPSR
11856 
11857 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11858 
11859 	fmov.l		%fpsr,%d1		# save status
11860 	fmov.l		&0x0,%fpcr		# clear FPCR
11861 
11862 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11863 
11864 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
11865 	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
11866 	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
11867 	fblt.w		fmul_unfl		# |result| < 2; underflow occurred
11868 
11869 #
11870 # we still don't know if underflow occurred. result is ~ equal to 2. but,
11871 # we don't know if the result was an underflow that rounded up to a 2 or
11872 # a normalized number that rounded down to a 2. so, redo the entire operation
11873 # using RZ as the rounding mode to see what the pre-rounded result is.
11874 # this case should be relatively rare.
11875 #
11876 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
11877 
11878 	mov.l		L_SCR3(%a6),%d1
11879 	andi.b		&0xc0,%d1		# keep rnd prec
11880 	ori.b		&rz_mode*0x10,%d1	# insert RZ
11881 
11882 	fmov.l		%d1,%fpcr		# set FPCR
11883 	fmov.l		&0x0,%fpsr		# clear FPSR
11884 
11885 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
11886 
11887 	fmov.l		&0x0,%fpcr		# clear FPCR
11888 	fabs.x		%fp1			# make absolute value
11889 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
11890 	fbge.w		fmul_normal_exit	# no; no underflow occurred
11891 	bra.w		fmul_unfl		# yes, underflow occurred
11892 
11893 ################################################################################
11894 
11895 #
11896 # Multiply: inputs are not both normalized; what are they?
11897 #
11898 fmul_not_norm:				# d1.w = (dst tag << 3) | src tag, built in fmul
11899 	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1	# fetch handler offset for this tag pair
11900 	jmp		(tbl_fmul_op.b,%pc,%d1.w)	# jump to tbl_fmul_op + offset
11901 
11902 	swbeg		&48
11903 tbl_fmul_op:				# rows are "dst x src"; 8 slots per dst tag, last 2 of each group unused
11904 	short		fmul_norm	- tbl_fmul_op # NORM x NORM
11905 	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
11906 	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
11907 	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
11908 	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
11909 	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
11910 	short		tbl_fmul_op	- tbl_fmul_op #
11911 	short		tbl_fmul_op	- tbl_fmul_op #
11912 
11913 	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
11914 	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
11915 	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
11916 	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
11917 	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
11918 	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
11919 	short		tbl_fmul_op	- tbl_fmul_op #
11920 	short		tbl_fmul_op	- tbl_fmul_op #
11921 
11922 	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
11923 	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
11924 	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
11925 	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
11926 	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
11927 	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
11928 	short		tbl_fmul_op	- tbl_fmul_op #
11929 	short		tbl_fmul_op	- tbl_fmul_op #
11930 
11931 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
11932 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
11933 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
11934 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
11935 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
11936 	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
11937 	short		tbl_fmul_op	- tbl_fmul_op #
11938 	short		tbl_fmul_op	- tbl_fmul_op #
11939 
11940 	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
11941 	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
11942 	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
11943 	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
11944 	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
11945 	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
11946 	short		tbl_fmul_op	- tbl_fmul_op #
11947 	short		tbl_fmul_op	- tbl_fmul_op #
11948 
11949 	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
11950 	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
11951 	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
11952 	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
11953 	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
11954 	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
11955 	short		tbl_fmul_op	- tbl_fmul_op #
11956 	short		tbl_fmul_op	- tbl_fmul_op #
11957 
11958 fmul_res_operr:				# ZERO x INF or INF x ZERO (see tbl_fmul_op)
11959 	bra.l		res_operr		# long branch; the table only holds short offsets
11960 fmul_res_snan:				# either operand is an SNAN
11961 	bra.l		res_snan
11962 fmul_res_qnan:				# a QNAN operand and no SNAN
11963 	bra.l		res_qnan
11964 
11965 #
11966 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
11967 #
11968 	global		fmul_zero		# global for fsglmul
11969 fmul_zero:				# return a signed zero; a0 = src ptr, a1 = dst ptr
11970 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11971 	mov.b		DST_EX(%a1),%d1
11972 	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
11973 	bpl.b		fmul_zero_p		# result ZERO is pos.
11974 fmul_zero_n:
11975 	fmov.s		&0x80000000,%fp0	# load -ZERO
11976 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11977 	rts
11978 fmul_zero_p:
11979 	fmov.s		&0x00000000,%fp0	# load +ZERO
11980 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11981 	rts
11982 
11983 #
11984 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11985 #
11986 # Note: The j-bit for an infinity is a don't-care. However, to be
11987 # strictly compatible w/ the 68881/882, we make sure to return an
11988 # INF w/ the j-bit set if the input INF j-bit was set. Destination
11989 # INFs take priority.
11990 #
11991 	global		fmul_inf_dst		# global for fsglmul
11992 fmul_inf_dst:				# the dst operand is the INF that gets returned
11993 	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
11994 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11995 	mov.b		DST_EX(%a1),%d1
11996 	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
11997 	bpl.b		fmul_inf_dst_p		# result INF is pos.
11998 fmul_inf_dst_n:				# also the negative exit for fmul_inf_src
11999 	fabs.x		%fp0			# clear result sign
12000 	fneg.x		%fp0			# set result sign
12001 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12002 	rts
12003 fmul_inf_dst_p:				# also the positive exit for fmul_inf_src
12004 	fabs.x		%fp0			# clear result sign
12005 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
12006 	rts
12007 
12008 	global		fmul_inf_src		# global for fsglmul
12009 fmul_inf_src:				# the src operand is the INF that gets returned
12010 	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
12011 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
12012 	mov.b		DST_EX(%a1),%d1
12013 	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
12014 	bpl.b		fmul_inf_dst_p		# result INF is pos.
12015 	bra.b		fmul_inf_dst_n		# result INF is neg.
12016 
12017 #########################################################################
12018 # XDEF ****************************************************************	#
12019 #	fin(): emulates the fmove instruction				#
12020 #	fsin(): emulates the fsmove instruction				#
12021 #	fdin(): emulates the fdmove instruction				#
12022 #									#
12023 # XREF ****************************************************************	#
12024 #	norm() - normalize mantissa for EXOP on denorm			#
12025 #	scale_to_zero_src() - scale src exponent to zero		#
12026 #	ovf_res() - return default overflow result			#
12027 #	unf_res() - return default underflow result			#
12028 #	res_qnan_1op() - return QNAN result				#
12029 #	res_snan_1op() - return SNAN result				#
12030 #									#
12031 # INPUT ***************************************************************	#
12032 #	a0 = pointer to extended precision source operand		#
12033 #	d0 = round prec/mode						#
12034 #									#
12035 # OUTPUT **************************************************************	#
12036 #	fp0 = result							#
12037 #	fp1 = EXOP (if exception occurred)				#
12038 #									#
12039 # ALGORITHM ***********************************************************	#
12040 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
12041 # norms into extended, single, and double precision.			#
12042 #	Norms can be emulated w/ a regular fmove instruction. For	#
12043 # sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
12044 # if the result would have overflowed/underflowed. If so, use unf_res()	#
12045 # or ovf_res() to return the default result. Also return EXOP if	#
12046 # exception is enabled. If no exception, return the default result.	#
12047 #	Unnorms don't pass through here.				#
12048 #									#
12049 #########################################################################
12050 
12051 	global		fsin
12052 fsin:					# fsmove entry: force single rounding precision, then use fin
12053 	andi.b		&0x30,%d0		# clear rnd prec
12054 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
12055 	bra.b		fin			# join the common move-in code
12056 
12057 	global		fdin
12058 fdin:					# fdmove entry: force double rounding precision
12059 	andi.b		&0x30,%d0		# clear rnd prec
12060 	ori.b		&d_mode*0x10,%d0	# insert dbl precision; execution falls through into fin
12061 
12062 	global		fin
12063 fin:					# a0 = src operand ptr, d0 = rnd prec,mode
12064 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12065 
12066 	mov.b		STAG(%a6),%d1		# fetch src optype tag
12067 	bne.w		fin_not_norm		# optimize on non-norm input
12068 
12069 #
12070 # FP MOVE IN: NORMs and DENORMs ONLY!
12071 #
12072 fin_norm:
12073 	andi.b		&0xc0,%d0		# is precision extended?
12074 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
12075 
12076 #
12077 # precision selected is extended. so...we cannot get an underflow
12078 # or overflow because of rounding to the correct precision. so...
12079 # skip the scaling and unscaling...
12080 #
12081 	tst.b		SRC_EX(%a0)		# is the operand negative?
12082 	bpl.b		fin_norm_done		# no
12083 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
12084 fin_norm_done:
12085 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
12086 	rts
12087 
12088 #
12089 # for an extended precision DENORM, the UNFL exception bit is set
12090 # the accrued bit is NOT set in this instance(no inexactness!)
12091 #
12092 fin_denorm:				# d0 = rnd prec,mode; a0 = src operand ptr
12093 	andi.b		&0xc0,%d0		# is precision extended?
12094 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
12095 
12096 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12097 	tst.b		SRC_EX(%a0)		# is the operand negative?
12098 	bpl.b		fin_denorm_done		# no
12099 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
12100 fin_denorm_done:
12101 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
12102 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12103 	bne.b		fin_denorm_unfl_ena	# yes
12104 	rts
12105 
12106 #
12107 # the input is an extended DENORM and underflow is enabled in the FPCR.
12108 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12109 # exponent and insert back into the operand.
12110 #
12111 fin_denorm_unfl_ena:
12112 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand into FP_SCR0
12113 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12114 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12115 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
12116 	bsr.l		norm			# normalize result
12117 	neg.w		%d0			# new exponent = -(shft val)
12118 	addi.w		&0x6000,%d0		# add new bias to exponent
12119 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
12120 	andi.w		&0x8000,%d1		# keep old sign
12121 	andi.w		&0x7fff,%d0		# clear sign position
12122 	or.w		%d1,%d0			# concat new exp,old sign
12123 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
12124 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12125 	rts
12126 
12127 #
12128 # operand is to be rounded to single or double precision
12129 #
12130 fin_not_ext:				# d0.b = rnd prec bits only (masked with 0xc0 by the caller paths)
12131 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12132 	bne.b		fin_dbl			# not sgl, so handle as dbl
12133 
12134 #
12135 # operand is to be rounded to single precision
12136 #
12137 fin_sgl:
12138 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand into FP_SCR0
12139 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12140 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12141 	bsr.l		scale_to_zero_src	# calculate scale factor
12142 
12143 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
12144 	bge.w		fin_sd_unfl		# yes; go handle underflow
12145 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
12146 	beq.w		fin_sd_may_ovfl		# maybe; go check
12147 	blt.w		fin_sd_ovfl		# yes; go handle overflow
12148 
12149 #
12150 # operand will NOT overflow or underflow when moved into the fp reg file
12151 #
12152 fin_sd_normal:				# d0 = scale factor; also entered from fin_dbl
12153 	fmov.l		&0x0,%fpsr		# clear FPSR
12154 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12155 
12156 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
12157 
12158 	fmov.l		%fpsr,%d1		# save FPSR
12159 	fmov.l		&0x0,%fpcr		# clear FPCR
12160 
12161 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12162 
12163 fin_sd_normal_exit:
12164 	mov.l		%d2,-(%sp)		# save d2
12165 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12166 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
12167 	mov.w		%d1,%d2			# make a copy
12168 	andi.l		&0x7fff,%d1		# strip sign
12169 	sub.l		%d0,%d1			# apply scale factor (exp -= d0)
12170 	andi.w		&0x8000,%d2		# keep old sign
12171 	or.w		%d1,%d2			# concat old sign,new exponent
12172 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12173 	mov.l		(%sp)+,%d2		# restore d2
12174 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12175 	rts
12176 
12177 #
12178 # operand is to be rounded to double precision.
12179 # same flow as fin_sgl, using the double-precision exponent thresholds.
12180 fin_dbl:
12181 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12182 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12183 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12184 	bsr.l		scale_to_zero_src	# calculate scale factor
12185 
12186 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
12187 	bge.w		fin_sd_unfl		# yes; go handle underflow
12188 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
12189 	beq.w		fin_sd_may_ovfl		# maybe; go check
12190 	blt.w		fin_sd_ovfl		# yes; go handle overflow
12191 	bra.w		fin_sd_normal		# no; go handle normalized op
12192 
12193 #
12194 # operand WILL underflow when moved in to the fp register file.
12195 # set UNFL (and N if negative), optionally build the EXOP, then return unf_res().
12196 fin_sd_unfl:
12197 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12198 
12199 	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
12200 	bpl.b		fin_sd_unfl_tst		# no; leave 'N' alone
12201 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
12202 
12203 # if underflow or inexact is enabled, then go calculate the EXOP first.
12204 fin_sd_unfl_tst:
12205 	mov.b		FPCR_ENABLE(%a6),%d1
12206 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12207 	bne.b		fin_sd_unfl_ena		# yes
12208 
12209 fin_sd_unfl_dis:
12210 	lea		FP_SCR0(%a6),%a0	# pass: result addr
12211 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12212 	bsr.l		unf_res			# calculate default result
12213 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
12214 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12215 	rts
12216 
12217 #
12218 # operand will underflow AND underflow or inexact is enabled.
12219 # Therefore, we must return the result rounded to extended precision.
12220 # The EXOP is built in FP_SCR1: unscaled exponent plus the 0x6000 bias, old sign.
12221 fin_sd_unfl_ena:
12222 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12223 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12224 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12225 
12226 	mov.l		%d2,-(%sp)		# save d2
12227 	mov.w		%d1,%d2			# make a copy
12228 	andi.l		&0x7fff,%d1		# strip sign
12229 	sub.l		%d0,%d1			# subtract scale factor
12230 	andi.w		&0x8000,%d2		# extract old sign
12231 	addi.l		&0x6000,%d1		# add new bias
12232 	andi.w		&0x7fff,%d1		# keep exponent out of sign position
12233 	or.w		%d1,%d2			# concat old sign,new exp
12234 	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
12235 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12236 	mov.l		(%sp)+,%d2		# restore d2
12237 	bra.b		fin_sd_unfl_dis		# then create the default result
12238 
12239 #
12240 # operand WILL overflow.
12241 # do the move anyway so that INEX2,N are captured from the FPSR.
12242 fin_sd_ovfl:
12243 	fmov.l		&0x0,%fpsr		# clear FPSR
12244 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12245 
12246 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
12247 
12248 	fmov.l		&0x0,%fpcr		# clear FPCR
12249 	fmov.l		%fpsr,%d1		# save FPSR
12250 
12251 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12252 
12253 fin_sd_ovfl_tst:
12254 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12255 
12256 	mov.b		FPCR_ENABLE(%a6),%d1
12257 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12258 	bne.b		fin_sd_ovfl_ena		# yes
12259 
12260 #
12261 # Create the default result by calling ovf_res(). Reached directly when neither
12262 # OVFL nor INEX is enabled, and from fin_sd_ovfl_ena once the EXOP is in fp1.
12263 #
12264 fin_sd_ovfl_dis:
12265 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12266 	sne		%d1			# set sign param accordingly
12267 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12268 	bsr.l		ovf_res			# calculate default result
12269 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12270 	fmovm.x		(%a0),&0x80		# return default result in fp0
12271 	rts
12272 
12273 #
12274 # OVFL or INEX is enabled.
12275 # the INEX2 bit has already been updated by the round to the correct precision.
12276 # now, build the EXOP in FP_SCR0 (unscaled exponent less the 0x6000 bias); FPSR is not altered.
12277 #
12278 fin_sd_ovfl_ena:
12279 	mov.l		%d2,-(%sp)		# save d2
12280 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12281 	mov.l		%d1,%d2			# make a copy
12282 	andi.l		&0x7fff,%d1		# strip sign
12283 	andi.w		&0x8000,%d2		# keep old sign
12284 	sub.l		%d0,%d1			# subtract scale factor (undo scaling)
12285 	sub.l		&0x6000,%d1		# subtract bias
12286 	andi.w		&0x7fff,%d1		# keep exponent out of sign position
12287 	or.w		%d2,%d1			# concat old sign,new exp
12288 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12289 	mov.l		(%sp)+,%d2		# restore d2
12290 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12291 	bra.b		fin_sd_ovfl_dis		# then create the default result
12292 
12293 #
12294 # the move in MAY overflow. so...
12295 # do the move on the scaled operand and see whether rounding carried |result| up to 2.
12296 fin_sd_may_ovfl:
12297 	fmov.l		&0x0,%fpsr		# clear FPSR
12298 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12299 
12300 	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
12301 
12302 	fmov.l		%fpsr,%d1		# save status
12303 	fmov.l		&0x0,%fpcr		# clear FPCR
12304 
12305 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12306 
12307 	fabs.x		%fp0,%fp1		# make a copy of result
12308 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
12309 	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
12310 
12311 # no, it didn't overflow; we have correct result
12312 	bra.w		fin_sd_normal_exit	# unscale and return it in fp0
12313 
12314 ##########################################################################
12315 
12316 #
12317 # operand is not a NORM: check its optype (in d1) and branch accordingly
12318 #
12319 fin_not_norm:
12320 	cmpi.b		%d1,&DENORM		# weed out DENORM
12321 	beq.w		fin_denorm
12322 	cmpi.b		%d1,&SNAN		# weed out SNANs
12323 	beq.l		res_snan_1op
12324 	cmpi.b		%d1,&QNAN		# weed out QNANs
12325 	beq.l		res_qnan_1op
12326 
12327 #
12328 # do the fmove in; at this point, only possible ops are ZERO and INF.
12329 # use fmov to determine ccodes.
12330 # prec:mode should be zero at this point but it won't affect answer anyways.
12331 #
12332 	fmov.x		SRC(%a0),%fp0		# do fmove in
12333 	fmov.l		%fpsr,%d0		# no exceptions possible
12334 	rol.l		&0x8,%d0		# put ccodes in lo byte
12335 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
12336 	rts
12337 
12338 #########################################################################
12339 # XDEF ****************************************************************	#
12340 #	fdiv(): emulates the fdiv instruction				#
12341 #	fsdiv(): emulates the fsdiv instruction				#
12342 #	fddiv(): emulates the fddiv instruction				#
12343 #									#
12344 # XREF ****************************************************************	#
12345 #	scale_to_zero_src() - scale src exponent to zero		#
12346 #	scale_to_zero_dst() - scale dst exponent to zero		#
12347 #	unf_res() - return default underflow result			#
12348 #	ovf_res() - return default overflow result			#
12349 #	res_qnan() - return QNAN result					#
12350 #	res_snan() - return SNAN result					#
12351 #									#
12352 # INPUT ***************************************************************	#
12353 #	a0 = pointer to extended precision source operand		#
12354 #	a1 = pointer to extended precision destination operand		#
12355 #	d0 = rnd prec,mode						#
12356 #									#
12357 # OUTPUT **************************************************************	#
12358 #	fp0 = result							#
12359 #	fp1 = EXOP (if exception occurred)				#
12360 #									#
12361 # ALGORITHM ***********************************************************	#
12362 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
12363 # norms/denorms into ext/sgl/dbl precision.				#
12364 #	For norms/denorms, scale the exponents such that a divide	#
12365 # instruction won't cause an exception. Use the regular fdiv to		#
12366 # compute a result. Check if the regular operands would have taken	#
12367 # an exception. If so, return the default overflow/underflow result	#
12368 # and return the EXOP if exceptions are enabled. Else, scale the	#
12369 # result operand to the proper exponent.				#
12370 #									#
12371 #########################################################################
12372 
12373 	align		0x10
12374 tbl_fdiv_unfl:					# scale factor limits, indexed by rnd prec
12375 	long		0x3fff - 0x0000		# ext_unfl
12376 	long		0x3fff - 0x3f81		# sgl_unfl
12377 	long		0x3fff - 0x3c01		# dbl_unfl
12378 
12379 tbl_fdiv_ovfl:					# scale factor limits, indexed by rnd prec
12380 	long		0x3fff - 0x7ffe		# ext overflow exponent
12381 	long		0x3fff - 0x407e		# sgl overflow exponent
12382 	long		0x3fff - 0x43fe		# dbl overflow exponent
12383 
12384 	global		fsdiv
12385 fsdiv:						# fdiv with rnd prec forced to single
12386 	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
12387 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
12388 	bra.b		fdiv			# continue in common divide code
12389 
12390 	global		fddiv
12391 fddiv:						# fdiv with rnd prec forced to double
12392 	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
12393 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls into fdiv
12394 
12395 	global		fdiv
12396 fdiv:
12397 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12398 
12399 	clr.w		%d1			# d1.w becomes the tag index
12400 	mov.b		DTAG(%a6),%d1		# fetch dst tag
12401 	lsl.b		&0x3,%d1		# dst tag selects a row of 8
12402 	or.b		STAG(%a6),%d1		# combine dst,src tags
12403 
12404 	bne.w		fdiv_not_norm		# optimize on non-norm input
12405 
12406 #
12407 # DIVIDE: NORMs and DENORMs ONLY!
12408 # copy and scale both operands; the net scale factor in d0 selects the path.
12409 fdiv_norm:
12410 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
12411 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
12412 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
12413 
12414 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12415 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12416 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12417 
12418 	bsr.l		scale_to_zero_src	# scale src exponent
12419 	mov.l		%d0,-(%sp)		# save scale factor 1
12420 
12421 	bsr.l		scale_to_zero_dst	# scale dst exponent
12422 
12423 	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
12424 	add.l		%d0,(%sp)		# (dst scale minus src scale)
12425 
12426 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
12427 	lsr.b		&0x6,%d1		# shift to lo bits
12428 	mov.l		(%sp)+,%d0		# load S.F.
12429 	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12430 	ble.w		fdiv_may_ovfl		# result may overflow; go check
12431 
12432 	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12433 	beq.w		fdiv_may_unfl		# maybe
12434 	bgt.w		fdiv_unfl		# yes; go handle underflow
12435 
12436 fdiv_normal:					# no overflow/underflow: divide, then unscale
12437 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12438 
12439 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12440 	fmov.l		&0x0,%fpsr		# clear FPSR
12441 
12442 	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
12443 
12444 	fmov.l		%fpsr,%d1		# save FPSR
12445 	fmov.l		&0x0,%fpcr		# clear FPCR
12446 
12447 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12448 
12449 fdiv_normal_exit:
12450 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
12451 	mov.l		%d2,-(%sp)		# store d2
12452 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
12453 	mov.l		%d1,%d2			# make a copy
12454 	andi.l		&0x7fff,%d1		# strip sign
12455 	andi.w		&0x8000,%d2		# keep old sign
12456 	sub.l		%d0,%d1			# subtract scale factor (undo scaling)
12457 	or.w		%d2,%d1			# concat old sign,new exp
12458 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12459 	mov.l		(%sp)+,%d2		# restore d2
12460 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12461 	rts
12462 
12463 tbl_fdiv_ovfl2:					# exponent limits used by fdiv_may_ovfl
12464 	long		0x7fff			# ext
12465 	long		0x407f			# sgl
12466 	long		0x43ff			# dbl
12467 
12468 fdiv_no_ovfl:					# fdiv_may_ovfl found no overflow
12469 	mov.l		(%sp)+,%d0		# restore scale factor
12470 	bra.b		fdiv_normal_exit	# unscale and return result
12471 
12472 fdiv_may_ovfl:					# divide, then test the unscaled exponent
12473 	mov.l		%d0,-(%sp)		# save scale factor
12474 
12475 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12476 
12477 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12478 	fmov.l		&0x0,%fpsr		# clear FPSR
12479 
12480 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12481 
12482 	fmov.l		%fpsr,%d0		# save status
12483 	fmov.l		&0x0,%fpcr		# clear FPCR
12484 
12485 	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
12486 
12487 	fmovm.x		&0x01,-(%sp)		# save result to stack
12488 	mov.w		(%sp),%d0		# fetch new exponent
12489 	add.l		&0xc,%sp		# clear result from stack
12490 	andi.l		&0x7fff,%d0		# strip sign
12491 	sub.l		(%sp),%d0		# subtract saved scale factor
12492 	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # d1 not rewritten in this block
12493 	blt.b		fdiv_no_ovfl		# below the limit; no overflow
12494 	mov.l		(%sp)+,%d0		# restore scale factor
12495 
12496 fdiv_ovfl_tst:					# overflow occurred
12497 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12498 
12499 	mov.b		FPCR_ENABLE(%a6),%d1
12500 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12501 	bne.b		fdiv_ovfl_ena		# yes
12502 
12503 fdiv_ovfl_dis:					# create default result (also used after EXOP)
12504 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12505 	sne		%d1			# set sign param accordingly
12506 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
12507 	bsr.l		ovf_res			# calculate default result
12508 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
12509 	fmovm.x		(%a0),&0x80		# return default result in fp0
12510 	rts
12511 
12512 fdiv_ovfl_ena:					# OVFL or INEX enabled: build EXOP in fp1
12513 	mov.l		L_SCR3(%a6),%d1
12514 	andi.b		&0xc0,%d1		# is precision extended?
12515 	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
12516 
12517 fdiv_ovfl_ena_cont:
12518 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
12519 
12520 	mov.l		%d2,-(%sp)		# save d2
12521 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12522 	mov.w		%d1,%d2			# make a copy
12523 	andi.l		&0x7fff,%d1		# strip sign
12524 	sub.l		%d0,%d1			# subtract scale factor (undo scaling)
12525 	subi.l		&0x6000,%d1		# subtract bias
12526 	andi.w		&0x7fff,%d1		# clear sign bit
12527 	andi.w		&0x8000,%d2		# keep old sign
12528 	or.w		%d2,%d1			# concat old sign,new exp
12529 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12530 	mov.l		(%sp)+,%d2		# restore d2
12531 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12532 	bra.b		fdiv_ovfl_dis		# then create the default result
12533 
12534 fdiv_ovfl_ena_sd:				# sgl/dbl: redo divide with prec bits cleared
12535 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
12536 
12537 	mov.l		L_SCR3(%a6),%d1
12538 	andi.b		&0x30,%d1		# keep rnd mode
12539 	fmov.l		%d1,%fpcr		# set FPCR
12540 
12541 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12542 
12543 	fmov.l		&0x0,%fpcr		# clear FPCR
12544 	bra.b		fdiv_ovfl_ena_cont	# go rebias the exponent for the EXOP
12545 
12546 fdiv_unfl:					# result underflows
12547 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12548 
12549 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12550 
12551 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ, prec bits zero)
12552 	fmov.l		&0x0,%fpsr		# clear FPSR
12553 
12554 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12555 
12556 	fmov.l		%fpsr,%d1		# save status
12557 	fmov.l		&0x0,%fpcr		# clear FPCR
12558 
12559 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12560 
12561 	mov.b		FPCR_ENABLE(%a6),%d1
12562 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12563 	bne.b		fdiv_unfl_ena		# yes
12564 
12565 fdiv_unfl_dis:					# create default result (also used after EXOP)
12566 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12567 
12568 	lea		FP_SCR0(%a6),%a0	# pass: result addr
12569 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12570 	bsr.l		unf_res			# calculate default result
12571 	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
12572 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12573 	rts
12574 
12575 #
12576 # UNFL or INEX is enabled: redo the divide into fp1 and rebias it as the EXOP.
12577 #
12578 fdiv_unfl_ena:
12579 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
12580 
12581 	mov.l		L_SCR3(%a6),%d1
12582 	andi.b		&0xc0,%d1		# is precision extended?
12583 	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
12584 
12585 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12586 
12587 fdiv_unfl_ena_cont:
12588 	fmov.l		&0x0,%fpsr		# clear FPSR
12589 
12590 	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
12591 
12592 	fmov.l		&0x0,%fpcr		# clear FPCR
12593 
12594 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
12595 	mov.l		%d2,-(%sp)		# save d2
12596 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12597 	mov.l		%d1,%d2			# make a copy
12598 	andi.l		&0x7fff,%d1		# strip sign
12599 	andi.w		&0x8000,%d2		# keep old sign
12600 	sub.l		%d0,%d1			# subtract scale factor (undo scaling)
12601 	addi.l		&0x6000,%d1		# add bias
12602 	andi.w		&0x7fff,%d1		# keep exponent out of sign position
12603 	or.w		%d2,%d1			# concat old sign,new exp
12604 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
12605 	mov.l		(%sp)+,%d2		# restore d2
12606 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12607 	bra.w		fdiv_unfl_dis		# then create the default result
12608 
12609 fdiv_unfl_ena_sd:				# sgl/dbl: divide with prec bits cleared
12610 	mov.l		L_SCR3(%a6),%d1
12611 	andi.b		&0x30,%d1		# use only rnd mode
12612 	fmov.l		%d1,%fpcr		# set FPCR
12613 
12614 	bra.b		fdiv_unfl_ena_cont	# rejoin common EXOP code
12615 
12616 #
12617 # the divide operation MAY underflow:
12618 # divide the scaled operands and compare |result| with 1 to decide.
12619 fdiv_may_unfl:
12620 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12621 
12622 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12623 	fmov.l		&0x0,%fpsr		# clear FPSR
12624 
12625 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12626 
12627 	fmov.l		%fpsr,%d1		# save status
12628 	fmov.l		&0x0,%fpcr		# clear FPCR
12629 
12630 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12631 
12632 	fabs.x		%fp0,%fp1		# make a copy of result
12633 	fcmp.b		%fp1,&0x1		# compare |result| with 1.b
12634 	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
12635 	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred
12636 
12637 #
12638 # we still don't know if underflow occurred. result is ~ equal to 1. but,
12639 # we don't know if the result was an underflow that rounded up to a 1
12640 # or a normalized number that rounded down to a 1. so, redo the entire
12641 # operation using RZ as the rounding mode to see what the pre-rounded
12642 # result is. this case should be relatively rare.
12643 #
12644 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
12645 
12646 	mov.l		L_SCR3(%a6),%d1
12647 	andi.b		&0xc0,%d1		# keep rnd prec
12648 	ori.b		&rz_mode*0x10,%d1	# insert RZ
12649 
12650 	fmov.l		%d1,%fpcr		# set FPCR
12651 	fmov.l		&0x0,%fpsr		# clear FPSR
12652 
12653 	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
12654 
12655 	fmov.l		&0x0,%fpcr		# clear FPCR
12656 	fabs.x		%fp1			# make absolute value
12657 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
12658 	fbge.w		fdiv_normal_exit	# no; no underflow occurred
12659 	bra.w		fdiv_unfl		# yes; underflow occurred
12660 
12661 ############################################################################
12662 
12663 #
12664 # Divide: inputs are not both normalized; what are they?
12665 # d1 = (DTAG << 3) | STAG indexes tbl_fdiv_op, whose entries are offsets from the table.
12666 fdiv_not_norm:
12667 	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
12668 	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)	# jump to handler
12669 
12670 	swbeg		&48
12671 tbl_fdiv_op:					# rows: dst type; entries read "dst / src"
12672 	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
12673 	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
12674 	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
12675 	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
12676 	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
12677 	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
12678 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12679 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12680 
12681 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
12682 	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
12683 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
12684 	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
12685 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
12686 	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
12687 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12688 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12689 
12690 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
12691 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
12692 	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
12693 	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
12694 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
12695 	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
12696 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12697 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12698 
12699 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
12700 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
12701 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
12702 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
12703 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
12704 	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
12705 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12706 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12707 
12708 	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
12709 	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
12710 	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
12711 	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
12712 	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
12713 	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
12714 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12715 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12716 
12717 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
12718 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
12719 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
12720 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
12721 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
12722 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
12723 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12724 	short		tbl_fdiv_op	- tbl_fdiv_op # (pad row to 8 entries)
12725 
12726 fdiv_res_qnan:					# table target: QNAN result
12727 	bra.l		res_qnan
12728 fdiv_res_snan:					# table target: SNAN result
12729 	bra.l		res_snan
12730 fdiv_res_operr:					# table target: ZERO/ZERO or INF/INF
12731 	bra.l		res_operr
12732 
12733 	global		fdiv_zero_load		# global for fsgldiv
12734 fdiv_zero_load:					# return a ZERO w/ the proper sign
12735 	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
12736 	mov.b		DST_EX(%a1),%d1		# or of input signs.
12737 	eor.b		%d0,%d1			# bit 7 = result sign
12738 	bpl.b		fdiv_zero_load_p	# result is positive
12739 	fmov.s		&0x80000000,%fp0	# load a -ZERO
12740 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
12741 	rts
12742 fdiv_zero_load_p:
12743 	fmov.s		&0x00000000,%fp0	# load a +ZERO
12744 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
12745 	rts
12746 
12747 #
12748 # The destination was In Range and the source was a ZERO. The result,
12749 # therefore, is an INF w/ the proper sign.
12750 # So, determine the sign and return a new INF (w/ the j-bit cleared).
12751 #
12752 	global		fdiv_inf_load		# global for fsgldiv
12753 fdiv_inf_load:
12754 	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
12755 	mov.b		SRC_EX(%a0),%d0		# load both signs
12756 	mov.b		DST_EX(%a1),%d1
12757 	eor.b		%d0,%d1			# bit 7 = result sign
12758 	bpl.b		fdiv_inf_load_p		# result is positive
12759 	fmov.s		&0xff800000,%fp0	# make result -INF
12760 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12761 	rts
12762 fdiv_inf_load_p:
12763 	fmov.s		&0x7f800000,%fp0	# make result +INF
12764 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
12765 	rts
12766 
12767 #
12768 # The destination was an INF w/ an In Range or ZERO source, the result is
12769 # an INF w/ the proper sign.
12770 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12771 # dst INF is set, then the j-bit of the result INF is also set).
12772 #
12773 	global		fdiv_inf_dst		# global for fsgldiv
12774 fdiv_inf_dst:
12775 	mov.b		DST_EX(%a1),%d0		# load both signs
12776 	mov.b		SRC_EX(%a0),%d1
12777 	eor.b		%d0,%d1			# bit 7 = result sign
12778 	bpl.b		fdiv_inf_dst_p		# result is positive
12779 
12780 	fmovm.x		DST(%a1),&0x80		# return result in fp0
12781 	fabs.x		%fp0			# clear sign bit
12782 	fneg.x		%fp0			# set sign bit
12783 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12784 	rts
12785 
12786 fdiv_inf_dst_p:
12787 	fmovm.x		DST(%a1),&0x80		# return result in fp0
12788 	fabs.x		%fp0			# return positive INF
12789 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
12790 	rts
12791 
12792 #########################################################################
12793 # XDEF ****************************************************************	#
12794 #	fneg(): emulates the fneg instruction				#
12795 #	fsneg(): emulates the fsneg instruction				#
12796 #	fdneg(): emulates the fdneg instruction				#
12797 #									#
12798 # XREF ****************************************************************	#
12799 #	norm() - normalize a denorm to provide EXOP			#
12800 #	scale_to_zero_src() - scale sgl/dbl source exponent		#
12801 #	ovf_res() - return default overflow result			#
12802 #	unf_res() - return default underflow result			#
12803 #	res_qnan_1op() - return QNAN result				#
12804 #	res_snan_1op() - return SNAN result				#
12805 #									#
12806 # INPUT ***************************************************************	#
12807 #	a0 = pointer to extended precision source operand		#
12808 #	d0 = rnd prec,mode						#
12809 #									#
12810 # OUTPUT **************************************************************	#
12811 #	fp0 = result							#
12812 #	fp1 = EXOP (if exception occurred)				#
12813 #									#
12814 # ALGORITHM ***********************************************************	#
12815 #	Handle NANs, zeroes, and infinities as special cases. Separate	#
12816 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
12817 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
12818 # and an actual fneg performed to see if overflow/underflow would have	#
12819 # occurred. If so, return default underflow/overflow result. Else,	#
12820 # scale the result exponent and return result. FPSR gets set based on	#
12821 # the result value.							#
12822 #									#
12823 #########################################################################
12824 
12825 	global		fsneg
12826 fsneg:						# fneg with rnd prec forced to single
12827 	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
12828 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
12829 	bra.b		fneg			# continue in common negate code
12830 
12831 	global		fdneg
12832 fdneg:						# fneg with rnd prec forced to double
12833 	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
12834 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls into fneg
12835 
12836 	global		fneg
12837 fneg:
12838 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12839 	mov.b		STAG(%a6),%d1		# fetch src optype tag
12840 	bne.w		fneg_not_norm		# optimize on non-norm input
12841 
12842 #
12843 # NEGATE SIGN : norms and denorms ONLY!
12844 #
12845 fneg_norm:
12846 	andi.b		&0xc0,%d0		# is precision extended?
12847 	bne.w		fneg_not_ext		# no; go handle sgl or dbl
12848 
12849 #
12850 # precision selected is extended. so...we can not get an underflow
12851 # or overflow because of rounding to the correct precision. so...
12852 # skip the scaling and unscaling...
12853 #
12854 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12855 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12856 	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
12857 	eori.w		&0x8000,%d0		# negate sign
12858 	bpl.b		fneg_norm_load		# sign is positive
12859 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
12860 fneg_norm_load:
12861 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert negated {sgn,exp}
12862 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12863 	rts
12864 
12865 #
12866 # for an extended precision DENORM, the UNFL exception bit is set
12867 # the accrued bit is NOT set in this instance(no inexactness!)
12868 #
12869 fneg_denorm:
12870 	andi.b		&0xc0,%d0		# is precision extended?
12871 	bne.b		fneg_not_ext		# no; go handle sgl or dbl
12872 
12873 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12874 
12875 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12876 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12877 	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
12878 	eori.w		&0x8000,%d0		# negate sign
12879 	bpl.b		fneg_denorm_done	# result is positive
12880 	mov.b		&neg_bmask,FPSR_CC(%a6)	# negative; set 'N' ccode bit
12881 fneg_denorm_done:
12882 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert negated {sgn,exp}
12883 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12884 
12885 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12886 	bne.b		fneg_ext_unfl_ena	# yes
12887 	rts
12888 
12889 #
12890 # the input is an extended DENORM and underflow is enabled in the FPCR.
12891 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12892 # exponent and insert back into the operand.
12893 #
12894 fneg_ext_unfl_ena:
12895 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
12896 	bsr.l		norm			# normalize result
12897 	neg.w		%d0			# new exponent = -(shft val)
12898 	addi.w		&0x6000,%d0		# add new bias to exponent
12899 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
12900 	andi.w		&0x8000,%d1		# keep old sign
12901 	andi.w		&0x7fff,%d0		# clear sign position
12902 	or.w		%d1,%d0			# concat old sign, new exponent
12903 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
12904 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12905 	rts
12906 
12907 #
12908 # operand is to be rounded to either single or double precision.
12909 # d0.b holds only the rnd prec bits here (masked with 0xc0 by fneg_norm/fneg_denorm).
12910 fneg_not_ext:
12911 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12912 	bne.b		fneg_dbl		# not sgl; handle as dbl
12913 
12914 #
12915 # operand is to be rounded to single precision.
12916 # copy src to FP_SCR0, scale it, then classify by the scale factor in d0.
12917 fneg_sgl:
12918 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12919 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12920 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12921 	bsr.l		scale_to_zero_src	# calculate scale factor
12922 
12923 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
12924 	bge.w		fneg_sd_unfl		# yes; go handle underflow
12925 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
12926 	beq.w		fneg_sd_may_ovfl	# maybe; go check
12927 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
12928 
12929 #
12930 # operand will NOT overflow or underflow when moved in to the fp reg file.
12931 # negate with the rnd prec,mode held in L_SCR3, then unscale the result.
12932 fneg_sd_normal:
12933 	fmov.l		&0x0,%fpsr		# clear FPSR
12934 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12935 
12936 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
12937 
12938 	fmov.l		%fpsr,%d1		# save FPSR
12939 	fmov.l		&0x0,%fpcr		# clear FPCR
12940 
12941 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12942 
12943 fneg_sd_normal_exit:
12944 	mov.l		%d2,-(%sp)		# save d2
12945 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12946 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12947 	mov.w		%d1,%d2			# make a copy
12948 	andi.l		&0x7fff,%d1		# strip sign
12949 	sub.l		%d0,%d1			# subtract scale factor (undo scaling)
12950 	andi.w		&0x8000,%d2		# keep old sign
12951 	or.w		%d1,%d2			# concat old sign,new exp
12952 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12953 	mov.l		(%sp)+,%d2		# restore d2
12954 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12955 	rts
12956 
12957 #
12958 # operand is to be rounded to double precision.
12959 # same flow as fneg_sgl, using the double-precision exponent thresholds.
12960 fneg_dbl:
12961 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12962 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12963 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12964 	bsr.l		scale_to_zero_src	# calculate scale factor
12965 
12966 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
12967 	bge.b		fneg_sd_unfl		# yes; go handle underflow
12968 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
12969 	beq.w		fneg_sd_may_ovfl	# maybe; go check
12970 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
12971 	bra.w		fneg_sd_normal		# no; go handle normalized op
12972 
12973 #
12974 # operand WILL underflow when moved in to the fp register file.
12975 # set UNFL, negate the sign in FP_SCR0, optionally build the EXOP, return unf_res().
12976 fneg_sd_unfl:
12977 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12978 
12979 	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
12980 	bpl.b		fneg_sd_unfl_tst	# result is positive
12981 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
12982 
12983 # if underflow or inexact is enabled, go calculate EXOP first.
12984 fneg_sd_unfl_tst:
12985 	mov.b		FPCR_ENABLE(%a6),%d1
12986 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12987 	bne.b		fneg_sd_unfl_ena	# yes
12988 
12989 fneg_sd_unfl_dis:
12990 	lea		FP_SCR0(%a6),%a0	# pass: result addr
12991 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12992 	bsr.l		unf_res			# calculate default result
12993 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
12994 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12995 	rts
12996 
12997 #
12998 # operand will underflow AND underflow or inexact is enabled.
12999 # Therefore, we must return the result rounded to extended precision.
13000 # The EXOP is built in FP_SCR1: unscaled exponent plus the 0x6000 bias, negated sign.
13001 fneg_sd_unfl_ena:
13002 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13003 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13004 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
13005 
13006 	mov.l		%d2,-(%sp)		# save d2
13007 	mov.l		%d1,%d2			# make a copy
13008 	andi.l		&0x7fff,%d1		# strip sign
13009 	andi.w		&0x8000,%d2		# keep sign (already negated)
13010 	sub.l		%d0,%d1			# subtract scale factor
13011 	addi.l		&0x6000,%d1		# add new bias
13012 	andi.w		&0x7fff,%d1		# keep exponent out of sign position
13013 	or.w		%d2,%d1			# concat new sign,new exp
13014 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
13015 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
13016 	mov.l		(%sp)+,%d2		# restore d2
13017 	bra.b		fneg_sd_unfl_dis	# then create the default result
13018 
13019 #
13020 # operand WILL overflow.
13021 # do the negate anyway so that INEX2,N are captured from the FPSR.
13022 fneg_sd_ovfl:
13023 	fmov.l		&0x0,%fpsr		# clear FPSR
13024 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13025 
13026 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
13027 
13028 	fmov.l		&0x0,%fpcr		# clear FPCR
13029 	fmov.l		%fpsr,%d1		# save FPSR
13030 
13031 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13032 
13033 fneg_sd_ovfl_tst:
13034 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13035 
13036 	mov.b		FPCR_ENABLE(%a6),%d1
13037 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
13038 	bne.b		fneg_sd_ovfl_ena	# yes
13039 
13040 #
13041 # Create the default result by calling ovf_res(). Reached directly when neither
13042 # OVFL nor INEX is enabled, and from fneg_sd_ovfl_ena once the EXOP is in fp1.
13043 #
13044 fneg_sd_ovfl_dis:
13045 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
13046 	sne		%d1			# set sign param accordingly
13047 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
13048 	bsr.l		ovf_res			# calculate default result
13049 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
13050 	fmovm.x		(%a0),&0x80		# return default result in fp0
13051 	rts
13052 
13053 #
13054 # OVFL or INEX is enabled.
13055 # the INEX2 bit has already been updated by the round to the correct precision.
13056 # now, build the EXOP in FP_SCR0 (unscaled exponent less the 0x6000 bias); FPSR is not altered.
13057 #
13058 fneg_sd_ovfl_ena:
13059 	mov.l		%d2,-(%sp)		# save d2
13060 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
13061 	mov.l		%d1,%d2			# make a copy
13062 	andi.l		&0x7fff,%d1		# strip sign
13063 	andi.w		&0x8000,%d2		# keep old sign
13064 	sub.l		%d0,%d1			# subtract scale factor (undo scaling)
13065 	subi.l		&0x6000,%d1		# subtract bias
13066 	andi.w		&0x7fff,%d1		# keep exponent out of sign position
13067 	or.w		%d2,%d1			# concat sign,exp
13068 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
13069 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13070 	mov.l		(%sp)+,%d2		# restore d2
13071 	bra.b		fneg_sd_ovfl_dis	# then create the default result
13072 
13073 #
# the move in MAY overflow. so...
13075 #
13076 fneg_sd_may_ovfl:
13077 	fmov.l		&0x0,%fpsr		# clear FPSR
13078 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13079 
13080 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
13081 
13082 	fmov.l		%fpsr,%d1		# save status
13083 	fmov.l		&0x0,%fpcr		# clear FPCR
13084 
13085 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13086 
13087 	fabs.x		%fp0,%fp1		# make a copy of result
13088 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
13089 	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
13090 
13091 # no, it didn't overflow; we have correct result
13092 	bra.w		fneg_sd_normal_exit
13093 
13094 ##########################################################################
13095 
13096 #
13097 # input is not normalized; what is it?
13098 #
13099 fneg_not_norm:
13100 	cmpi.b		%d1,&DENORM		# weed out DENORM
13101 	beq.w		fneg_denorm
13102 	cmpi.b		%d1,&SNAN		# weed out SNAN
13103 	beq.l		res_snan_1op
13104 	cmpi.b		%d1,&QNAN		# weed out QNAN
13105 	beq.l		res_qnan_1op
13106 
13107 #
13108 # do the fneg; at this point, only possible ops are ZERO and INF.
13109 # use fneg to determine ccodes.
13110 # prec:mode should be zero at this point but it won't affect answer anyways.
13111 #
13112 	fneg.x		SRC_EX(%a0),%fp0	# do fneg
13113 	fmov.l		%fpsr,%d0
13114 	rol.l		&0x8,%d0		# put ccodes in lo byte
13115 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
13116 	rts
13117 
13118 #########################################################################
13119 # XDEF ****************************************************************	#
#	ftst(): emulates the ftst instruction				#
13121 #									#
13122 # XREF ****************************************************************	#
13123 #	res{s,q}nan_1op() - set NAN result for monadic instruction	#
13124 #									#
13125 # INPUT ***************************************************************	#
13126 #	a0 = pointer to extended precision source operand		#
13127 #									#
13128 # OUTPUT **************************************************************	#
13129 #	none								#
13130 #									#
13131 # ALGORITHM ***********************************************************	#
#	Check the source operand tag (STAG) and set the FPSR condition	#
# codes according to the operand type and sign.				#
13134 #									#
13135 #########################################################################
13136 
	global		ftst
#
# ftst entry point. Reads the source tag from STAG(%a6) and the operand sign
# from the byte at SRC_EX(%a0). The only result produced in this routine is
# the condition-code byte stored to FPSR_CC(%a6); no fp register is written
# here. SNAN/QNAN tags are handed off to res_snan_1op/res_qnan_1op.
#
ftst:
	mov.b		STAG(%a6),%d1
	bne.b		ftst_not_norm		# optimize on non-norm input

#
# Norm:
# a positive norm needs no ccode bit, so FPSR_CC(%a6) is not written at all.
# NOTE(review): presumably the caller has already cleared FPSR_CC -- confirm.
#
ftst_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_norm_m		# yes
	rts
ftst_norm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# input is not normalized; what is it?
# any tag that is none of ZERO/INF/SNAN/QNAN falls through to ftst_denorm.
#
ftst_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		ftst_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		ftst_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# Denorm:
# handled exactly like a norm: only the sign decides the ccode byte.
#
ftst_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_denorm_m		# yes
	rts
ftst_denorm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# Infinity:
#
ftst_inf:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_inf_m		# yes
ftst_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
ftst_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
	rts

#
# Zero:
#
ftst_zero:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_zero_m		# yes
ftst_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
ftst_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
	rts
13202 
13203 #########################################################################
13204 # XDEF ****************************************************************	#
13205 #	fint(): emulates the fint instruction				#
13206 #									#
13207 # XREF ****************************************************************	#
13208 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13209 #									#
13210 # INPUT ***************************************************************	#
13211 #	a0 = pointer to extended precision source operand		#
13212 #	d0 = round precision/mode					#
13213 #									#
13214 # OUTPUT **************************************************************	#
13215 #	fp0 = result							#
13216 #									#
13217 # ALGORITHM ***********************************************************	#
13218 #	Separate according to operand type. Unnorms don't pass through	#
13219 # here. For norms, load the rounding mode/prec, execute a "fint", then	#
13220 # store the resulting FPSR bits.					#
13221 #	For denorms, force the j-bit to a one and do the same as for	#
13222 # norms. Denorms are so low that the answer will either be a zero or a	#
13223 # one.									#
13224 #	For zeroes/infs/NANs, return the same while setting the FPSR	#
13225 # as appropriate.							#
13226 #									#
13227 #########################################################################
13228 
	global		fint
#
# fint entry point. a0 points to the source operand, d0 carries the rounding
# precision/mode, and the source tag is read from STAG(%a6). A zero tag means
# NORM and falls straight into fint_norm.
#
fint:
	mov.b		STAG(%a6),%d1
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
# only the bits selected by the 0x30 mask survive in the low byte of d0 before
# it is loaded into the FPCR; the FPCR is cleared again right after the fint.
# The FPSR read back after the operation is OR-ed into USER_FPSR(%a6).
#
fint_norm:
	andi.b		&0x30,%d0		# set prec = ext

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
# any tag that is none of ZERO/INF/DENORM/SNAN is sent to res_qnan_1op by the
# final unconditional branch.
#
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
# NOTE(review): only the sign/exponent word and the top mantissa byte of
# FP_SCR0 are written here; the remaining mantissa bytes keep whatever
# FP_SCR0 held before. a0 is redirected to FP_SCR0 for fint_norm.
#
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0
	bra.b		fint_norm

#
# Zero:
# return a zero of the same sign and write the ccode byte directly.
#
fint_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
# the source operand itself is loaded into fp0 as the result.
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts
13308 
13309 #########################################################################
13310 # XDEF ****************************************************************	#
13311 #	fintrz(): emulates the fintrz instruction			#
13312 #									#
13313 # XREF ****************************************************************	#
13314 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13315 #									#
13316 # INPUT ***************************************************************	#
13317 #	a0 = pointer to extended precision source operand		#
13318 #	d0 = round precision/mode					#
13319 #									#
13320 # OUTPUT **************************************************************	#
13321 #	fp0 = result							#
13322 #									#
13323 # ALGORITHM ***********************************************************	#
13324 #	Separate according to operand type. Unnorms don't pass through	#
13325 # here. For norms, load the rounding mode/prec, execute a "fintrz",	#
13326 # then store the resulting FPSR bits.					#
13327 #	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will always be a		#
# zero.									#
13330 #	For zeroes/infs/NANs, return the same while setting the FPSR	#
13331 # as appropriate.							#
13332 #									#
13333 #########################################################################
13334 
	global		fintrz
#
# fintrz entry point. a0 points to the source operand and the source tag is
# read from STAG(%a6). A zero tag means NORM and falls into fintrz_norm.
#
fintrz:
	mov.b		STAG(%a6),%d1
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
# unlike fint_norm, the FPCR is not loaded here; only the FPSR is cleared
# before the operation and OR-ed into USER_FPSR(%a6) afterwards.
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
# any tag that is none of ZERO/INF/DENORM/SNAN is sent to res_qnan_1op by the
# final unconditional branch.
#
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
# NOTE(review): only the sign/exponent word and the top mantissa byte of
# FP_SCR0 are written here; the remaining mantissa bytes keep whatever
# FP_SCR0 held before. a0 is redirected to FP_SCR0 for fintrz_norm.
#
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0
	bra.b		fintrz_norm

#
# Zero:
# return a zero of the same sign and write the ccode byte directly.
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
# the source operand itself is loaded into fp0 as the result.
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts
13410 
13411 #########################################################################
13412 # XDEF ****************************************************************	#
13413 #	fabs():  emulates the fabs instruction				#
13414 #	fsabs(): emulates the fsabs instruction				#
13415 #	fdabs(): emulates the fdabs instruction				#
13416 #									#
13417 # XREF **************************************************************** #
13418 #	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent = 0; get scale factor	#
13420 #	unf_res() - calculate underflow result				#
13421 #	ovf_res() - calculate overflow result				#
13422 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13423 #									#
13424 # INPUT *************************************************************** #
13425 #	a0 = pointer to extended precision source operand		#
13426 #	d0 = rnd precision/mode						#
13427 #									#
13428 # OUTPUT ************************************************************** #
13429 #	fp0 = result							#
13430 #	fp1 = EXOP (if exception occurred)				#
13431 #									#
13432 # ALGORITHM ***********************************************************	#
13433 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
13434 # norms into extended, single, and double precision.			#
13435 #	Simply clear sign for extended precision norm. Ext prec denorm	#
13436 # gets an EXOP created for it since it's an underflow.			#
13437 #	Double and single precision can overflow and underflow. First,	#
13438 # scale the operand such that the exponent is zero. Perform an "fabs"	#
13439 # using the correct rnd mode/prec. Check to see if the original		#
13440 # exponent would take an exception. If so, use unf_res() or ovf_res()	#
13441 # to calculate the default result. Also, create the EXOP for the	#
13442 # exceptional case. If no exception should occur, insert the correct	#
13443 # result exponent and return.						#
13444 #	Unnorms don't pass through here.				#
13445 #									#
13446 #########################################################################
13447 
	global		fsabs
#
# fsabs: keep only the bits selected by the 0x30 mask in the low byte of d0,
# OR in the single-precision selector, then branch to the common fabs code.
#
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
#
# fdabs: same as fsabs but inserts the double-precision selector. There is no
# branch at the end: execution falls straight through into fabs, so fabs must
# stay immediately after this routine.
#
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
13458 
	global		fabs
#
# fabs: common entry (also reached from fsabs by branch and from fdabs by
# fall-through). Saves the rounding info from d0 in L_SCR3(%a6) for the
# later paths, then dispatches on the source tag; a zero tag means NORM.
#
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
# the 0xc0 mask isolates the precision bits of d0; zero means extended.
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
# the operand is copied to FP_SCR0 with bit 15 (the sign) of the exponent
# word cleared, then loaded into fp0. No FPSR bits are touched on this path.
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
13484 
13485 #
13486 # for an extended precision DENORM, the UNFL exception bit is set
13487 # the accrued bit is NOT set in this instance(no inexactness!)
13488 #
13489 fabs_denorm:
13490 	andi.b		&0xc0,%d0		# is precision extended?
13491 	bne.b		fabs_not_ext		# no
13492 
13493 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13494 
13495 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13496 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13497 	mov.w		SRC_EX(%a0),%d0
13498 	bclr		&15,%d0			# clear sign
13499 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
13500 
13501 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13502 
13503 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13504 	bne.b		fabs_ext_unfl_ena
13505 	rts
13506 
13507 #
13508 # the input is an extended DENORM and underflow is enabled in the FPCR.
13509 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
13510 # exponent and insert back into the operand.
13511 #
13512 fabs_ext_unfl_ena:
13513 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
13514 	bsr.l		norm			# normalize result
13515 	neg.w		%d0			# new exponent = -(shft val)
13516 	addi.w		&0x6000,%d0		# add new bias to exponent
13517 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
13518 	andi.w		&0x8000,%d1		# keep old sign
13519 	andi.w		&0x7fff,%d0		# clear sign position
13520 	or.w		%d1,%d0			# concat old sign, new exponent
13521 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
13522 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13523 	rts
13524 
13525 #
13526 # operand is either single or double
13527 #
13528 fabs_not_ext:
13529 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
13530 	bne.b		fabs_dbl
13531 
13532 #
13533 # operand is to be rounded to single precision
13534 #
13535 fabs_sgl:
13536 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13537 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13538 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13539 	bsr.l		scale_to_zero_src	# calculate scale factor
13540 
13541 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
13542 	bge.w		fabs_sd_unfl		# yes; go handle underflow
13543 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
13544 	beq.w		fabs_sd_may_ovfl	# maybe; go check
13545 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
13546 
13547 #
13548 # operand will NOT overflow or underflow when moved in to the fp reg file
13549 #
13550 fabs_sd_normal:
13551 	fmov.l		&0x0,%fpsr		# clear FPSR
13552 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13553 
13554 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13555 
13556 	fmov.l		%fpsr,%d1		# save FPSR
13557 	fmov.l		&0x0,%fpcr		# clear FPCR
13558 
13559 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13560 
13561 fabs_sd_normal_exit:
13562 	mov.l		%d2,-(%sp)		# save d2
13563 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
13564 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
13565 	mov.l		%d1,%d2			# make a copy
13566 	andi.l		&0x7fff,%d1		# strip sign
13567 	sub.l		%d0,%d1			# add scale factor
13568 	andi.w		&0x8000,%d2		# keep old sign
13569 	or.w		%d1,%d2			# concat old sign,new exp
13570 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
13571 	mov.l		(%sp)+,%d2		# restore d2
13572 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
13573 	rts
13574 
13575 #
13576 # operand is to be rounded to double precision
13577 #
13578 fabs_dbl:
13579 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13580 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13581 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13582 	bsr.l		scale_to_zero_src	# calculate scale factor
13583 
13584 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
13585 	bge.b		fabs_sd_unfl		# yes; go handle underflow
13586 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
13587 	beq.w		fabs_sd_may_ovfl	# maybe; go check
13588 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
13589 	bra.w		fabs_sd_normal		# no; ho handle normalized op
13590 
13591 #
13592 # operand WILL underflow when moved in to the fp register file
13593 #
13594 fabs_sd_unfl:
13595 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13596 
13597 	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
13598 
13599 # if underflow or inexact is enabled, go calculate EXOP first.
13600 	mov.b		FPCR_ENABLE(%a6),%d1
13601 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
13602 	bne.b		fabs_sd_unfl_ena	# yes
13603 
13604 fabs_sd_unfl_dis:
13605 	lea		FP_SCR0(%a6),%a0	# pass: result addr
13606 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
13607 	bsr.l		unf_res			# calculate default result
13608 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
13609 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13610 	rts
13611 
13612 #
13613 # operand will underflow AND underflow is enabled.
13614 # Therefore, we must return the result rounded to extended precision.
13615 #
13616 fabs_sd_unfl_ena:
13617 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13618 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13619 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
13620 
13621 	mov.l		%d2,-(%sp)		# save d2
13622 	mov.l		%d1,%d2			# make a copy
13623 	andi.l		&0x7fff,%d1		# strip sign
13624 	andi.w		&0x8000,%d2		# keep old sign
13625 	sub.l		%d0,%d1			# subtract scale factor
13626 	addi.l		&0x6000,%d1		# add new bias
13627 	andi.w		&0x7fff,%d1
13628 	or.w		%d2,%d1			# concat new sign,new exp
13629 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
13630 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
13631 	mov.l		(%sp)+,%d2		# restore d2
13632 	bra.b		fabs_sd_unfl_dis
13633 
13634 #
13635 # operand WILL overflow.
13636 #
13637 fabs_sd_ovfl:
13638 	fmov.l		&0x0,%fpsr		# clear FPSR
13639 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13640 
13641 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13642 
13643 	fmov.l		&0x0,%fpcr		# clear FPCR
13644 	fmov.l		%fpsr,%d1		# save FPSR
13645 
13646 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13647 
13648 fabs_sd_ovfl_tst:
13649 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13650 
13651 	mov.b		FPCR_ENABLE(%a6),%d1
13652 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
13653 	bne.b		fabs_sd_ovfl_ena	# yes
13654 
13655 #
13656 # OVFL is not enabled; therefore, we must create the default result by
13657 # calling ovf_res().
13658 #
13659 fabs_sd_ovfl_dis:
13660 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
13661 	sne		%d1			# set sign param accordingly
13662 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
13663 	bsr.l		ovf_res			# calculate default result
13664 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
13665 	fmovm.x		(%a0),&0x80		# return default result in fp0
13666 	rts
13667 
13668 #
13669 # OVFL is enabled.
13670 # the INEX2 bit has already been updated by the round to the correct precision.
13671 # now, round to extended(and don't alter the FPSR).
13672 #
13673 fabs_sd_ovfl_ena:
13674 	mov.l		%d2,-(%sp)		# save d2
13675 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
13676 	mov.l		%d1,%d2			# make a copy
13677 	andi.l		&0x7fff,%d1		# strip sign
13678 	andi.w		&0x8000,%d2		# keep old sign
13679 	sub.l		%d0,%d1			# add scale factor
13680 	subi.l		&0x6000,%d1		# subtract bias
13681 	andi.w		&0x7fff,%d1
13682 	or.w		%d2,%d1			# concat sign,exp
13683 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
13684 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13685 	mov.l		(%sp)+,%d2		# restore d2
13686 	bra.b		fabs_sd_ovfl_dis
13687 
13688 #
13689 # the move in MAY underflow. so...
13690 #
13691 fabs_sd_may_ovfl:
13692 	fmov.l		&0x0,%fpsr		# clear FPSR
13693 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13694 
13695 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13696 
13697 	fmov.l		%fpsr,%d1		# save status
13698 	fmov.l		&0x0,%fpcr		# clear FPCR
13699 
13700 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13701 
13702 	fabs.x		%fp0,%fp1		# make a copy of result
13703 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
13704 	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
13705 
13706 # no, it didn't overflow; we have correct result
13707 	bra.w		fabs_sd_normal_exit
13708 
13709 ##########################################################################
13710 
13711 #
13712 # input is not normalized; what is it?
13713 #
13714 fabs_not_norm:
13715 	cmpi.b		%d1,&DENORM		# weed out DENORM
13716 	beq.w		fabs_denorm
13717 	cmpi.b		%d1,&SNAN		# weed out SNAN
13718 	beq.l		res_snan_1op
13719 	cmpi.b		%d1,&QNAN		# weed out QNAN
13720 	beq.l		res_qnan_1op
13721 
13722 	fabs.x		SRC(%a0),%fp0		# force absolute value
13723 
13724 	cmpi.b		%d1,&INF		# weed out INF
13725 	beq.b		fabs_inf
13726 fabs_zero:
13727 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13728 	rts
13729 fabs_inf:
13730 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13731 	rts
13732 
13733 #########################################################################
13734 # XDEF ****************************************************************	#
13735 #	fcmp(): fp compare op routine					#
13736 #									#
13737 # XREF ****************************************************************	#
13738 #	res_qnan() - return QNAN result					#
13739 #	res_snan() - return SNAN result					#
13740 #									#
13741 # INPUT ***************************************************************	#
13742 #	a0 = pointer to extended precision source operand		#
13743 #	a1 = pointer to extended precision destination operand		#
13744 #	d0 = round prec/mode						#
13745 #									#
13746 # OUTPUT ************************************************************** #
13747 #	None								#
13748 #									#
13749 # ALGORITHM ***********************************************************	#
13750 #	Handle NANs and denorms as special cases. For everything else,	#
13751 # just use the actual fcmp instruction to produce the correct condition	#
13752 # codes.								#
13753 #									#
13754 #########################################################################
13755 
	global		fcmp
#
# fcmp entry point. a0 points to the source operand and a1 to the destination
# operand. d1 is built as (DTAG << 3) | STAG; a value of zero means both tags
# are zero (NORM, NORM) and the compare is done directly. Any other value is
# used by fcmp_not_norm as an index into tbl_fcmp_op.
#
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
# the dst operand is loaded into fp0 and compared against the src operand
# with a real fcmp. The rol by 8 moves the top byte of the FPSR into the low
# byte of d0, which is then stored as the ccode byte in FPSR_CC(%a6).
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts
13777 
13778 #
13779 # fcmp: inputs are not both normalized; what are they?
13780 #
13781 fcmp_not_norm:
13782 	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13783 	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
13784 
13785 	swbeg		&48
13786 tbl_fcmp_op:
13787 	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
13788 	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
13789 	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
13790 	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
13791 	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
13792 	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
13793 	short		tbl_fcmp_op	- tbl_fcmp_op #
13794 	short		tbl_fcmp_op	- tbl_fcmp_op #
13795 
13796 	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
13797 	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
13798 	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
13799 	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
13800 	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
13801 	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
13802 	short		tbl_fcmp_op	- tbl_fcmp_op #
13803 	short		tbl_fcmp_op	- tbl_fcmp_op #
13804 
13805 	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
13806 	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
13807 	short		fcmp_norm	- tbl_fcmp_op # INF - INF
13808 	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
13809 	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
13810 	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
13811 	short		tbl_fcmp_op	- tbl_fcmp_op #
13812 	short		tbl_fcmp_op	- tbl_fcmp_op #
13813 
13814 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
13815 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
13816 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
13817 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
13818 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
13819 	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
13820 	short		tbl_fcmp_op	- tbl_fcmp_op #
13821 	short		tbl_fcmp_op	- tbl_fcmp_op #
13822 
13823 	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
13824 	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
13825 	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
13826 	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
13827 	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
13828 	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
13829 	short		tbl_fcmp_op	- tbl_fcmp_op #
13830 	short		tbl_fcmp_op	- tbl_fcmp_op #
13831 
13832 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
13833 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
13834 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
13835 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
13836 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
13837 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
13838 	short		tbl_fcmp_op	- tbl_fcmp_op #
13839 	short		tbl_fcmp_op	- tbl_fcmp_op #
13840 
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
# the 0xf7 mask clears bit 3 of the ccode byte and leaves the others as the
# res_qnan/res_snan call left them.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts
13851 
13852 #
13853 # DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit of each to a one
# and use the fcmp_norm routine.
13856 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13857 # and use the fcmp_norm routine.
13858 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13859 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
13860 # (1) signs are (+) and the DENORM is the dst or
13861 # (2) signs are (-) and the DENORM is the src
13862 #
13863 
#
# fcmp_dnrm_s: the src operand is a DENORM. Copy it to FP_SCR0 with bit 31 of
# the high mantissa longword (the j-bit) forced to one, point a0 at the copy
# and let fcmp_norm do the compare. The dst pointer in a1 is left unchanged.
#
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm
13872 
#
# fcmp_dnrm_d: the dst operand is a DENORM. Copy it to FP_SCR0 with bit 31 of
# the high mantissa longword (the j-bit) forced to one, point a1 at the copy
# and let fcmp_norm do the compare. The src pointer in a0 is left unchanged.
#
# The sign/exponent field is a 16-bit word, so it is copied with mov.w like
# in fcmp_dnrm_s and fcmp_dnrm_sd; a longword move would also drag along the
# two bytes that follow the exponent word. The instruction length is the same
# for both sizes, so no branch displacement changes.
#
fcmp_dnrm_d:
	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1
	bra.w		fcmp_norm
13881 
#
# fcmp_dnrm_sd: both operands are DENORMs. The dst is copied to FP_SCR1 and
# the src to FP_SCR0, each with bit 31 of the high mantissa longword (the
# j-bit) forced to one; a1/a0 are pointed at the copies for fcmp_norm.
#
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm
13896 
#
# fcmp_nrm_dnrm: dst is a NORM and src is a DENORM (per tbl_fcmp_op).
# The eor of the two sign bytes is negative when the signs differ; in that
# case the compare is done for real via fcmp_dnrm_s. With like signs the
# ccode is decided here: 'N' is set only when both operands are negative,
# otherwise FPSR_CC(%a6) is not written.
#
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
13910 
#
# fcmp_dnrm_nrm: dst is a DENORM and src is a NORM (per tbl_fcmp_op).
# The eor of the two sign bytes is negative when the signs differ; in that
# case the compare is done for real via fcmp_dnrm_d. With like signs the
# ccode is decided here: 'N' is set only when both operands are positive,
# otherwise FPSR_CC(%a6) is not written.
#
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
13924 
13925 #########################################################################
13926 # XDEF ****************************************************************	#
13927 #	fsglmul(): emulates the fsglmul instruction			#
13928 #									#
13929 # XREF ****************************************************************	#
13930 #	scale_to_zero_src() - scale src exponent to zero		#
13931 #	scale_to_zero_dst() - scale dst exponent to zero		#
13932 #	unf_res4() - return default underflow result for sglop		#
13933 #	ovf_res() - return default overflow result			#
13934 #	res_qnan() - return QNAN result					#
13935 #	res_snan() - return SNAN result					#
13936 #									#
13937 # INPUT ***************************************************************	#
13938 #	a0 = pointer to extended precision source operand		#
13939 #	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
13941 #									#
13942 # OUTPUT **************************************************************	#
13943 #	fp0 = result							#
13944 #	fp1 = EXOP (if exception occurred)				#
13945 #									#
13946 # ALGORITHM ***********************************************************	#
13947 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
13948 # norms/denorms into ext/sgl/dbl precision.				#
13949 #	For norms/denorms, scale the exponents such that a multiply	#
13950 # instruction won't cause an exception. Use the regular fsglmul to	#
13951 # compute a result. Check if the regular operands would have taken	#
13952 # an exception. If so, return the default overflow/underflow result	#
13953 # and return the EXOP if exceptions are enabled. Else, scale the	#
13954 # result operand to the proper exponent.				#
13955 #									#
13956 #########################################################################
13957 
13958 	global		fsglmul
13959 fsglmul:				# in: a0 -> src op, a1 -> dst op, d0 = rnd prec,mode
13960 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
13961 
13962 	clr.w		%d1
13963 	mov.b		DTAG(%a6),%d1
13964 	lsl.b		&0x3,%d1
13965 	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
13966 
13967 	bne.w		fsglmul_not_norm	# optimize on non-norm input
13968 
13969 fsglmul_norm:				# also entered through tbl_fsglmul_op
13970 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# FP_SCR1 = copy of dst op
13971 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
13972 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
13973 
13974 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# FP_SCR0 = copy of src op
13975 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13976 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13977 
13978 	bsr.l		scale_to_zero_src	# scale exponent
13979 	mov.l		%d0,-(%sp)		# save scale factor 1
13980 
13981 	bsr.l		scale_to_zero_dst	# scale dst exponent
13982 
13983 	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
13984 
13985 	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
13986 	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
13987 	blt.w		fsglmul_ovfl		# result will overflow
13988 
13989 	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
13990 	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
13991 	bgt.w		fsglmul_unfl		# result will underflow
13992 
13993 fsglmul_normal:				# scale factor checks predicted no ovfl/unfl
13994 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
13995 
13996 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13997 	fmov.l		&0x0,%fpsr		# clear FPSR
13998 
13999 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14000 
14001 	fmov.l		%fpsr,%d1		# save status
14002 	fmov.l		&0x0,%fpcr		# clear FPCR
14003 
14004 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14005 
14006 fsglmul_normal_exit:			# in: fp0 = scaled result, d0 = scale factor
14007 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14008 	mov.l		%d2,-(%sp)		# save d2
14009 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
14010 	mov.l		%d1,%d2			# make a copy
14011 	andi.l		&0x7fff,%d1		# strip sign
14012 	andi.w		&0x8000,%d2		# keep old sign
14013 	sub.l		%d0,%d1			# new exp = exp - scale factor
14014 	or.w		%d2,%d1			# concat old sign,new exp
14015 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14016 	mov.l		(%sp)+,%d2		# restore d2
14017 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
14018 	rts
14019 
14020 fsglmul_ovfl:				# scale factor says result will overflow
14021 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14022 
14023 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14024 	fmov.l		&0x0,%fpsr		# clear FPSR
14025 
14026 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14027 
14028 	fmov.l		%fpsr,%d1		# save status
14029 	fmov.l		&0x0,%fpcr		# clear FPCR
14030 
14031 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14032 
14033 fsglmul_ovfl_tst:			# in: fp0 = scaled result, d0 = scale factor
14034 
14035 # save setting this until now because this is where fsglmul_may_ovfl may jump in
14036 	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
14037 
14038 	mov.b		FPCR_ENABLE(%a6),%d1
14039 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14040 	bne.b		fsglmul_ovfl_ena	# yes
14041 
14042 fsglmul_ovfl_dis:			# also the tail of fsglmul_ovfl_ena
14043 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
14044 	sne		%d1			# set sign param accordingly
14045 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14046 	andi.b		&0x30,%d0		# force prec = ext
14047 	bsr.l		ovf_res			# calculate default result
14048 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
14049 	fmovm.x		(%a0),&0x80		# return default result in fp0
14050 	rts
14051 
14052 fsglmul_ovfl_ena:			# OVFL or INEX enabled: build EXOP in fp1 first
14053 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
14054 
14055 	mov.l		%d2,-(%sp)		# save d2
14056 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14057 	mov.l		%d1,%d2			# make a copy
14058 	andi.l		&0x7fff,%d1		# strip sign
14059 	sub.l		%d0,%d1			# exp - scale factor
14060 	subi.l		&0x6000,%d1		# subtract bias
14061 	andi.w		&0x7fff,%d1		# keep 15-bit exponent field
14062 	andi.w		&0x8000,%d2		# keep old sign
14063 	or.w		%d2,%d1			# concat old sign,new exp
14064 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14065 	mov.l		(%sp)+,%d2		# restore d2
14066 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14067 	bra.b		fsglmul_ovfl_dis	# then produce default result in fp0
14068 
14069 fsglmul_may_ovfl:			# scale factor sits exactly on the ovfl boundary
14070 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14071 
14072 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14073 	fmov.l		&0x0,%fpsr		# clear FPSR
14074 
14075 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14076 
14077 	fmov.l		%fpsr,%d1		# save status
14078 	fmov.l		&0x0,%fpcr		# clear FPCR
14079 
14080 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14081 
14082 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
14083 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
14084 	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
14085 
14086 # no, it didn't overflow; we have correct result
14087 	bra.w		fsglmul_normal_exit
14088 
14089 fsglmul_unfl:				# in: d0 = scale factor; result underflows
14090 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14091 
14092 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14093 
14094 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR to RZ rounding only
14095 	fmov.l		&0x0,%fpsr		# clear FPSR
14096 
14097 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14098 
14099 	fmov.l		%fpsr,%d1		# save status
14100 	fmov.l		&0x0,%fpcr		# clear FPCR
14101 
14102 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14103 
14104 	mov.b		FPCR_ENABLE(%a6),%d1
14105 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14106 	bne.b		fsglmul_unfl_ena	# yes
14107 
14108 fsglmul_unfl_dis:			# also the tail of fsglmul_unfl_ena
14109 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14110 
14111 	lea		FP_SCR0(%a6),%a0	# pass: result addr
14112 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14113 	bsr.l		unf_res4		# calculate default result
14114 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14115 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14116 	rts
14117 
14118 #
14119 # UNFL is enabled.
14120 #
14121 fsglmul_unfl_ena:			# redo multiply into fp1 to build the EXOP
14122 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14123 
14124 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14125 	fmov.l		&0x0,%fpsr		# clear FPSR
14126 
14127 	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
14128 
14129 	fmov.l		&0x0,%fpcr		# clear FPCR
14130 
14131 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14132 	mov.l		%d2,-(%sp)		# save d2
14133 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14134 	mov.l		%d1,%d2			# make a copy
14135 	andi.l		&0x7fff,%d1		# strip sign
14136 	andi.w		&0x8000,%d2		# keep old sign
14137 	sub.l		%d0,%d1			# exp - scale factor
14138 	addi.l		&0x6000,%d1		# add bias
14139 	andi.w		&0x7fff,%d1		# keep 15-bit exponent field
14140 	or.w		%d2,%d1			# concat old sign,new exp
14141 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14142 	mov.l		(%sp)+,%d2		# restore d2
14143 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14144 	bra.w		fsglmul_unfl_dis	# fp0 still holds the RZ result from fsglmul_unfl
14145 
14146 fsglmul_may_unfl:			# scale factor sits exactly on the unfl boundary
14147 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14148 
14149 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14150 	fmov.l		&0x0,%fpsr		# clear FPSR
14151 
14152 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
14153 
14154 	fmov.l		%fpsr,%d1		# save status
14155 	fmov.l		&0x0,%fpcr		# clear FPCR
14156 
14157 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14158 
14159 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
14160 	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
14161 	fbgt.w		fsglmul_normal_exit	# |result| > 2; no underflow occurred
14162 	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred
14163 
14164 #
14165 # we still don't know if underflow occurred. result is ~ equal to 2. but,
14166 # we don't know if the result was an underflow that rounded up to a 2 or
14167 # a normalized number that rounded down to a 2. so, redo the entire operation
14168 # using RZ as the rounding mode to see what the pre-rounded result is.
14169 # this case should be relatively rare.
14170 #
14171 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
14172 
14173 	mov.l		L_SCR3(%a6),%d1
14174 	andi.b		&0xc0,%d1		# keep rnd prec
14175 	ori.b		&rz_mode*0x10,%d1	# insert RZ
14176 
14177 	fmov.l		%d1,%fpcr		# set FPCR
14178 	fmov.l		&0x0,%fpsr		# clear FPSR
14179 
14180 	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
14181 
14182 	fmov.l		&0x0,%fpcr		# clear FPCR
14183 	fabs.x		%fp1			# make absolute value
14184 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
14185 	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
14186 	bra.w		fsglmul_unfl		# yes, underflow occurred
14187 
14188 ##############################################################################
14189 
14190 #
14191 # Single Precision Multiply: inputs are not both normalized; what are they?
14192 #
14193 fsglmul_not_norm:			# in: d1 = (DTAG << 3) | STAG; rows are DST x SRC
14194 	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1	# fetch 16-bit handler offset
14195 	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)	# jump to tbl_fsglmul_op + offset
14196 
14197 	swbeg		&48
14198 tbl_fsglmul_op:
14199 	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
14200 	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
14201 	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
14202 	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
14203 	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
14204 	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
14205 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14206 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14207 
14208 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
14209 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
14210 	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
14211 	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
14212 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
14213 	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
14214 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14215 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14216 
14217 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
14218 	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
14219 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
14220 	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
14221 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
14222 	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
14223 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14224 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14225 
14226 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
14227 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
14228 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
14229 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
14230 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
14231 	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
14232 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14233 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14234 
14235 	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
14236 	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
14237 	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
14238 	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
14239 	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
14240 	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
14241 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14242 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14243 
14244 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
14245 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
14246 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
14247 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
14248 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
14249 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
14250 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14251 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14252 
14253 fsglmul_res_operr:			# table targets: each forwards with a long branch
14254 	bra.l		res_operr		# ZERO x INF, INF x ZERO
14255 fsglmul_res_snan:
14256 	bra.l		res_snan		# any SNAN operand
14257 fsglmul_res_qnan:
14258 	bra.l		res_qnan		# QNAN operand, no SNAN
14259 fsglmul_zero:
14260 	bra.l		fmul_zero		# shares the fmul zero handler
14261 fsglmul_inf_src:
14262 	bra.l		fmul_inf_src		# shares the fmul handler (src is INF)
14263 fsglmul_inf_dst:
14264 	bra.l		fmul_inf_dst		# shares the fmul handler (dst is INF)
14265 
14266 #########################################################################
14267 # XDEF ****************************************************************	#
14268 #	fsgldiv(): emulates the fsgldiv instruction			#
14269 #									#
14270 # XREF ****************************************************************	#
14271 #	scale_to_zero_src() - scale src exponent to zero		#
14272 #	scale_to_zero_dst() - scale dst exponent to zero		#
14273 #	unf_res4() - return default underflow result for sglop		#
14274 #	ovf_res() - return default overflow result			#
14275 #	res_qnan() - return QNAN result					#
14276 #	res_snan() - return SNAN result					#
14277 #									#
14278 # INPUT ***************************************************************	#
14279 #	a0 = pointer to extended precision source operand		#
14280 #	a1 = pointer to extended precision destination operand		#
14281 #	d0  rnd prec,mode						#
14282 #									#
14283 # OUTPUT **************************************************************	#
14284 #	fp0 = result							#
14285 #	fp1 = EXOP (if exception occurred)				#
14286 #									#
14287 # ALGORITHM ***********************************************************	#
14288 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
14289 # norms/denorms into ext/sgl/dbl precision.				#
14290 #	For norms/denorms, scale the exponents such that a divide	#
14291 # instruction won't cause an exception. Use the regular fsgldiv to	#
14292 # compute a result. Check if the regular operands would have taken	#
14293 # an exception. If so, return the default overflow/underflow result	#
14294 # and return the EXOP if exceptions are enabled. Else, scale the	#
14295 # result operand to the proper exponent.				#
14296 #									#
14297 #########################################################################
14298 
14299 	global		fsgldiv
14300 fsgldiv:				# in: a0 -> src op, a1 -> dst op, d0 = rnd prec,mode
14301 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
14302 
14303 	clr.w		%d1
14304 	mov.b		DTAG(%a6),%d1
14305 	lsl.b		&0x3,%d1
14306 	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
14307 
14308 	bne.w		fsgldiv_not_norm	# optimize on non-norm input
14309 
14310 #
14311 # DIVIDE: NORMs and DENORMs ONLY!
14312 #
14313 fsgldiv_norm:				# also entered through tbl_fsgldiv_op
14314 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# FP_SCR1 = copy of dst op
14315 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
14316 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
14317 
14318 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# FP_SCR0 = copy of src op
14319 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
14320 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
14321 
14322 	bsr.l		scale_to_zero_src	# calculate scale factor 1
14323 	mov.l		%d0,-(%sp)		# save scale factor 1
14324 
14325 	bsr.l		scale_to_zero_dst	# calculate scale factor 2
14326 
14327 	neg.l		(%sp)			# (sp) = -scale1
14328 	add.l		%d0,(%sp)		# S.F. = scale2 - scale1
14329 
14332 	mov.l		(%sp)+,%d0		# d0 = S.F.; d1 is not needed by any successor
14333 	cmpi.l		%d0,&0x3fff-0x7ffe	# may result overflow?
14334 	ble.w		fsgldiv_may_ovfl	# maybe; check after the divide
14335 
14336 	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
14337 	beq.w		fsgldiv_may_unfl	# maybe
14338 	bgt.w		fsgldiv_unfl		# yes; go handle underflow
14339 
14340 fsgldiv_normal:				# scale factor checks predicted no ovfl/unfl
14341 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14342 
14343 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14344 	fmov.l		&0x0,%fpsr		# clear FPSR
14345 
14346 	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
14347 
14348 	fmov.l		%fpsr,%d1		# save FPSR
14349 	fmov.l		&0x0,%fpcr		# clear FPCR
14350 
14351 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14352 
14353 fsgldiv_normal_exit:			# in: fp0 = scaled result, d0 = scale factor
14354 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
14355 	mov.l		%d2,-(%sp)		# save d2
14356 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
14357 	mov.l		%d1,%d2			# make a copy
14358 	andi.l		&0x7fff,%d1		# strip sign
14359 	andi.w		&0x8000,%d2		# keep old sign
14360 	sub.l		%d0,%d1			# new exp = exp - scale factor
14361 	or.w		%d2,%d1			# concat old sign,new exp
14362 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14363 	mov.l		(%sp)+,%d2		# restore d2
14364 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
14365 	rts
14366 
14367 fsgldiv_may_ovfl:			# in: d0 = scale factor <= 0x3fff-0x7ffe
14368 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14369 
14370 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14371 	fmov.l		&0x0,%fpsr		# clear FPSR
14372 
14373 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
14374 
14375 	fmov.l		%fpsr,%d1		# save status
14376 	fmov.l		&0x0,%fpcr		# clear FPCR
14377 
14378 	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
14379 
14380 	fmovm.x		&0x01,-(%sp)		# save result to stack
14381 	mov.w		(%sp),%d1		# fetch new exponent
14382 	add.l		&0xc,%sp		# clear result
14383 	andi.l		&0x7fff,%d1		# strip sign
14384 	sub.l		%d0,%d1			# exp - scale factor
14385 	cmp.l		%d1,&0x7fff		# did divide overflow?
14386 	blt.b		fsgldiv_normal_exit	# no; rescale and return fp0
14387 
14388 fsgldiv_ovfl_tst:
14389 	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14390 
14391 	mov.b		FPCR_ENABLE(%a6),%d1
14392 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14393 	bne.b		fsgldiv_ovfl_ena	# yes
14394 
14395 fsgldiv_ovfl_dis:			# also the tail of fsgldiv_ovfl_ena
14396 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
14397 	sne		%d1			# set sign param accordingly
14398 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14399 	andi.b		&0x30,%d0		# kill precision
14400 	bsr.l		ovf_res			# calculate default result
14401 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
14402 	fmovm.x		(%a0),&0x80		# return default result in fp0
14403 	rts
14404 
14405 fsgldiv_ovfl_ena:			# OVFL or INEX enabled: build EXOP in fp1 first
14406 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
14407 
14408 	mov.l		%d2,-(%sp)		# save d2
14409 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14410 	mov.l		%d1,%d2			# make a copy
14411 	andi.l		&0x7fff,%d1		# strip sign
14412 	andi.w		&0x8000,%d2		# keep old sign
14413 	sub.l		%d0,%d1			# exp - scale factor
14414 	subi.l		&0x6000,%d1		# subtract new bias
14415 	andi.w		&0x7fff,%d1		# clear ms bit
14416 	or.w		%d2,%d1			# concat old sign,new exp
14417 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14418 	mov.l		(%sp)+,%d2		# restore d2
14419 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14420 	bra.b		fsgldiv_ovfl_dis	# then produce default result in fp0
14421 
14422 fsgldiv_unfl:				# in: d0 = scale factor; result underflows
14423 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14424 
14425 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14426 
14427 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR to RZ rounding only
14428 	fmov.l		&0x0,%fpsr		# clear FPSR
14429 
14430 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
14431 
14432 	fmov.l		%fpsr,%d1		# save status
14433 	fmov.l		&0x0,%fpcr		# clear FPCR
14434 
14435 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14436 
14437 	mov.b		FPCR_ENABLE(%a6),%d1
14438 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14439 	bne.b		fsgldiv_unfl_ena	# yes
14440 
14441 fsgldiv_unfl_dis:			# also the tail of fsgldiv_unfl_ena
14442 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14443 
14444 	lea		FP_SCR0(%a6),%a0	# pass: result addr
14445 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14446 	bsr.l		unf_res4		# calculate default result
14447 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14448 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14449 	rts
14450 
14451 #
14452 # UNFL is enabled.
14453 #
14454 fsgldiv_unfl_ena:			# redo divide into fp1 to build the EXOP
14455 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14456 
14457 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14458 	fmov.l		&0x0,%fpsr		# clear FPSR
14459 
14460 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
14461 
14462 	fmov.l		&0x0,%fpcr		# clear FPCR
14463 
14464 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14465 	mov.l		%d2,-(%sp)		# save d2
14466 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14467 	mov.l		%d1,%d2			# make a copy
14468 	andi.l		&0x7fff,%d1		# strip sign
14469 	andi.w		&0x8000,%d2		# keep old sign
14470 	sub.l		%d0,%d1			# exp - scale factor
14471 	addi.l		&0x6000,%d1		# add bias
14472 	andi.w		&0x7fff,%d1		# clear top bit
14473 	or.w		%d2,%d1			# concat old sign, new exp
14474 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14475 	mov.l		(%sp)+,%d2		# restore d2
14476 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14477 	bra.b		fsgldiv_unfl_dis	# fp0 still holds the RZ result from fsgldiv_unfl
14478 
14479 #
14480 # the divide operation MAY underflow:
14481 #
14482 fsgldiv_may_unfl:			# in: d0 = scale factor == 0x3fff
14483 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14484 
14485 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14486 	fmov.l		&0x0,%fpsr		# clear FPSR
14487 
14488 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
14489 
14490 	fmov.l		%fpsr,%d1		# save status
14491 	fmov.l		&0x0,%fpcr		# clear FPCR
14492 
14493 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14494 
14495 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
14496 	fcmp.b		%fp1,&0x1		# compare |result| with 1.b
14497 	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
14498 	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred
14499 
14500 #
14501 # we still don't know if underflow occurred. result is ~ equal to 1. but,
14502 # we don't know if the result was an underflow that rounded up to a 1
14503 # or a normalized number that rounded down to a 1. so, redo the entire
14504 # operation using RZ as the rounding mode to see what the pre-rounded
14505 # result is. this case should be relatively rare.
14506 #
14507 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
14508 
14509 	clr.l		%d1			# clear scratch register
14510 	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
14511 
14512 	fmov.l		%d1,%fpcr		# set FPCR
14513 	fmov.l		&0x0,%fpsr		# clear FPSR
14514 
14515 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
14516 
14517 	fmov.l		&0x0,%fpcr		# clear FPCR
14518 	fabs.x		%fp1			# make absolute value
14519 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
14520 	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
14521 	bra.w		fsgldiv_unfl		# yes; underflow occurred
14522 
14523 ############################################################################
14524 
14525 #
14526 # Divide: inputs are not both normalized; what are they?
14527 #
14528 fsgldiv_not_norm:			# in: d1 = (DTAG << 3) | STAG; rows are DST / SRC
14529 	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1	# fetch 16-bit handler offset
14530 	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)	# jump to tbl_fsgldiv_op + offset
14531 
14532 	swbeg		&48
14533 tbl_fsgldiv_op:
14534 	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
14535 	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
14536 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
14537 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
14538 	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
14539 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
14540 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14541 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14542 
14543 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
14544 	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
14545 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
14546 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
14547 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
14548 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
14549 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14550 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14551 
14552 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
14553 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
14554 	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
14555 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
14556 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
14557 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
14558 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14559 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14560 
14561 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
14562 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
14563 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
14564 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
14565 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
14566 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
14567 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14568 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14569 
14570 	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
14571 	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
14572 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
14573 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
14574 	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
14575 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
14576 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14577 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14578 
14579 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
14580 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
14581 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
14582 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
14583 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
14584 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
14585 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14586 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
14587 
14588 fsgldiv_res_qnan:			# table targets: each forwards with a long branch
14589 	bra.l		res_qnan		# QNAN operand, no SNAN
14590 fsgldiv_res_snan:
14591 	bra.l		res_snan		# any SNAN operand
14592 fsgldiv_res_operr:
14593 	bra.l		res_operr		# ZERO / ZERO, INF / INF
14594 fsgldiv_inf_load:
14595 	bra.l		fdiv_inf_load		# shares the fdiv handler (NORM or DENORM / ZERO)
14596 fsgldiv_zero_load:
14597 	bra.l		fdiv_zero_load		# shares the fdiv handler (zero-result cases)
14598 fsgldiv_inf_dst:
14599 	bra.l		fdiv_inf_dst		# shares the fdiv handler (dst is INF)
14600 
14601 #########################################################################
14602 # XDEF ****************************************************************	#
14603 #	fadd(): emulates the fadd instruction				#
14604 #	fsadd(): emulates the fsadd instruction				#
14605 #	fdadd(): emulates the fdadd instruction				#
14606 #									#
14607 # XREF ****************************************************************	#
14608 #	addsub_scaler2() - scale the operands so they won't take exc	#
14609 #	ovf_res() - return default overflow result			#
14610 #	unf_res() - return default underflow result			#
14611 #	res_qnan() - set QNAN result					#
14612 #	res_snan() - set SNAN result					#
14613 #	res_operr() - set OPERR result					#
14614 #	scale_to_zero_src() - set src operand exponent equal to zero	#
14615 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
14616 #									#
14617 # INPUT ***************************************************************	#
14618 #	a0 = pointer to extended precision source operand		#
14619 #	a1 = pointer to extended precision destination operand		#
14620 #									#
14621 # OUTPUT **************************************************************	#
14622 #	fp0 = result							#
14623 #	fp1 = EXOP (if exception occurred)				#
14624 #									#
14625 # ALGORITHM ***********************************************************	#
14626 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
14627 # norms into extended, single, and double precision.			#
14628 #	Do addition after scaling exponents such that exception won't	#
14629 # occur. Then, check result exponent to see if exception would have	#
14630 # occurred. If so, return default result and maybe EXOP. Else, insert	#
14631 # the correct result exponent and return. Set FPSR bits as appropriate.	#
14632 #									#
14633 #########################################################################
14634 
14635 	global		fsadd
14636 fsadd:					# single-precision entry; joins fadd
14637 	andi.b		&0x30,%d0		# clear rnd prec
14638 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
14639 	bra.b		fadd
14640 
14641 	global		fdadd
14642 fdadd:					# double-precision entry; falls into fadd
14643 	andi.b		&0x30,%d0		# clear rnd prec
14644 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
14645 
14646 	global		fadd
14647 fadd:					# in: d0 = rnd prec,mode
14648 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
14649 
14650 	clr.w		%d1
14651 	mov.b		DTAG(%a6),%d1
14652 	lsl.b		&0x3,%d1
14653 	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
14654 
14655 	bne.w		fadd_not_norm		# optimize on non-norm input
14656 
14657 #
14658 # ADD: norms and denorms
14659 #
14660 fadd_norm:
14661 	bsr.l		addsub_scaler2		# scale exponents
14662 
14663 fadd_zero_entry:			# uses FP_SCR0/FP_SCR1 operands; d0 = scale factor
14664 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14665 
14666 	fmov.l		&0x0,%fpsr		# clear FPSR
14667 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14668 
14669 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14670 
14671 	fmov.l		&0x0,%fpcr		# clear FPCR
14672 	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
14673 
14674 	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
14675 
14676 	fbeq.w		fadd_zero_exit		# if result is zero, end now
14677 
14678 	mov.l		%d2,-(%sp)		# save d2
14679 
14680 	fmovm.x		&0x01,-(%sp)		# save result to stack
14681 
14682 	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
14683 	lsr.b		&0x6,%d1		# d1 = rnd prec, used as table index
14684 
14685 	mov.w		(%sp),%d2		# fetch new sign, exp
14686 	andi.l		&0x7fff,%d2		# strip sign
14687 	sub.l		%d0,%d2			# exp - scale factor
14688 
14689 	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14690 	bge.b		fadd_ovfl		# yes
14691 
14692 	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14693 	blt.w		fadd_unfl		# yes
14694 	beq.w		fadd_may_unfl		# maybe; go find out
14695 
14696 fadd_normal:				# in: result on stack, d2 = new exp, saved d2 below it
14697 	mov.w		(%sp),%d1
14698 	andi.w		&0x8000,%d1		# keep sign
14699 	or.w		%d2,%d1			# concat sign,new exp
14700 	mov.w		%d1,(%sp)		# insert new exponent
14701 
14702 	fmovm.x		(%sp)+,&0x80		# return result in fp0
14703 
14704 	mov.l		(%sp)+,%d2		# restore d2
14705 	rts
14706 
14707 fadd_zero_exit:				# result was zero; fp0 is returned as computed
14708 #	fmov.s		&0x00000000,%fp0	# return zero in fp0
14709 	rts
14710 
14711 tbl_fadd_ovfl:				# exponent >= entry means overflow; indexed by rnd prec
14712 	long		0x7fff			# ext ovfl
14713 	long		0x407f			# sgl ovfl
14714 	long		0x43ff			# dbl ovfl
14715 
14716 tbl_fadd_unfl:				# exponent < entry means underflow, == means maybe
14717 	long	        0x0000			# ext unfl
14718 	long		0x3f81			# sgl unfl
14719 	long		0x3c01			# dbl unfl
14720 
14721 fadd_ovfl:				# in: result (12 bytes) on stack, saved d2 below it
14722 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14723 
14724 	mov.b		FPCR_ENABLE(%a6),%d1
14725 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14726 	bne.b		fadd_ovfl_ena		# yes
14727 
14728 	add.l		&0xc,%sp		# discard stacked result
14729 fadd_ovfl_dis:				# in: only the saved d2 remains on the stack
14730 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
14731 	sne		%d1			# set sign param accordingly
14732 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14733 	bsr.l		ovf_res			# calculate default result
14734 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
14735 	fmovm.x		(%a0),&0x80		# return default result in fp0
14736 	mov.l		(%sp)+,%d2		# restore d2
14737 	rts
14738 
14739 fadd_ovfl_ena:				# in: result on stack, d2 = exp - scale factor
14740 	mov.l		L_SCR3(%a6),%d1		# fetch rnd info; prec bits are in the low byte
14741 	andi.b		&0xc0,%d1		# is precision extended?
14742 	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
14743 # note: a byte load from L_SCR3 would read the longword's high byte, not the prec bits.
14744 fadd_ovfl_ena_cont:			# in: (sp) = result to turn into the EXOP
14745 	mov.w		(%sp),%d1
14746 	andi.w		&0x8000,%d1		# keep sign
14747 	subi.l		&0x6000,%d2		# subtract extra bias
14748 	andi.w		&0x7fff,%d2		# keep 15-bit exponent field
14749 	or.w		%d2,%d1			# concat sign,new exp
14750 	mov.w		%d1,(%sp)		# insert new exponent
14751 
14752 	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
14753 	bra.b		fadd_ovfl_dis		# then produce default result in fp0
14754 
14755 fadd_ovfl_ena_sd:			# redo the add with the rnd prec bits cleared
14756 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14757 
14758 	mov.l		L_SCR3(%a6),%d1
14759 	andi.b		&0x30,%d1		# keep rnd mode
14760 	fmov.l		%d1,%fpcr		# set FPCR
14761 
14762 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14763 
14764 	fmov.l		&0x0,%fpcr		# clear FPCR
14765 
14766 	add.l		&0xc,%sp		# discard the old stacked result
14767 	fmovm.x		&0x01,-(%sp)		# push the new result in its place
14768 	bra.b		fadd_ovfl_ena_cont
14769 
14770 fadd_unfl:				# in: result (12 bytes) on stack, saved d2 below it
14771 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14772 
14773 	add.l		&0xc,%sp		# discard stacked result
14774 
14775 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14776 
14777 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR to RZ rounding only
14778 	fmov.l		&0x0,%fpsr		# clear FPSR
14779 
14780 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14781 
14782 	fmov.l		&0x0,%fpcr		# clear FPCR
14783 	fmov.l		%fpsr,%d1		# save status
14784 
14785 	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
14786 
14787 	mov.b		FPCR_ENABLE(%a6),%d1
14788 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14789 	bne.b		fadd_unfl_ena		# yes
14790 
14791 fadd_unfl_dis:				# also the tail of fadd_unfl_ena
14792 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14793 
14794 	lea		FP_SCR0(%a6),%a0	# pass: result addr
14795 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14796 	bsr.l		unf_res			# calculate default result
14797 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14798 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14799 	mov.l		(%sp)+,%d2		# restore d2
14800 	rts
14801 
14802 fadd_unfl_ena:				# redo add into fp1 to build the EXOP
14803 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14804 
14805 	mov.l		L_SCR3(%a6),%d1
14806 	andi.b		&0xc0,%d1		# is precision extended?
14807 	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
14808 
14809 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14810 
14811 fadd_unfl_ena_cont:			# in: FPCR already set, fp1 = dst op
14812 	fmov.l		&0x0,%fpsr		# clear FPSR
14813 
14814 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
14815 
14816 	fmov.l		&0x0,%fpcr		# clear FPCR
14817 
14818 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14819 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14820 	mov.l		%d1,%d2			# make a copy (d2 is restored in fadd_unfl_dis)
14821 	andi.l		&0x7fff,%d1		# strip sign
14822 	andi.w		&0x8000,%d2		# keep old sign
14823 	sub.l		%d0,%d1			# exp - scale factor
14824 	addi.l		&0x6000,%d1		# add new bias
14825 	andi.w		&0x7fff,%d1		# clear top bit
14826 	or.w		%d2,%d1			# concat sign,new exp
14827 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14828 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14829 	bra.w		fadd_unfl_dis		# fp0 still holds the RZ result from fadd_unfl
14830 
14831 fadd_unfl_ena_sd:			# sgl/dbl: redo the add with rnd prec bits cleared
14832 	mov.l		L_SCR3(%a6),%d1
14833 	andi.b		&0x30,%d1		# use only rnd mode
14834 	fmov.l		%d1,%fpcr		# set FPCR
14835 
14836 	bra.b		fadd_unfl_ena_cont
14837 
14838 #
14839 # result is equal to the smallest normalized number in the selected precision
14840 # if the precision is extended, this result could not have come from an
14841 # underflow that rounded up.
14842 #
14843 fadd_may_unfl:
14844 	mov.l		L_SCR3(%a6),%d1
14845 	andi.b		&0xc0,%d1
14846 	beq.w		fadd_normal		# yes; no underflow occurred
14847 
14848 	mov.l		0x4(%sp),%d1		# extract hi(man)
14849 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
14850 	bne.w		fadd_normal		# no; no underflow occurred
14851 
14852 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
14853 	bne.w		fadd_normal		# no; no underflow occurred
14854 
14855 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856 	beq.w		fadd_normal		# no; no underflow occurred
14857 
14858 #
14859 # ok, so now the result has a exponent equal to the smallest normalized
14860 # exponent for the selected precision. also, the mantissa is equal to
14861 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862 # g,r,s.
14863 # now, we must determine whether the pre-rounded result was an underflow
14864 # rounded "up" or a normalized number rounded "down".
14865 # so, we do this be re-executing the add using RZ as the rounding mode and
14866 # seeing if the new result is smaller or equal to the current result.
14867 #
14868 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
14869 
14870 	mov.l		L_SCR3(%a6),%d1
14871 	andi.b		&0xc0,%d1		# keep rnd prec
14872 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
14873 	fmov.l		%d1,%fpcr		# set FPCR
14874 	fmov.l		&0x0,%fpsr		# clear FPSR
14875 
14876 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
14877 
14878 	fmov.l		&0x0,%fpcr		# clear FPCR
14879 
14880 	fabs.x		%fp0			# compare absolute values
14881 	fabs.x		%fp1
14882 	fcmp.x		%fp0,%fp1		# is first result > second?
14883 
14884 	fbgt.w		fadd_unfl		# yes; it's an underflow
14885 	bra.w		fadd_normal		# no; it's not an underflow
14886 
14887 ##########################################################################
14888 
14889 #
14890 # Add: inputs are not both normalized; what are they?
14891 #
14892 fadd_not_norm:
14893 	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
14894 	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)
14895 
14896 	swbeg		&48
14897 tbl_fadd_op:
14898 	short		fadd_norm	- tbl_fadd_op # NORM + NORM
14899 	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
14900 	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
14901 	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
14902 	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
14903 	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
14904 	short		tbl_fadd_op	- tbl_fadd_op #
14905 	short		tbl_fadd_op	- tbl_fadd_op #
14906 
14907 	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
14908 	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
14909 	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
14910 	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
14911 	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
14912 	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
14913 	short		tbl_fadd_op	- tbl_fadd_op #
14914 	short		tbl_fadd_op	- tbl_fadd_op #
14915 
14916 	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
14917 	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
14918 	short		fadd_inf_2	- tbl_fadd_op # INF + INF
14919 	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
14920 	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
14921 	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
14922 	short		tbl_fadd_op	- tbl_fadd_op #
14923 	short		tbl_fadd_op	- tbl_fadd_op #
14924 
14925 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
14926 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
14927 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
14928 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
14929 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
14930 	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
14931 	short		tbl_fadd_op	- tbl_fadd_op #
14932 	short		tbl_fadd_op	- tbl_fadd_op #
14933 
14934 	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
14935 	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
14936 	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
14937 	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
14938 	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
14939 	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
14940 	short		tbl_fadd_op	- tbl_fadd_op #
14941 	short		tbl_fadd_op	- tbl_fadd_op #
14942 
14943 	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
14944 	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
14945 	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
14946 	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
14947 	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
14948 	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
14949 	short		tbl_fadd_op	- tbl_fadd_op #
14950 	short		tbl_fadd_op	- tbl_fadd_op #
14951 
14952 fadd_res_qnan:
14953 	bra.l		res_qnan
14954 fadd_res_snan:
14955 	bra.l		res_snan
14956 
14957 #
14958 # both operands are ZEROes
14959 #
14960 fadd_zero_2:
14961 	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
14962 	mov.b		DST_EX(%a1),%d1
14963 	eor.b		%d0,%d1
14964 	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
14965 
14966 # the signs are the same. so determine whether they are positive or negative
14967 # and return the appropriately signed zero.
14968 	tst.b		%d0			# are ZEROes positive or negative?
14969 	bmi.b		fadd_zero_rm		# negative
14970 	fmov.s		&0x00000000,%fp0	# return +ZERO
14971 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
14972 	rts
14973 
14974 #
14975 # the ZEROes have opposite signs:
14976 # - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14977 # - -ZERO is returned in the case of RM.
14978 #
14979 fadd_zero_2_chk_rm:
14980 	mov.b		3+L_SCR3(%a6),%d1
14981 	andi.b		&0x30,%d1		# extract rnd mode
14982 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
14983 	beq.b		fadd_zero_rm		# yes
14984 	fmov.s		&0x00000000,%fp0	# return +ZERO
14985 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
14986 	rts
14987 
14988 fadd_zero_rm:
14989 	fmov.s		&0x80000000,%fp0	# return -ZERO
14990 	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
14991 	rts
14992 
14993 #
14994 # one operand is a ZERO and the other is a DENORM or NORM. scale
14995 # the DENORM or NORM and jump to the regular fadd routine.
14996 #
14997 fadd_zero_dst:
14998 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
14999 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15000 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15001 	bsr.l		scale_to_zero_src	# scale the operand
15002 	clr.w		FP_SCR1_EX(%a6)
15003 	clr.l		FP_SCR1_HI(%a6)
15004 	clr.l		FP_SCR1_LO(%a6)
15005 	bra.w		fadd_zero_entry		# go execute fadd
15006 
15007 fadd_zero_src:
15008 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
15009 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
15010 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
15011 	bsr.l		scale_to_zero_dst	# scale the operand
15012 	clr.w		FP_SCR0_EX(%a6)
15013 	clr.l		FP_SCR0_HI(%a6)
15014 	clr.l		FP_SCR0_LO(%a6)
15015 	bra.w		fadd_zero_entry		# go execute fadd
15016 
15017 #
15018 # both operands are INFs. an OPERR will result if the INFs have
15019 # different signs. else, an INF of the same sign is returned
15020 #
15021 fadd_inf_2:
15022 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
15023 	mov.b		DST_EX(%a1),%d1
15024 	eor.b		%d1,%d0
15025 	bmi.l		res_operr		# weed out (-INF)+(+INF)
15026 
15027 # ok, so it's not an OPERR. but, we do have to remember to return the
15028 # src INF since that's where the 881/882 gets the j-bit from...
15029 
15030 #
15031 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15032 #
15033 fadd_inf_src:
15034 	fmovm.x		SRC(%a0),&0x80		# return src INF
15035 	tst.b		SRC_EX(%a0)		# is INF positive?
15036 	bpl.b		fadd_inf_done		# yes; we're done
15037 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15038 	rts
15039 
15040 #
15041 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15042 #
15043 fadd_inf_dst:
15044 	fmovm.x		DST(%a1),&0x80		# return dst INF
15045 	tst.b		DST_EX(%a1)		# is INF positive?
15046 	bpl.b		fadd_inf_done		# yes; we're done
15047 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15048 	rts
15049 
15050 fadd_inf_done:
15051 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
15052 	rts
15053 
15054 #########################################################################
15055 # XDEF ****************************************************************	#
15056 #	fsub(): emulates the fsub instruction				#
15057 #	fssub(): emulates the fssub instruction				#
15058 #	fdsub(): emulates the fdsub instruction				#
15059 #									#
15060 # XREF ****************************************************************	#
15061 #	addsub_scaler2() - scale the operands so they won't take exc	#
15062 #	ovf_res() - return default overflow result			#
15063 #	unf_res() - return default underflow result			#
15064 #	res_qnan() - set QNAN result					#
15065 #	res_snan() - set SNAN result					#
15066 #	res_operr() - set OPERR result					#
15067 #	scale_to_zero_src() - set src operand exponent equal to zero	#
15068 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
15069 #									#
15070 # INPUT ***************************************************************	#
15071 #	a0 = pointer to extended precision source operand		#
15072 #	a1 = pointer to extended precision destination operand		#
15073 #									#
15074 # OUTPUT **************************************************************	#
15075 #	fp0 = result							#
15076 #	fp1 = EXOP (if exception occurred)				#
15077 #									#
15078 # ALGORITHM ***********************************************************	#
15079 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
15080 # norms into extended, single, and double precision.			#
15081 #	Do subtraction after scaling exponents such that exception won't#
15082 # occur. Then, check result exponent to see if exception would have	#
15083 # occurred. If so, return default result and maybe EXOP. Else, insert	#
15084 # the correct result exponent and return. Set FPSR bits as appropriate.	#
15085 #									#
15086 #########################################################################
15087 
15088 	global		fssub
15089 fssub:
15090 	andi.b		&0x30,%d0		# clear rnd prec
15091 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
15092 	bra.b		fsub
15093 
15094 	global		fdsub
15095 fdsub:
15096 	andi.b		&0x30,%d0		# clear rnd prec
15097 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
15098 
# fsub: common entry. d0 (rnd prec,mode) is saved in L_SCR3 and the operand
# tags are combined into d1 = (DTAG << 3) | STAG. A non-zero d1 means at
# least one operand is not a NORM and is dispatched through fsub_not_norm.
	global		fsub
fsub:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l		addsub_scaler2		# scale exponents

# The scaled operands are in FP_SCR1 (dst) and FP_SCR0 (src) and d0 holds
# the scale factor. The subtract is executed with the user's rounding
# precision and mode, and the resulting FPSR is merged into USER_FPSR.
fsub_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fsub_zero_exit		# if result zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1	# fetch rnd prec,mode
	lsr.b		&0x6,%d1		# d1 = rnd prec (table index)

	mov.w		(%sp),%d2		# fetch new exponent
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor

	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fsub_ovfl		# yes

	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fsub_unfl		# yes
	beq.w		fsub_may_unfl		# maybe; go find out

# no exception: put the exponent in d2 into the stacked result (keeping its
# sign) and return it in fp0.
fsub_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

# the result was zero; fp0 already holds it, so there is nothing to do.
fsub_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts
15163 
# Exponent thresholds, indexed by rounding precision (ext, sgl, dbl).
# fsub_zero_entry treats a result exponent >= the tbl_fsub_ovfl entry as an
# overflow.
tbl_fsub_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

# A result exponent < the tbl_fsub_unfl entry is an underflow; an exponent
# equal to the entry is a possible underflow (see fsub_may_unfl).
tbl_fsub_unfl:
	long	        0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl
15173 
#
# fsub: the result overflowed.
# Sets ovfl/aovfl/ainex in USER_FPSR. If OVFL or INEX is enabled, the EXOP is
# built in fsub_ovfl_ena (which also consumes the stacked result); otherwise
# the stacked result is discarded here.
#
fsub_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsub_ovfl_ena		# yes

	add.l		&0xc,%sp		# pop the saved 12-byte result
# Create the default overflow result: ovf_res() is called with d1 = sign
# flag (set from the N ccode bit) and d0 = rnd prec,mode. After the call the
# byte in d0 is OR'ed into FPSR_CC and the result is loaded from (a0).
fsub_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts
15191 
#
# fsub overflow with OVFL or INEX enabled: build the EXOP and return it in fp1.
#
# On entry the rounded result (12 bytes) is on top of the stack, d0 holds
# the scale factor and d2 holds the result exponent with the scale factor
# applied. The EXOP is the result with 0x6000 subtracted from the exponent
# (mod 0x8000). If the rounding precision is single or double, the subtract
# is first re-executed with only the rounding mode in the FPCR (precision
# bits clear) so that the EXOP is rounded to extended precision, and the
# stacked result is replaced by it.
#
# The rnd prec/mode live in the LOW byte of the L_SCR3 longword, so the
# precision must be tested there; a byte read at offset 0 of L_SCR3 would
# fetch the most significant byte instead and the sgl/dbl case would never
# be detected.
#
fsub_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1		# fetch rnd info (prec in low byte)
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_ovfl_ena_sd	# no

fsub_ovfl_ena_cont:
	mov.w		(%sp),%d1		# fetch {sgn,exp}
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fsub_ovfl_dis		# go create the default result

fsub_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# drop the sgl/dbl rounded result
	fmovm.x		&0x01,-(%sp)		# stack the ext rounded result

# the extended-precision rounding may not carry out of the mantissa where the
# sgl/dbl rounding did, so re-derive the exponent from the new result rather
# than reusing the one taken from the sgl/dbl result. d0 has not been touched
# since fsub_zero_entry and still holds the scale factor.
	mov.w		(%sp),%d2		# fetch new {sgn,exp}
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor
	bra.b		fsub_ovfl_ena_cont
15222 
#
# fsub: the result underflowed.
# Sets the UNFL exception bit, drops the 12-byte result saved on the stack,
# and re-executes the subtract from FP_SCR1/FP_SCR0 in round-to-zero mode.
# The resulting FPSR is merged into USER_FPSR. If UNFL or INEX is enabled,
# the EXOP is built first in fsub_unfl_ena; otherwise control falls through
# to fsub_unfl_dis.
#
fsub_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# pop the saved 12-byte result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# merge status into USER_FPSR

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsub_unfl_ena		# yes

#
# Create the default underflow result: fp0 is written to FP_SCR0, unf_res()
# is called with a0 = address of FP_SCR0 and d1 = rnd prec,mode, the byte
# left in d0 is OR'ed into FPSR_CC, and FP_SCR0 is reloaded into fp0.
# The d2 saved on the stack is restored before returning.
#
fsub_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts
15254 
#
# fsub underflow with UNFL or INEX enabled: compute the EXOP in fp1.
# The subtract is re-executed into fp1 using the user's rounding mode. For
# extended precision the FPCR is loaded straight from L_SCR3; for sgl/dbl
# (fsub_unfl_ena_sd) only the rounding-mode bits are kept. The exponent of
# that result then has the scale factor in d0 applied and 0x6000 added
# (mod 0x8000) before the value is loaded back into fp1. Control then
# continues at fsub_unfl_dis for the default result. d2 is used as scratch
# and FP_SCR0 is overwritten.
#
fsub_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_unfl_ena_sd	# no

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sgn,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsub_unfl_dis

# sgl or dbl: load the FPCR with the rounding mode only (precision bits clear)
fsub_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fsub_unfl_ena_cont
15290 
15291 #
15292 # result is equal to the smallest normalized number in the selected precision
15293 # if the precision is extended, this result could not have come from an
15294 # underflow that rounded up.
15295 #
15296 fsub_may_unfl:
15297 	mov.l		L_SCR3(%a6),%d1
15298 	andi.b		&0xc0,%d1		# fetch rnd prec
15299 	beq.w		fsub_normal		# yes; no underflow occurred
15300 
15301 	mov.l		0x4(%sp),%d1
15302 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
15303 	bne.w		fsub_normal		# no; no underflow occurred
15304 
15305 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
15306 	bne.w		fsub_normal		# no; no underflow occurred
15307 
15308 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15309 	beq.w		fsub_normal		# no; no underflow occurred
15310 
15311 #
15312 # ok, so now the result has a exponent equal to the smallest normalized
15313 # exponent for the selected precision. also, the mantissa is equal to
15314 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
15315 # g,r,s.
15316 # now, we must determine whether the pre-rounded result was an underflow
15317 # rounded "up" or a normalized number rounded "down".
15318 # so, we do this be re-executing the add using RZ as the rounding mode and
15319 # seeing if the new result is smaller or equal to the current result.
15320 #
15321 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
15322 
15323 	mov.l		L_SCR3(%a6),%d1
15324 	andi.b		&0xc0,%d1		# keep rnd prec
15325 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
15326 	fmov.l		%d1,%fpcr		# set FPCR
15327 	fmov.l		&0x0,%fpsr		# clear FPSR
15328 
15329 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
15330 
15331 	fmov.l		&0x0,%fpcr		# clear FPCR
15332 
15333 	fabs.x		%fp0			# compare absolute values
15334 	fabs.x		%fp1
15335 	fcmp.x		%fp0,%fp1		# is first result > second?
15336 
15337 	fbgt.w		fsub_unfl		# yes; it's an underflow
15338 	bra.w		fsub_normal		# no; it's not an underflow
15339 
15340 ##########################################################################
15341 
15342 #
15343 # Sub: inputs are not both normalized; what are they?
15344 #
15345 fsub_not_norm:
15346 	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
15347 	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
15348 
15349 	swbeg		&48
15350 tbl_fsub_op:
15351 	short		fsub_norm	- tbl_fsub_op # NORM - NORM
15352 	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
15353 	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
15354 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15355 	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
15356 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15357 	short		tbl_fsub_op	- tbl_fsub_op #
15358 	short		tbl_fsub_op	- tbl_fsub_op #
15359 
15360 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
15361 	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
15362 	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
15363 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15364 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
15365 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15366 	short		tbl_fsub_op	- tbl_fsub_op #
15367 	short		tbl_fsub_op	- tbl_fsub_op #
15368 
15369 	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
15370 	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
15371 	short		fsub_inf_2	- tbl_fsub_op # INF - INF
15372 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15373 	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
15374 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15375 	short		tbl_fsub_op	- tbl_fsub_op #
15376 	short		tbl_fsub_op	- tbl_fsub_op #
15377 
15378 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
15379 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
15380 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
15381 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
15382 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
15383 	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
15384 	short		tbl_fsub_op	- tbl_fsub_op #
15385 	short		tbl_fsub_op	- tbl_fsub_op #
15386 
15387 	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
15388 	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
15389 	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
15390 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15391 	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
15392 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15393 	short		tbl_fsub_op	- tbl_fsub_op #
15394 	short		tbl_fsub_op	- tbl_fsub_op #
15395 
15396 	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
15397 	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
15398 	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
15399 	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
15400 	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
15401 	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
15402 	short		tbl_fsub_op	- tbl_fsub_op #
15403 	short		tbl_fsub_op	- tbl_fsub_op #
15404 
15405 fsub_res_qnan:
15406 	bra.l		res_qnan
15407 fsub_res_snan:
15408 	bra.l		res_snan
15409 
15410 #
15411 # both operands are ZEROes
15412 #
15413 fsub_zero_2:
15414 	mov.b		SRC_EX(%a0),%d0
15415 	mov.b		DST_EX(%a1),%d1
15416 	eor.b		%d1,%d0
15417 	bpl.b		fsub_zero_2_chk_rm
15418 
15419 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
15420 	tst.b		%d0			# is dst negative?
15421 	bmi.b		fsub_zero_2_rm		# yes
15422 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
15423 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
15424 	rts
15425 
15426 #
15427 # the ZEROes have the same signs:
15428 # - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15429 # - -ZERO is returned in the case of RM.
15430 #
15431 fsub_zero_2_chk_rm:
15432 	mov.b		3+L_SCR3(%a6),%d1
15433 	andi.b		&0x30,%d1		# extract rnd mode
15434 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
15435 	beq.b		fsub_zero_2_rm		# yes
15436 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
15437 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
15438 	rts
15439 
15440 fsub_zero_2_rm:
15441 	fmov.s		&0x80000000,%fp0	# return -ZERO
15442 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
15443 	rts
15444 
15445 #
15446 # one operand is a ZERO and the other is a DENORM or a NORM.
15447 # scale the DENORM or NORM and jump to the regular fsub routine.
15448 #
15449 fsub_zero_dst:
15450 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
15451 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15452 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15453 	bsr.l		scale_to_zero_src	# scale the operand
15454 	clr.w		FP_SCR1_EX(%a6)
15455 	clr.l		FP_SCR1_HI(%a6)
15456 	clr.l		FP_SCR1_LO(%a6)
15457 	bra.w		fsub_zero_entry		# go execute fsub
15458 
15459 fsub_zero_src:
15460 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
15461 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
15462 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
15463 	bsr.l		scale_to_zero_dst	# scale the operand
15464 	clr.w		FP_SCR0_EX(%a6)
15465 	clr.l		FP_SCR0_HI(%a6)
15466 	clr.l		FP_SCR0_LO(%a6)
15467 	bra.w		fsub_zero_entry		# go execute fsub
15468 
15469 #
15470 # both operands are INFs. an OPERR will result if the INFs have the
15471 # same signs. else,
15472 #
15473 fsub_inf_2:
15474 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
15475 	mov.b		DST_EX(%a1),%d1
15476 	eor.b		%d1,%d0
15477 	bpl.l		res_operr		# weed out (-INF)+(+INF)
15478 
15479 # ok, so it's not an OPERR. but we do have to remember to return
15480 # the src INF since that's where the 881/882 gets the j-bit.
15481 
15482 fsub_inf_src:
15483 	fmovm.x		SRC(%a0),&0x80		# return src INF
15484 	fneg.x		%fp0			# invert sign
15485 	fbge.w		fsub_inf_done		# sign is now positive
15486 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15487 	rts
15488 
15489 fsub_inf_dst:
15490 	fmovm.x		DST(%a1),&0x80		# return dst INF
15491 	tst.b		DST_EX(%a1)		# is INF negative?
15492 	bpl.b		fsub_inf_done		# no
15493 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15494 	rts
15495 
15496 fsub_inf_done:
15497 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
15498 	rts
15499 
15500 #########################################################################
15501 # XDEF ****************************************************************	#
15502 #	fsqrt(): emulates the fsqrt instruction				#
15503 #	fssqrt(): emulates the fssqrt instruction			#
15504 #	fdsqrt(): emulates the fdsqrt instruction			#
15505 #									#
15506 # XREF ****************************************************************	#
15507 #	scale_sqrt() - scale the source operand				#
15508 #	unf_res() - return default underflow result			#
15509 #	ovf_res() - return default overflow result			#
15510 #	res_qnan_1op() - return QNAN result				#
15511 #	res_snan_1op() - return SNAN result				#
15512 #									#
15513 # INPUT ***************************************************************	#
15514 #	a0 = pointer to extended precision source operand		#
15515 #	d0  rnd prec,mode						#
15516 #									#
15517 # OUTPUT **************************************************************	#
15518 #	fp0 = result							#
15519 #	fp1 = EXOP (if exception occurred)				#
15520 #									#
15521 # ALGORITHM ***********************************************************	#
15522 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
15523 # norms/denorms into ext/sgl/dbl precision.				#
15524 #	For norms/denorms, scale the exponents such that a sqrt		#
15525 # instruction won't cause an exception. Use the regular fsqrt to	#
15526 # compute a result. Check if the regular operands would have taken	#
15527 # an exception. If so, return the default overflow/underflow result	#
15528 # and return the EXOP if exceptions are enabled. Else, scale the	#
15529 # result operand to the proper exponent.				#
15530 #									#
15531 #########################################################################
15532 
15533 	global		fssqrt
15534 fssqrt:
15535 	andi.b		&0x30,%d0		# clear rnd prec
15536 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
15537 	bra.b		fsqrt
15538 
15539 	global		fdsqrt
15540 fdsqrt:
15541 	andi.b		&0x30,%d0		# clear rnd prec
15542 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
15543 
15544 	global		fsqrt
15545 fsqrt:
15546 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
15547 	clr.w		%d1
15548 	mov.b		STAG(%a6),%d1
15549 	bne.w		fsqrt_not_norm		# optimize on non-norm input
15550 
15551 #
15552 # SQUARE ROOT: norms and denorms ONLY!
15553 #
15554 fsqrt_norm:
15555 	tst.b		SRC_EX(%a0)		# is operand negative?
15556 	bmi.l		res_operr		# yes
15557 
15558 	andi.b		&0xc0,%d0		# is precision extended?
15559 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
15560 
15561 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15562 	fmov.l		&0x0,%fpsr		# clear FPSR
15563 
15564 	fsqrt.x		(%a0),%fp0		# execute square root
15565 
15566 	fmov.l		%fpsr,%d1
15567 	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
15568 
15569 	rts
15570 
#
# fsqrt of a denormalized operand.
# A negative operand is an OPERR. Single/double precision goes to
# fsqrt_not_ext. For extended precision the operand is copied to FP_SCR0,
# scaled by scale_sqrt(), and then handled by fsqrt_sd_normal.
#
fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src {sgn,exp}
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy src lo(man)

	bsr.l		scale_sqrt		# calculate scale factor

	bra.w		fsqrt_sd_normal
15585 
15586 #
15587 # operand is either single or double
15588 #
15589 fsqrt_not_ext:
15590 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
15591 	bne.w		fsqrt_dbl
15592 
15593 #
15594 # operand is to be rounded to single precision
15595 #
15596 fsqrt_sgl:
15597 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
15598 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15599 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15600 
15601 	bsr.l		scale_sqrt		# calculate scale factor
15602 
15603 	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
15604 	beq.w		fsqrt_sd_may_unfl
15605 	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
15606 	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
15607 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
15608 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
15609 
15610 #
15611 # operand will NOT overflow or underflow when moved in to the fp reg file
15612 #
15613 fsqrt_sd_normal:
15614 	fmov.l		&0x0,%fpsr		# clear FPSR
15615 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15616 
15617 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
15618 
15619 	fmov.l		%fpsr,%d1		# save FPSR
15620 	fmov.l		&0x0,%fpcr		# clear FPCR
15621 
15622 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15623 
15624 fsqrt_sd_normal_exit:
15625 	mov.l		%d2,-(%sp)		# save d2
15626 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
15627 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
15628 	mov.l		%d1,%d2			# make a copy
15629 	andi.l		&0x7fff,%d1		# strip sign
15630 	sub.l		%d0,%d1			# add scale factor
15631 	andi.w		&0x8000,%d2		# keep old sign
15632 	or.w		%d1,%d2			# concat old sign,new exp
15633 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
15634 	mov.l		(%sp)+,%d2		# restore d2
15635 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
15636 	rts
15637 
15638 #
15639 # operand is to be rounded to double precision
15640 #
15641 fsqrt_dbl:
15642 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
15643 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15644 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15645 
15646 	bsr.l		scale_sqrt		# calculate scale factor
15647 
15648 	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
15649 	beq.w		fsqrt_sd_may_unfl
15650 	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
15651 	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
15652 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
15653 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
15654 	bra.w		fsqrt_sd_normal		# no; ho handle normalized op
15655 
# we're on the line here and the distinguishing characteristic is whether
# the exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number
# elsewise fall through to underflow.
fsqrt_sd_may_unfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_normal		# yes, so no underflow

#
# operand WILL underflow when moved in to the fp register file
#
# The UNFL exception bit is set and the square root is computed in
# round-to-zero mode; the resulting FPSR is merged into USER_FPSR.
#
fsqrt_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsqrt_sd_unfl_ena	# yes

#
# Create the default underflow result: fp0 is written to FP_SCR0, unf_res()
# is called with a0 = address of FP_SCR0 and d1 = rnd prec,mode, the byte
# left in d0 is OR'ed into FPSR_CC, and FP_SCR0 is reloaded into fp0.
#
fsqrt_sd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
15693 
15694 #
15695 # operand will underflow AND underflow is enabled.
15696 # Therefore, we must return the result rounded to extended precision.
15697 #
# The EXOP returned in fp1 has to carry the mantissa of the square root.
# On entry FP_SCR0 still holds the scaled *operand* (the root computed by
# fsqrt_sd_unfl exists only in fp0), so copying FP_SCR0 would hand back the
# operand's mantissa. Instead, recompute the root into fp1 at extended
# precision using only the user's rounding mode, store it to FP_SCR1, and
# then unscale its exponent (subtract d0) and add the 0x6000 EXOP bias.
#
# In : d0 = scale factor from scale_sqrt, FP_SCR0 = scaled operand,
#      fp0 = RZ result (left untouched for fsqrt_sd_unfl_dis).
# Out: fp1 = EXOP; d0, d2, fp0 preserved; d1 and FP_SCR1 are scratch.
#
15698 fsqrt_sd_unfl_ena:
	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
	andi.l		&0x30,%d1		# keep rnd mode only => ext prec
	fmov.l		%d1,%fpcr		# set FPCR (no traps enabled)
	fsqrt.x		FP_SCR0(%a6),%fp1	# root of the scaled operand
	fmov.l		&0x0,%fpcr		# clear FPCR
	fmovm.x		&0x40,FP_SCR1(%a6)	# store root; base of the EXOP
15701 	mov.w		FP_SCR1_EX(%a6),%d1	# load root's {sgn,exp}
15702 
15703 	mov.l		%d2,-(%sp)		# save d2
15704 	mov.l		%d1,%d2			# make a copy
15705 	andi.l		&0x7fff,%d1		# strip sign
15706 	andi.w		&0x8000,%d2		# keep old sign
15707 	sub.l		%d0,%d1			# subtract scale factor
15708 	addi.l		&0x6000,%d1		# add new bias
15709 	andi.w		&0x7fff,%d1		# wrap into 15-bit exp field
15710 	or.w		%d2,%d1			# concat new sign,new exp
15711 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
15712 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
15713 	mov.l		(%sp)+,%d2		# restore d2
15714 	bra.b		fsqrt_sd_unfl_dis	# now build the default result
15715 
15716 #
15717 # operand WILL overflow.
15718 # (root is taken with the user's rnd prec,mode; ovf_res() makes the default)
15719 fsqrt_sd_ovfl:
15720 	fmov.l		&0x0,%fpsr		# clear FPSR
15721 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15722 
15723 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
15724 
15725 	fmov.l		&0x0,%fpcr		# clear FPCR
15726 	fmov.l		%fpsr,%d1		# save FPSR
15727 
15728 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15729 
# also entered from fsqrt_sd_may_ovfl when rounding produced an overflow.
15730 fsqrt_sd_ovfl_tst:
15731 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15732 
15733 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
15734 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
15735 	bne.b		fsqrt_sd_ovfl_ena	# yes
15736 
15737 #
15738 # OVFL is not enabled; therefore, we must create the default result by
15739 # calling ovf_res(). Also entered from fsqrt_sd_ovfl_ena after fp1 = EXOP.
15740 #
15741 fsqrt_sd_ovfl_dis:
15742 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
15743 	sne		%d1			# set sign param accordingly
15744 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
15745 	bsr.l		ovf_res			# calculate default result
15746 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
15747 	fmovm.x		(%a0),&0x80		# return default result in fp0
15748 	rts
15749 
15750 #
15751 # OVFL is enabled.
15752 # the INEX2 bit has already been updated by the round to the correct precision.
15753 # now, round to extended(and don't alter the FPSR).
15754 #
# The EXOP returned in fp1 has to carry the mantissa of the square root.
# On entry FP_SCR0 still holds the scaled *operand* (the root exists only in
# fp0), so the root is recomputed into fp1 at extended precision using only
# the user's rounding mode and stored over FP_SCR0 before the exponent is
# unscaled (subtract d0) and the 0x6000 EXOP bias is removed.
# USER_FPSR is not touched here.
#
# In : d0 = scale factor from scale_sqrt, FP_SCR0 = scaled operand.
# Out: fp1 = EXOP; d0 and d2 preserved; d1 and FP_SCR0 are scratch.
#
15755 fsqrt_sd_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
	andi.l		&0x30,%d1		# keep rnd mode only => ext prec
	fmov.l		%d1,%fpcr		# set FPCR (no traps enabled)
	fsqrt.x		FP_SCR0(%a6),%fp1	# root of the scaled operand
	fmov.l		&0x0,%fpcr		# clear FPCR
	fmovm.x		&0x40,FP_SCR0(%a6)	# replace operand with its root
15756 	mov.l		%d2,-(%sp)		# save d2
15757 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch root's {sgn,exp}
15758 	mov.l		%d1,%d2			# make a copy
15759 	andi.l		&0x7fff,%d1		# strip sign
15760 	andi.w		&0x8000,%d2		# keep old sign
15761 	sub.l		%d0,%d1			# subtract scale factor
15762 	subi.l		&0x6000,%d1		# subtract bias
15763 	andi.w		&0x7fff,%d1		# wrap into 15-bit exp field
15764 	or.w		%d2,%d1			# concat sign,exp
15765 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
15766 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
15767 	mov.l		(%sp)+,%d2		# restore d2
15768 	bra.b		fsqrt_sd_ovfl_dis	# now build the default result
15769 
15770 #
15771 # the move in MAY overflow. so...
15772 # (a 0x3fff scaled exponent overflows; for 0x3ffe it depends on rounding)
15773 fsqrt_sd_may_ovfl:
15774 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
15775 	bne.w		fsqrt_sd_ovfl		# yes, so overflow
15776 
15777 	fmov.l		&0x0,%fpsr		# clear FPSR
15778 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15779 
15780 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
15781 
15782 	fmov.l		%fpsr,%d1		# save status
15783 	fmov.l		&0x0,%fpcr		# clear FPCR
15784 
15785 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15786 
15787 	fmov.x		%fp0,%fp1		# make a copy of result
15788 	fcmp.b		%fp1,&0x1		# did result round up to >= 1?
15789 	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
15790 
15791 # no, it didn't overflow; we have correct result
15792 	bra.w		fsqrt_sd_normal_exit
15793 
15794 ##########################################################################
15795 
15796 #
15797 # input is not normalized; what is it?
15798 # (dispatch on the operand type tag in d1; anything left over is a QNAN)
15799 fsqrt_not_norm:
15800 	cmpi.b		%d1,&DENORM		# weed out DENORM
15801 	beq.w		fsqrt_denorm
15802 	cmpi.b		%d1,&ZERO		# weed out ZERO
15803 	beq.b		fsqrt_zero
15804 	cmpi.b		%d1,&INF		# weed out INF
15805 	beq.b		fsqrt_inf
15806 	cmpi.b		%d1,&SNAN		# weed out SNAN
15807 	beq.l		res_snan_1op
15808 	bra.l		res_qnan_1op		# must be a QNAN
15809 
15810 #
15811 #	fsqrt(+0) = +0
15812 #	fsqrt(-0) = -0
15813 #	fsqrt(+INF) = +INF
15814 #	fsqrt(-INF) = OPERR
15815 #
15816 fsqrt_zero:
15817 	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
15818 	bmi.b		fsqrt_zero_m		# negative
15819 fsqrt_zero_p:
15820 	fmov.s		&0x00000000,%fp0	# return +ZERO
15821 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
15822 	rts
15823 fsqrt_zero_m:
15824 	fmov.s		&0x80000000,%fp0	# return -ZERO
15825 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
15826 	rts
15827 
# fsqrt of an infinity: -INF is an operand error, +INF is returned unchanged.
15828 fsqrt_inf:
15829 	tst.b		SRC_EX(%a0)		# is INF positive or negative?
15830 	bmi.l		res_operr		# negative; operand error
15831 fsqrt_inf_p:
15832 	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
15833 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
15834 	rts
15835 
15836 ##########################################################################
15837 
15838 #########################################################################
15839 # XDEF ****************************************************************	#
15840 #	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
15841 #			  OVFL/UNFL exceptions will result		#
15842 #									#
15843 # XREF ****************************************************************	#
15844 #	norm() - normalize mantissa after adjusting exponent		#
15845 #									#
15846 # INPUT ***************************************************************	#
15847 #	FP_SRC(a6) = fp op1(src)					#
15848 #	FP_DST(a6) = fp op2(dst)					#
15849 #									#
15850 # OUTPUT **************************************************************	#
15851 #	FP_SRC(a6) = fp op1 scaled(src)					#
15852 #	FP_DST(a6) = fp op2 scaled(dst)					#
15853 #	d0         = scale amount					#
15854 #									#
15855 # ALGORITHM ***********************************************************	#
15856 #	If the DST exponent is > the SRC exponent, set the DST exponent	#
15857 # equal to 0x3fff and scale the SRC exponent by the value that the	#
15858 # DST exponent was scaled by. If the SRC exponent is greater or equal,	#
15859 # do the opposite. Return this scale factor in d0.			#
15860 #	If the two exponents differ by > the number of mantissa bits	#
15861 # plus two, then set the smallest exponent to a very small value as a	#
15862 # quick shortcut.							#
15863 #									#
15864 #########################################################################
15865 
# NOTE: the code below reads the operands through a0 (src) and a1 (dst) and
# works on copies held in FP_SCR0 (src) and FP_SCR1 (dst). L_SCR1 holds the
# two stripped exponents: word 0 = src, word 1 = dst.
15866 	global		addsub_scaler2
15867 addsub_scaler2:
15868 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src mantissa
15869 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# copy dst mantissa
15870 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15871 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
15872 	mov.w		SRC_EX(%a0),%d0		# d0 = src {sgn,exp}
15873 	mov.w		DST_EX(%a1),%d1		# d1 = dst {sgn,exp}
15874 	mov.w		%d0,FP_SCR0_EX(%a6)
15875 	mov.w		%d1,FP_SCR1_EX(%a6)
15876 
15877 	andi.w		&0x7fff,%d0		# strip src sign
15878 	andi.w		&0x7fff,%d1		# strip dst sign
15879 	mov.w		%d0,L_SCR1(%a6)		# store src exponent
15880 	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
15881 
15882 	cmp.w		%d0, %d1		# is src exp >= dst exp?
15883 	bge.l		src_exp_ge2
15884 
15885 # dst exp is >  src exp; scale dst to exp = 0x3fff
15886 dst_exp_gt2:
15887 	bsr.l		scale_to_zero_dst
15888 	mov.l		%d0,-(%sp)		# save scale factor
15889 
15890 	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
15891 	bne.b		cmpexp12
15892 
15893 	lea		FP_SCR0(%a6),%a0	# pass ptr to src copy
15894 	bsr.l		norm			# normalize the denorm; result is new exp
15895 	neg.w		%d0			# new exp = -(shft val)
15896 	mov.w		%d0,L_SCR1(%a6)		# insert new src exp
15897 
15898 cmpexp12:
15899 	mov.w		2+L_SCR1(%a6),%d0	# fetch dst (larger) exponent
15900 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
15901 
15902 	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
15903 	bge.b		quick_scale12
15904 
15905 	mov.w		L_SCR1(%a6),%d0		# fetch src exponent
15906 	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
15907 	mov.w		FP_SCR0_EX(%a6),%d1
15908 	and.w		&0x8000,%d1		# keep src sign
15909 	or.w		%d1,%d0			# concat {sgn,new exp}
15910 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
15911 
15912 	mov.l		(%sp)+,%d0		# return SCALE factor
15913 	rts
15914 
15915 quick_scale12:
15916 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
15917 	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
15918 
15919 	mov.l		(%sp)+,%d0		# return SCALE factor
15920 	rts
15921 
15922 # src exp is >= dst exp; scale src to exp = 0x3fff
15923 src_exp_ge2:
15924 	bsr.l		scale_to_zero_src
15925 	mov.l		%d0,-(%sp)		# save scale factor
15926 
15927 	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
15928 	bne.b		cmpexp22
15929 	lea		FP_SCR1(%a6),%a0	# pass ptr to dst copy
15930 	bsr.l		norm			# normalize the denorm; result is new exp
15931 	neg.w		%d0			# new exp = -(shft val)
15932 	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp
15933 
15934 cmpexp22:
15935 	mov.w		L_SCR1(%a6),%d0		# fetch src (larger) exponent
15936 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
15937 
15938 	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
15939 	bge.b		quick_scale22
15940 
15941 	mov.w		2+L_SCR1(%a6),%d0	# fetch dst exponent
15942 	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
15943 	mov.w		FP_SCR1_EX(%a6),%d1
15944 	andi.w		&0x8000,%d1		# keep dst sign
15945 	or.w		%d1,%d0			# concat {sgn,new exp}
15946 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
15947 
15948 	mov.l		(%sp)+,%d0		# return SCALE factor
15949 	rts
15950 
15951 quick_scale22:
15952 	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
15953 	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
15954 
15955 	mov.l		(%sp)+,%d0		# return SCALE factor
15956 	rts
15957 
15958 ##########################################################################
15959 
15960 #########################################################################
15961 # XDEF ****************************************************************	#
15962 #	scale_to_zero_src(): scale the exponent of extended precision	#
15963 #			     value at FP_SCR0(a6).			#
15964 #									#
15965 # XREF ****************************************************************	#
15966 #	norm() - normalize the mantissa if the operand was a DENORM	#
15967 #									#
15968 # INPUT ***************************************************************	#
15969 #	FP_SCR0(a6) = extended precision operand to be scaled		#
15970 #									#
15971 # OUTPUT **************************************************************	#
15972 #	FP_SCR0(a6) = scaled extended precision operand			#
15973 #	d0	    = scale value					#
15974 #									#
15975 # ALGORITHM ***********************************************************	#
15976 #	Set the exponent of the input operand to 0x3fff. Save the value	#
15977 # of the difference between the original and new exponent. Then,	#
15978 # normalize the operand if it was a DENORM. Add this normalization	#
15979 # value to the previous value. Return the result.			#
15980 #									#
15981 #########################################################################
15982 
15983 	global		scale_to_zero_src
15984 scale_to_zero_src:
15985 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
15986 	mov.w		%d1,%d0			# make a copy
15987 
15988 	andi.l		&0x7fff,%d1		# extract operand's exponent
15989 
15990 	andi.w		&0x8000,%d0		# extract operand's sgn
15991 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
15992 
15993 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
15994 
15995 	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
15996 	beq.b		stzs_denorm		# yes; normalize the DENORM
15997 
# d1 = operand's exponent on entry here (from above or from stzs_denorm)
15998 stzs_norm:
15999 	mov.l		&0x3fff,%d0
16000 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
16001 
16002 	rts
16003 
16004 stzs_denorm:
16005 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
16006 	bsr.l		norm			# normalize denorm
16007 	neg.l		%d0			# new exponent = -(shft val)
16008 	mov.l		%d0,%d1			# d1 = exponent for stzs_norm
16009 	bra.b		stzs_norm		# finish scaling
16010 
16011 ###
16012 
16013 #########################################################################
16014 # XDEF ****************************************************************	#
16015 #	scale_sqrt(): scale the input operand exponent so a subsequent	#
16016 #		      fsqrt operation won't take an exception.		#
16017 #									#
16018 # XREF ****************************************************************	#
16019 #	norm() - normalize the mantissa if the operand was a DENORM	#
16020 #									#
16021 # INPUT ***************************************************************	#
16022 #	FP_SCR0(a6) = extended precision operand to be scaled		#
16023 #									#
16024 # OUTPUT **************************************************************	#
16025 #	FP_SCR0(a6) = scaled extended precision operand			#
16026 #	d0	    = scale value					#
16027 #									#
16028 # ALGORITHM ***********************************************************	#
16029 #	If the input operand is a DENORM, normalize it.			#
16030 #	If the exponent of the input operand is even, set the exponent	#
16031 # to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the	#
16032 # exponent of the input operand is odd, set the exponent to 0x3fff and	#
16033 # return a scale factor of "(0x3fff-exp)/2".				#
16034 #									#
16035 #########################################################################
16036 
16037 	global		scale_sqrt
16038 scale_sqrt:
16039 	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
16040 	beq.b		ss_denorm		# yes; normalize the DENORM
16041 
16042 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
16043 	andi.l		&0x7fff,%d1		# extract operand's exponent
16044 
16045 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn
16046 
16047 	btst		&0x0,%d1		# is exp even or odd?
16048 	beq.b		ss_norm_even
16049 
16050 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
16051 
16052 	mov.l		&0x3fff,%d0
16053 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
16054 	asr.l		&0x1,%d0		# divide scale factor by 2
16055 	rts
16056 
16057 ss_norm_even:
16058 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=-1)
16059 
16060 	mov.l		&0x3ffe,%d0
16061 	sub.l		%d1,%d0			# scale = (BIAS-1) + (-exp)
16062 	asr.l		&0x1,%d0		# divide scale factor by 2
16063 	rts
16064 
# DENORM: the value norm() returns in d0 takes the place of "-exp" below.
16065 ss_denorm:
16066 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
16067 	bsr.l		norm			# normalize denorm
16068 
16069 	btst		&0x0,%d0		# is exp even or odd?
16070 	beq.b		ss_denorm_even
16071 
16072 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
16073 
16074 	add.l		&0x3fff,%d0		# scale = BIAS + (shft val)
16075 	asr.l		&0x1,%d0		# divide scale factor by 2
16076 	rts
16077 
16078 ss_denorm_even:
16079 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=-1)
16080 
16081 	add.l		&0x3ffe,%d0		# scale = (BIAS-1) + (shft val)
16082 	asr.l		&0x1,%d0		# divide scale factor by 2
16083 	rts
16084 
16085 ###
16086 
16087 #########################################################################
16088 # XDEF ****************************************************************	#
16089 #	scale_to_zero_dst(): scale the exponent of extended precision	#
16090 #			     value at FP_SCR1(a6).			#
16091 #									#
16092 # XREF ****************************************************************	#
16093 #	norm() - normalize the mantissa if the operand was a DENORM	#
16094 #									#
16095 # INPUT ***************************************************************	#
16096 #	FP_SCR1(a6) = extended precision operand to be scaled		#
16097 #									#
16098 # OUTPUT **************************************************************	#
16099 #	FP_SCR1(a6) = scaled extended precision operand			#
16100 #	d0	    = scale value					#
16101 #									#
16102 # ALGORITHM ***********************************************************	#
16103 #	Set the exponent of the input operand to 0x3fff. Save the value	#
16104 # of the difference between the original and new exponent. Then,	#
16105 # normalize the operand if it was a DENORM. Add this normalization	#
16106 # value to the previous value. Return the result.			#
16107 #									#
16108 #########################################################################
16109 
16110 	global		scale_to_zero_dst
16111 scale_to_zero_dst:
16112 	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
16113 	mov.w		%d1,%d0			# make a copy
16114 
16115 	andi.l		&0x7fff,%d1		# extract operand's exponent
16116 
16117 	andi.w		&0x8000,%d0		# extract operand's sgn
16118 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
16119 
16120 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
16121 
16122 	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
16123 	beq.b		stzd_denorm		# yes; normalize the DENORM
16124 
# d1 = operand's exponent on entry here (from above or from stzd_denorm)
16125 stzd_norm:
16126 	mov.l		&0x3fff,%d0
16127 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
16128 	rts
16129 
16130 stzd_denorm:
16131 	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
16132 	bsr.l		norm			# normalize denorm
16133 	neg.l		%d0			# new exponent = -(shft val)
16134 	mov.l		%d0,%d1			# d1 = exponent for stzd_norm
16135 	bra.b		stzd_norm		# finish scaling
16136 
16137 ##########################################################################
16138 
16139 #########################################################################
16140 # XDEF ****************************************************************	#
16141 #	res_qnan(): return default result w/ QNAN operand for dyadic	#
16142 #	res_snan(): return default result w/ SNAN operand for dyadic	#
16143 #	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
16144 #	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
16145 #									#
16146 # XREF ****************************************************************	#
16147 #	None								#
16148 #									#
16149 # INPUT ***************************************************************	#
16150 #	FP_SRC(a6) = pointer to extended precision src operand		#
16151 #	FP_DST(a6) = pointer to extended precision dst operand		#
16152 #									#
16153 # OUTPUT **************************************************************	#
16154 #	fp0 = default result						#
16155 #									#
16156 # ALGORITHM ***********************************************************	#
16157 #	If either operand (but not both operands) of an operation is a	#
16158 # nonsignalling NAN, then that NAN is returned as the result. If both	#
16159 # operands are nonsignalling NANs, then the destination operand		#
16160 # nonsignalling NAN is returned as the result.				#
16161 #	If either operand to an operation is a signalling NAN (SNAN),	#
16162 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
16163 # enable bit is set in the FPCR, then the trap is taken and the		#
16164 # destination is not modified. If the SNAN trap enable bit is not set,	#
16165 # then the SNAN is converted to a nonsignalling NAN (by setting the	#
16166 # SNAN bit in the operand to one), and the operation continues as	#
16167 # described in the preceding paragraph, for nonsignalling NANs.		#
16168 #	Make sure the appropriate FPSR bits are set before exiting.	#
16169 #									#
16170 #########################################################################
16171 
# The dyadic entries check the dst tag first, so a dst NAN wins over a src
# NAN. The monadic (_1op) entries land directly on the src-only handling.
16172 	global		res_qnan
16173 	global		res_snan
16174 res_qnan:
16175 res_snan:
16176 	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
16177 	beq.b		dst_snan2
16178 	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
16179 	beq.b		dst_qnan2
16180 src_nan:
16181 	cmp.b		STAG(%a6), &QNAN	# is the src a  QNAN?
16182 	beq.b		src_qnan2
16183 	global		res_snan_1op
16184 res_snan_1op:
16185 src_snan2:
16186 	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
16187 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16188 	lea		FP_SRC(%a6), %a0	# result is the src NAN
16189 	bra.b		nan_comp
16190 	global		res_qnan_1op
16191 res_qnan_1op:
16192 src_qnan2:
16193 	or.l		&nan_mask, USER_FPSR(%a6)
16194 	lea		FP_SRC(%a6), %a0	# result is the src NAN
16195 	bra.b		nan_comp
16196 dst_snan2:
16197 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16198 	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
16199 	lea		FP_DST(%a6), %a0	# result is the dst NAN
16200 	bra.b		nan_comp
16201 dst_qnan2:
16202 	lea		FP_DST(%a6), %a0	# result is the dst NAN
16203 	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
16204 	bne		nan_done		# no; only the NAN cc is needed
16205 	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
16206 nan_done:
16207 	or.l		&nan_mask, USER_FPSR(%a6)
16208 nan_comp:
16209 	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
16210 	beq.b		nan_not_neg
16211 	or.l		&neg_mask, USER_FPSR(%a6)
16212 nan_not_neg:
16213 	fmovm.x		(%a0), &0x80		# return the chosen NAN in fp0
16214 	rts
16215 
16216 #########################################################################
16217 # XDEF ****************************************************************	#
16218 #	res_operr(): return default result during operand error		#
16219 #									#
16220 # XREF ****************************************************************	#
16221 #	None								#
16222 #									#
16223 # INPUT ***************************************************************	#
16224 #	None								#
16225 #									#
16226 # OUTPUT **************************************************************	#
16227 #	fp0 = default operand error result				#
16228 #									#
16229 # ALGORITHM ***********************************************************	#
16230 #	A nonsignalling NAN is returned as the default result when	#
16231 # an operand error occurs for the following cases:			#
16232 #									#
16233 #	Multiply: (Infinity x Zero)					#
16234 #	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
16235 #	Sqrt    : (-Infinity)						#
16236 #########################################################################
16237 
16238 	global		res_operr
16239 res_operr:
16240 	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
16241 	fmovm.x		nan_return(%pc), &0x80	# return default NAN in fp0
16242 	rts
16243 
# default NAN image: exponent 0x7fff with an all-ones mantissa
16244 nan_return:
16245 	long		0x7fff0000, 0xffffffff, 0xffffffff
16246 
16247 #########################################################################
16248 # fdbcc(): routine to emulate the fdbcc instruction			#
16249 #									#
16250 # XDEF **************************************************************** #
16251 #	_fdbcc()							#
16252 #									#
16253 # XREF **************************************************************** #
16254 #	fetch_dreg() - fetch Dn value					#
16255 #	store_dreg_l() - store updated Dn value				#
16256 #									#
16257 # INPUT ***************************************************************	#
16258 #	d0 = displacement						#
16259 #									#
16260 # OUTPUT ************************************************************** #
16261 #	none								#
16262 #									#
16263 # ALGORITHM ***********************************************************	#
16264 #	This routine checks which conditional predicate is specified by	#
16265 # the stacked fdbcc instruction opcode and then branches to a routine	#
16266 # for that predicate. The corresponding fbcc instruction is then used	#
16267 # to see whether the condition (specified by the stacked FPSR) is true	#
16268 # or false.								#
16269 #	If a BSUN exception should be indicated, the BSUN and ABSUN	#
16270 # bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
16271 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
16272 # enabled BSUN should not be flagged and the predicate is false, then	#
16273 # Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
16274 # the displacement value to the stacked PC so that when an "rte" is	#
16275 # finally executed, the branch occurs.					#
16276 #									#
16277 #########################################################################
16278 	global		_fdbcc
16279 _fdbcc:
16280 	mov.l		%d0,L_SCR1(%a6)		# save displacement
16281 
16282 	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
16283 
16284 	clr.l		%d1			# clear scratch reg
16285 	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
16286 	ror.l		&0x8,%d1		# rotate to top byte
16287 	fmov.l		%d1,%fpsr		# insert into FPSR
16288 
16289 	mov.w		(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load handler offset
16290 	jmp		(tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16291 
# one 16-bit offset (relative to tbl_fdbcc) per predicate, indexed by d0
16292 tbl_fdbcc:
16293 	short		fdbcc_f		-	tbl_fdbcc	# 00
16294 	short		fdbcc_eq	-	tbl_fdbcc	# 01
16295 	short		fdbcc_ogt	-	tbl_fdbcc	# 02
16296 	short		fdbcc_oge	-	tbl_fdbcc	# 03
16297 	short		fdbcc_olt	-	tbl_fdbcc	# 04
16298 	short		fdbcc_ole	-	tbl_fdbcc	# 05
16299 	short		fdbcc_ogl	-	tbl_fdbcc	# 06
16300 	short		fdbcc_or	-	tbl_fdbcc	# 07
16301 	short		fdbcc_un	-	tbl_fdbcc	# 08
16302 	short		fdbcc_ueq	-	tbl_fdbcc	# 09
16303 	short		fdbcc_ugt	-	tbl_fdbcc	# 10
16304 	short		fdbcc_uge	-	tbl_fdbcc	# 11
16305 	short		fdbcc_ult	-	tbl_fdbcc	# 12
16306 	short		fdbcc_ule	-	tbl_fdbcc	# 13
16307 	short		fdbcc_neq	-	tbl_fdbcc	# 14
16308 	short		fdbcc_t		-	tbl_fdbcc	# 15
16309 	short		fdbcc_sf	-	tbl_fdbcc	# 16
16310 	short		fdbcc_seq	-	tbl_fdbcc	# 17
16311 	short		fdbcc_gt	-	tbl_fdbcc	# 18
16312 	short		fdbcc_ge	-	tbl_fdbcc	# 19
16313 	short		fdbcc_lt	-	tbl_fdbcc	# 20
16314 	short		fdbcc_le	-	tbl_fdbcc	# 21
16315 	short		fdbcc_gl	-	tbl_fdbcc	# 22
16316 	short		fdbcc_gle	-	tbl_fdbcc	# 23
16317 	short		fdbcc_ngle	-	tbl_fdbcc	# 24
16318 	short		fdbcc_ngl	-	tbl_fdbcc	# 25
16319 	short		fdbcc_nle	-	tbl_fdbcc	# 26
16320 	short		fdbcc_nlt	-	tbl_fdbcc	# 27
16321 	short		fdbcc_nge	-	tbl_fdbcc	# 28
16322 	short		fdbcc_ngt	-	tbl_fdbcc	# 29
16323 	short		fdbcc_sneq	-	tbl_fdbcc	# 30
16324 	short		fdbcc_st	-	tbl_fdbcc	# 31
16325 
16326 #########################################################################
16327 #									#
16328 # IEEE Nonaware tests							#
16329 #									#
16330 # For the IEEE nonaware tests, only the false branch changes the	#
16331 # counter. However, the true branch may set bsun so we check to see	#
16332 # if the NAN bit is set, in which case BSUN and AIOP will be set.	#
16333 #									#
16334 # The cases EQ and NE are shared by the Aware and Nonaware groups	#
16335 # and are incapable of setting the BSUN exception bit.			#
16336 #									#
16337 # Typically, only one of the two possible branch directions could	#
16338 # have the NAN bit set.							#
16339 # (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
16340 #  is preserved.)							#
16341 #									#
16342 #########################################################################
16343 
16344 #
16345 # equal:
16346 #
16347 #	Z
16348 # true: return at once; false: go handle the counter. BSUN is never set.
16349 fdbcc_eq:
16350 	fbeq.w		fdbcc_eq_yes		# equal?
16351 fdbcc_eq_no:
16352 	bra.w		fdbcc_false		# no; go handle counter
16353 fdbcc_eq_yes:
16354 	rts					# yes; do nothing
16355 
16356 #
16357 # not equal:
16358 #	_
16359 #	Z
16360 # true: return at once; false: go handle the counter. BSUN is never set.
16361 fdbcc_neq:
16362 	fbneq.w		fdbcc_neq_yes		# not equal?
16363 fdbcc_neq_no:
16364 	bra.w		fdbcc_false		# no; go handle counter
16365 fdbcc_neq_yes:
16366 	rts					# yes; do nothing
16367 
16368 #
16369 # greater than:
16370 #	_______
16371 #	NANvZvN
16372 # false path flags BSUN/AIOP when the NAN cc is set, then handles counter.
16373 fdbcc_gt:
16374 	fbgt.w		fdbcc_gt_yes		# greater than?
16375 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16376 	beq.w		fdbcc_false		# no;go handle counter
16377 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16378 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16379 	bne.w		fdbcc_bsun		# yes; we have an exception
16380 	bra.w		fdbcc_false		# no; go handle counter
16381 fdbcc_gt_yes:
16382 	rts					# do nothing
16383 
16384 #
16385 # not greater than:
16386 #
16387 #	NANvZvN
16388 # true path flags BSUN/AIOP when the NAN cc is set; false handles counter.
16389 fdbcc_ngt:
16390 	fbngt.w		fdbcc_ngt_yes		# not greater than?
16391 fdbcc_ngt_no:
16392 	bra.w		fdbcc_false		# no; go handle counter
16393 fdbcc_ngt_yes:
16394 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16395 	beq.b		fdbcc_ngt_done		# no;go finish
16396 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16397 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16398 	bne.w		fdbcc_bsun		# yes; we have an exception
16399 fdbcc_ngt_done:
16400 	rts					# no; do nothing
16401 
16402 #
16403 # greater than or equal:
16404 #	   _____
16405 #	Zv(NANvN)
16406 # both paths flag BSUN/AIOP when the NAN cc is set (Z and NAN may coexist).
16407 fdbcc_ge:
16408 	fbge.w		fdbcc_ge_yes		# greater than or equal?
16409 fdbcc_ge_no:
16410 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16411 	beq.w		fdbcc_false		# no;go handle counter
16412 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16413 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16414 	bne.w		fdbcc_bsun		# yes; we have an exception
16415 	bra.w		fdbcc_false		# no; go handle counter
16416 fdbcc_ge_yes:
16417 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16418 	beq.b		fdbcc_ge_yes_done	# no;go do nothing
16419 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16420 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16421 	bne.w		fdbcc_bsun		# yes; we have an exception
16422 fdbcc_ge_yes_done:
16423 	rts					# do nothing
16424 
16425 #
16426 # not (greater than or equal):
16427 #	       _
16428 #	NANv(N^Z)
16429 # true path flags BSUN/AIOP when the NAN cc is set; false handles counter.
16430 fdbcc_nge:
16431 	fbnge.w		fdbcc_nge_yes		# not (greater than or equal)?
16432 fdbcc_nge_no:
16433 	bra.w		fdbcc_false		# no; go handle counter
16434 fdbcc_nge_yes:
16435 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16436 	beq.b		fdbcc_nge_done		# no;go finish
16437 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16438 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16439 	bne.w		fdbcc_bsun		# yes; we have an exception
16440 fdbcc_nge_done:
16441 	rts					# no; do nothing
16442 
16443 #
16444 # less than:
16445 #	   _____
16446 #	N^(NANvZ)
16447 # false path flags BSUN/AIOP when the NAN cc is set, then handles counter.
16448 fdbcc_lt:
16449 	fblt.w		fdbcc_lt_yes		# less than?
16450 fdbcc_lt_no:
16451 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16452 	beq.w		fdbcc_false		# no; go handle counter
16453 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16454 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16455 	bne.w		fdbcc_bsun		# yes; we have an exception
16456 	bra.w		fdbcc_false		# no; go handle counter
16457 fdbcc_lt_yes:
16458 	rts					# do nothing
16459 
16460 #
16461 # not less than:
16462 #	       _
16463 #	NANv(ZvN)
16464 # true path flags BSUN/AIOP when the NAN cc is set; false handles counter.
16465 fdbcc_nlt:
16466 	fbnlt.w		fdbcc_nlt_yes		# not less than?
16467 fdbcc_nlt_no:
16468 	bra.w		fdbcc_false		# no; go handle counter
16469 fdbcc_nlt_yes:
16470 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16471 	beq.b		fdbcc_nlt_done		# no;go finish
16472 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16473 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16474 	bne.w		fdbcc_bsun		# yes; we have an exception
16475 fdbcc_nlt_done:
16476 	rts					# no; do nothing
16477 
16478 #
16479 # less than or equal:
16480 #	     ___
16481 #	Zv(N^NAN)
16482 # both paths flag BSUN/AIOP when the NAN cc is set (Z and NAN may coexist).
16483 fdbcc_le:
16484 	fble.w		fdbcc_le_yes		# less than or equal?
16485 fdbcc_le_no:
16486 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16487 	beq.w		fdbcc_false		# no; go handle counter
16488 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16489 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16490 	bne.w		fdbcc_bsun		# yes; we have an exception
16491 	bra.w		fdbcc_false		# no; go handle counter
16492 fdbcc_le_yes:
16493 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16494 	beq.b		fdbcc_le_yes_done	# no; go do nothing
16495 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16496 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16497 	bne.w		fdbcc_bsun		# yes; we have an exception
16498 fdbcc_le_yes_done:
16499 	rts					# do nothing
16500 
16501 #
16502 # not (less than or equal):
16503 #	     ___
16504 #	NANv(NvZ)
16505 # true path flags BSUN/AIOP when the NAN cc is set; false handles counter.
16506 fdbcc_nle:
16507 	fbnle.w		fdbcc_nle_yes		# not (less than or equal)?
16508 fdbcc_nle_no:
16509 	bra.w		fdbcc_false		# no; go handle counter
16510 fdbcc_nle_yes:
16511 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16512 	beq.w		fdbcc_nle_done		# no; go finish
16513 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16514 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16515 	bne.w		fdbcc_bsun		# yes; we have an exception
16516 fdbcc_nle_done:
16517 	rts					# no; do nothing
16518 
16519 #
16520 # greater or less than:
16521 #	_____
16522 #	NANvZ
16523 # false path flags BSUN/AIOP when the NAN cc is set, then handles counter.
16524 fdbcc_gl:
16525 	fbgl.w		fdbcc_gl_yes		# greater or less than?
16526 fdbcc_gl_no:
16527 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16528 	beq.w		fdbcc_false		# no; handle counter
16529 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16530 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16531 	bne.w		fdbcc_bsun		# yes; we have an exception
16532 	bra.w		fdbcc_false		# no; go handle counter
16533 fdbcc_gl_yes:
16534 	rts					# do nothing
16535 
16536 #
16537 # not (greater or less than):
16538 #
16539 #	NANvZ
16540 # true path flags BSUN/AIOP when the NAN cc is set; false handles counter.
16541 fdbcc_ngl:
16542 	fbngl.w		fdbcc_ngl_yes		# not (greater or less than)?
16543 fdbcc_ngl_no:
16544 	bra.w		fdbcc_false		# no; go handle counter
16545 fdbcc_ngl_yes:
16546 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16547 	beq.b		fdbcc_ngl_done		# no; go finish
16548 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16549 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16550 	bne.w		fdbcc_bsun		# yes; we have an exception
16551 fdbcc_ngl_done:
16552 	rts					# no; do nothing
16553 
16554 #
16555 # greater, less, or equal:
16556 #	___
16557 #	NAN
16558 #
16559 fdbcc_gle:
16560 	fbgle.w		fdbcc_gle_yes		# greater, less, or equal?
16561 fdbcc_gle_no:
16562 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16563 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16564 	bne.w		fdbcc_bsun		# yes; we have an exception
16565 	bra.w		fdbcc_false		# no; go handle counter
16566 fdbcc_gle_yes:
16567 	rts					# do nothing
16568 
16569 #
16570 # not (greater, less, or equal):
16571 #
16572 #	NAN
16573 #
16574 fdbcc_ngle:
16575 	fbngle.w	fdbcc_ngle_yes		# not (greater, less, or equal)?
16576 fdbcc_ngle_no:
16577 	bra.w		fdbcc_false		# no; go handle counter
16578 fdbcc_ngle_yes:
16579 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16580 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16581 	bne.w		fdbcc_bsun		# yes; we have an exception
16582 	rts					# no; do nothing
16583 
16584 #########################################################################
16585 #									#
16586 # Miscellaneous tests							#
16587 #									#
16588 # For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16589 #									#
16590 #########################################################################
16591 
16592 #
16593 # false:
16594 #
16595 #	False
16596 #
16597 fdbcc_f:					# no bsun possible
16598 	bra.w		fdbcc_false		# go handle counter
16599 
16600 #
16601 # true:
16602 #
16603 #	True
16604 #
16605 fdbcc_t:					# no bsun possible
16606 	rts					# do nothing
16607 
16608 #
16609 # signalling false:
16610 #
16611 #	False
16612 #
16613 fdbcc_sf:
16614 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16615 	beq.w		fdbcc_false		# no;go handle counter
16616 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16617 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16618 	bne.w		fdbcc_bsun		# yes; we have an exception
16619 	bra.w		fdbcc_false		# go handle counter
16620 
16621 #
16622 # signalling true:
16623 #
16624 #	True
16625 #
16626 fdbcc_st:
16627 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16628 	beq.b		fdbcc_st_done		# no;go finish
16629 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16630 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16631 	bne.w		fdbcc_bsun		# yes; we have an exception
16632 fdbcc_st_done:
16633 	rts
16634 
16635 #
16636 # signalling equal:
16637 #
16638 #	Z
16639 #
16640 fdbcc_seq:
16641 	fbseq.w		fdbcc_seq_yes		# signalling equal?
16642 fdbcc_seq_no:
16643 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16644 	beq.w		fdbcc_false		# no;go handle counter
16645 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16646 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16647 	bne.w		fdbcc_bsun		# yes; we have an exception
16648 	bra.w		fdbcc_false		# go handle counter
16649 fdbcc_seq_yes:
16650 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16651 	beq.b		fdbcc_seq_yes_done	# no;go do nothing
16652 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16653 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16654 	bne.w		fdbcc_bsun		# yes; we have an exception
16655 fdbcc_seq_yes_done:
16656 	rts					# yes; do nothing
16657 
16658 #
16659 # signalling not equal:
16660 #	_
16661 #	Z
16662 #
16663 fdbcc_sneq:
16664 	fbsneq.w	fdbcc_sneq_yes		# signalling not equal?
16665 fdbcc_sneq_no:
16666 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
16667 	beq.w		fdbcc_false		# no;go handle counter
16668 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16669 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16670 	bne.w		fdbcc_bsun		# yes; we have an exception
16671 	bra.w		fdbcc_false		# go handle counter
16672 fdbcc_sneq_yes:
16673 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
16674 	beq.w		fdbcc_sneq_done		# no;go finish
16675 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16676 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16677 	bne.w		fdbcc_bsun		# yes; we have an exception
16678 fdbcc_sneq_done:
16679 	rts
16680 
16681 #########################################################################
16682 #									#
16683 # IEEE Aware tests							#
16684 #									#
16685 # For the IEEE aware tests, action is only taken if the result is false.#
16686 # Therefore, the opposite branch type is used to jump to the decrement	#
16687 # routine.								#
16688 # The BSUN exception will not be set for any of these tests.		#
16689 #									#
16690 #########################################################################
16691 
16692 #
16693 # ordered greater than:
16694 #	_______
16695 #	NANvZvN
16696 #
16697 fdbcc_ogt:
16698 	fbogt.w		fdbcc_ogt_yes		# ordered greater than?
16699 fdbcc_ogt_no:
16700 	bra.w		fdbcc_false		# no; go handle counter
16701 fdbcc_ogt_yes:
16702 	rts					# yes; do nothing
16703 
16704 #
16705 # unordered or less or equal:
16706 #	_______
16707 #	NANvZvN
16708 #
16709 fdbcc_ule:
16710 	fbule.w		fdbcc_ule_yes		# unordered or less or equal?
16711 fdbcc_ule_no:
16712 	bra.w		fdbcc_false		# no; go handle counter
16713 fdbcc_ule_yes:
16714 	rts					# yes; do nothing
16715 
16716 #
16717 # ordered greater than or equal:
16718 #	   _____
16719 #	Zv(NANvN)
16720 #
16721 fdbcc_oge:
16722 	fboge.w		fdbcc_oge_yes		# ordered greater than or equal?
16723 fdbcc_oge_no:
16724 	bra.w		fdbcc_false		# no; go handle counter
16725 fdbcc_oge_yes:
16726 	rts					# yes; do nothing
16727 
16728 #
16729 # unordered or less than:
16730 #	       _
16731 #	NANv(N^Z)
16732 #
16733 fdbcc_ult:
16734 	fbult.w		fdbcc_ult_yes		# unordered or less than?
16735 fdbcc_ult_no:
16736 	bra.w		fdbcc_false		# no; go handle counter
16737 fdbcc_ult_yes:
16738 	rts					# yes; do nothing
16739 
16740 #
16741 # ordered less than:
16742 #	   _____
16743 #	N^(NANvZ)
16744 #
16745 fdbcc_olt:
16746 	fbolt.w		fdbcc_olt_yes		# ordered less than?
16747 fdbcc_olt_no:
16748 	bra.w		fdbcc_false		# no; go handle counter
16749 fdbcc_olt_yes:
16750 	rts					# yes; do nothing
16751 
16752 #
16753 # unordered or greater or equal:
16754 #
16755 #	NANvZvN
16756 #
16757 fdbcc_uge:
16758 	fbuge.w		fdbcc_uge_yes		# unordered or greater than?
16759 fdbcc_uge_no:
16760 	bra.w		fdbcc_false		# no; go handle counter
16761 fdbcc_uge_yes:
16762 	rts					# yes; do nothing
16763 
16764 #
16765 # ordered less than or equal:
16766 #	     ___
16767 #	Zv(N^NAN)
16768 #
16769 fdbcc_ole:
16770 	fbole.w		fdbcc_ole_yes		# ordered greater or less than?
16771 fdbcc_ole_no:
16772 	bra.w		fdbcc_false		# no; go handle counter
16773 fdbcc_ole_yes:
16774 	rts					# yes; do nothing
16775 
16776 #
16777 # unordered or greater than:
16778 #	     ___
16779 #	NANv(NvZ)
16780 #
16781 fdbcc_ugt:
16782 	fbugt.w		fdbcc_ugt_yes		# unordered or greater than?
16783 fdbcc_ugt_no:
16784 	bra.w		fdbcc_false		# no; go handle counter
16785 fdbcc_ugt_yes:
16786 	rts					# yes; do nothing
16787 
16788 #
16789 # ordered greater or less than:
16790 #	_____
16791 #	NANvZ
16792 #
16793 fdbcc_ogl:
16794 	fbogl.w		fdbcc_ogl_yes		# ordered greater or less than?
16795 fdbcc_ogl_no:
16796 	bra.w		fdbcc_false		# no; go handle counter
16797 fdbcc_ogl_yes:
16798 	rts					# yes; do nothing
16799 
16800 #
16801 # unordered or equal:
16802 #
16803 #	NANvZ
16804 #
16805 fdbcc_ueq:
16806 	fbueq.w		fdbcc_ueq_yes		# unordered or equal?
16807 fdbcc_ueq_no:
16808 	bra.w		fdbcc_false		# no; go handle counter
16809 fdbcc_ueq_yes:
16810 	rts					# yes; do nothing
16811 
16812 #
16813 # ordered:
16814 #	___
16815 #	NAN
16816 #
16817 fdbcc_or:
16818 	fbor.w		fdbcc_or_yes		# ordered?
16819 fdbcc_or_no:
16820 	bra.w		fdbcc_false		# no; go handle counter
16821 fdbcc_or_yes:
16822 	rts					# yes; do nothing
16823 
16824 #
16825 # unordered:
16826 #
16827 #	NAN
16828 #
16829 fdbcc_un:
16830 	fbun.w		fdbcc_un_yes		# unordered?
16831 fdbcc_un_no:
16832 	bra.w		fdbcc_false		# no; go handle counter
16833 fdbcc_un_yes:
16834 	rts					# yes; do nothing
16835 
16836 #######################################################################
16837 
16838 #
16839 # the bsun exception bit was not set.
16840 #
16841 # (1) subtract 1 from the count register
16842 # (2) if (cr == -1) then
16843 #	pc = pc of next instruction
16844 #     else
16845 #	pc += sign_ext(16-bit displacement)
16846 #
16847 fdbcc_false:
16848 	mov.b		1+EXC_OPWORD(%a6), %d1	# fetch lo opword
16849 	andi.w		&0x7, %d1		# extract count register
16850 
16851 	bsr.l		fetch_dreg		# fetch count value
16852 # make sure that d0 isn't corrupted between calls...
16853 
16854 	subq.w		&0x1, %d0		# Dn - 1 -> Dn
16855 
16856 	bsr.l		store_dreg_l		# store new count value
16857 
16858 	cmpi.w		%d0, &-0x1		# is (Dn == -1)?
16859 	bne.b		fdbcc_false_cont	# no;
16860 	rts
16861 
16862 fdbcc_false_cont:
16863 	mov.l		L_SCR1(%a6),%d0		# fetch displacement
16864 	add.l		USER_FPIAR(%a6),%d0	# add instruction PC
16865 	addq.l		&0x4,%d0		# add instruction length
16866 	mov.l		%d0,EXC_PC(%a6)		# set new PC
16867 	rts
16868 
# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
# this routine itself only records fbsun_flg in SPCOND_FLG and returns.
fdbcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts
16876 
16877 #########################################################################
16878 # ftrapcc(): routine to emulate the ftrapcc instruction			#
16879 #									#
16880 # XDEF ****************************************************************	#
16881 #	_ftrapcc()							#
16882 #									#
16883 # XREF ****************************************************************	#
16884 #	none								#
16885 #									#
16886 # INPUT *************************************************************** #
16887 #	none								#
16888 #									#
16889 # OUTPUT ************************************************************** #
16890 #	none								#
16891 #									#
16892 # ALGORITHM *********************************************************** #
16893 #	This routine checks which conditional predicate is specified by	#
16894 # the stacked ftrapcc instruction opcode and then branches to a routine	#
16895 # for that predicate. The corresponding fbcc instruction is then used	#
16896 # to see whether the condition (specified by the stacked FPSR) is true	#
16897 # or false.								#
16898 #	If a BSUN exception should be indicated, the BSUN and ABSUN	#
16899 # bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
16900 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
16901 # enabled BSUN should not be flagged and the predicate is true, then	#
16902 # the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
16903 # flags indicate to the calling routine to emulate the exceptional	#
16904 # condition.								#
16905 #									#
16906 #########################################################################
16907 
16908 	global		_ftrapcc
16909 _ftrapcc:
16910 	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
16911 
16912 	clr.l		%d1			# clear scratch reg
16913 	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
16914 	ror.l		&0x8,%d1		# rotate to top byte
16915 	fmov.l		%d1,%fpsr		# insert into FPSR
16916 
16917 	mov.w		(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
16918 	jmp		(tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16919 
16920 tbl_ftrapcc:
16921 	short		ftrapcc_f	-	tbl_ftrapcc	# 00
16922 	short		ftrapcc_eq	-	tbl_ftrapcc	# 01
16923 	short		ftrapcc_ogt	-	tbl_ftrapcc	# 02
16924 	short		ftrapcc_oge	-	tbl_ftrapcc	# 03
16925 	short		ftrapcc_olt	-	tbl_ftrapcc	# 04
16926 	short		ftrapcc_ole	-	tbl_ftrapcc	# 05
16927 	short		ftrapcc_ogl	-	tbl_ftrapcc	# 06
16928 	short		ftrapcc_or	-	tbl_ftrapcc	# 07
16929 	short		ftrapcc_un	-	tbl_ftrapcc	# 08
16930 	short		ftrapcc_ueq	-	tbl_ftrapcc	# 09
16931 	short		ftrapcc_ugt	-	tbl_ftrapcc	# 10
16932 	short		ftrapcc_uge	-	tbl_ftrapcc	# 11
16933 	short		ftrapcc_ult	-	tbl_ftrapcc	# 12
16934 	short		ftrapcc_ule	-	tbl_ftrapcc	# 13
16935 	short		ftrapcc_neq	-	tbl_ftrapcc	# 14
16936 	short		ftrapcc_t	-	tbl_ftrapcc	# 15
16937 	short		ftrapcc_sf	-	tbl_ftrapcc	# 16
16938 	short		ftrapcc_seq	-	tbl_ftrapcc	# 17
16939 	short		ftrapcc_gt	-	tbl_ftrapcc	# 18
16940 	short		ftrapcc_ge	-	tbl_ftrapcc	# 19
16941 	short		ftrapcc_lt	-	tbl_ftrapcc	# 20
16942 	short		ftrapcc_le	-	tbl_ftrapcc	# 21
16943 	short		ftrapcc_gl	-	tbl_ftrapcc	# 22
16944 	short		ftrapcc_gle	-	tbl_ftrapcc	# 23
16945 	short		ftrapcc_ngle	-	tbl_ftrapcc	# 24
16946 	short		ftrapcc_ngl	-	tbl_ftrapcc	# 25
16947 	short		ftrapcc_nle	-	tbl_ftrapcc	# 26
16948 	short		ftrapcc_nlt	-	tbl_ftrapcc	# 27
16949 	short		ftrapcc_nge	-	tbl_ftrapcc	# 28
16950 	short		ftrapcc_ngt	-	tbl_ftrapcc	# 29
16951 	short		ftrapcc_sneq	-	tbl_ftrapcc	# 30
16952 	short		ftrapcc_st	-	tbl_ftrapcc	# 31
16953 
16954 #########################################################################
16955 #									#
16956 # IEEE Nonaware tests							#
16957 #									#
16958 # For the IEEE nonaware tests, we set the result based on the		#
16959 # floating point condition codes. In addition, we check to see		#
16960 # if the NAN bit is set, in which case BSUN and AIOP will be set.	#
16961 #									#
16962 # The cases EQ and NE are shared by the Aware and Nonaware groups	#
16963 # and are incapable of setting the BSUN exception bit.			#
16964 #									#
16965 # Typically, only one of the two possible branch directions could	#
16966 # have the NAN bit set.							#
16967 #									#
16968 #########################################################################
16969 
16970 #
16971 # equal:
16972 #
16973 #	Z
16974 #
16975 ftrapcc_eq:
16976 	fbeq.w		ftrapcc_trap		# equal?
16977 ftrapcc_eq_no:
16978 	rts					# do nothing
16979 
16980 #
16981 # not equal:
16982 #	_
16983 #	Z
16984 #
16985 ftrapcc_neq:
16986 	fbneq.w		ftrapcc_trap		# not equal?
16987 ftrapcc_neq_no:
16988 	rts					# do nothing
16989 
16990 #
16991 # greater than:
16992 #	_______
16993 #	NANvZvN
16994 #
16995 ftrapcc_gt:
16996 	fbgt.w		ftrapcc_trap		# greater than?
16997 ftrapcc_gt_no:
16998 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
16999 	beq.b		ftrapcc_gt_done		# no
17000 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17001 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17002 	bne.w		ftrapcc_bsun		# yes
17003 ftrapcc_gt_done:
17004 	rts					# no; do nothing
17005 
17006 #
17007 # not greater than:
17008 #
17009 #	NANvZvN
17010 #
17011 ftrapcc_ngt:
17012 	fbngt.w		ftrapcc_ngt_yes		# not greater than?
17013 ftrapcc_ngt_no:
17014 	rts					# do nothing
17015 ftrapcc_ngt_yes:
17016 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17017 	beq.w		ftrapcc_trap		# no; go take trap
17018 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17019 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17020 	bne.w		ftrapcc_bsun		# yes
17021 	bra.w		ftrapcc_trap		# no; go take trap
17022 
17023 #
17024 # greater than or equal:
17025 #	   _____
17026 #	Zv(NANvN)
17027 #
17028 ftrapcc_ge:
17029 	fbge.w		ftrapcc_ge_yes		# greater than or equal?
17030 ftrapcc_ge_no:
17031 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17032 	beq.b		ftrapcc_ge_done		# no; go finish
17033 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17034 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17035 	bne.w		ftrapcc_bsun		# yes
17036 ftrapcc_ge_done:
17037 	rts					# no; do nothing
17038 ftrapcc_ge_yes:
17039 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17040 	beq.w		ftrapcc_trap		# no; go take trap
17041 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17042 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17043 	bne.w		ftrapcc_bsun		# yes
17044 	bra.w		ftrapcc_trap		# no; go take trap
17045 
17046 #
17047 # not (greater than or equal):
17048 #	       _
17049 #	NANv(N^Z)
17050 #
17051 ftrapcc_nge:
17052 	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?
17053 ftrapcc_nge_no:
17054 	rts					# do nothing
17055 ftrapcc_nge_yes:
17056 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17057 	beq.w		ftrapcc_trap		# no; go take trap
17058 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17059 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17060 	bne.w		ftrapcc_bsun		# yes
17061 	bra.w		ftrapcc_trap		# no; go take trap
17062 
17063 #
17064 # less than:
17065 #	   _____
17066 #	N^(NANvZ)
17067 #
17068 ftrapcc_lt:
17069 	fblt.w		ftrapcc_trap		# less than?
17070 ftrapcc_lt_no:
17071 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17072 	beq.b		ftrapcc_lt_done		# no; go finish
17073 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17074 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17075 	bne.w		ftrapcc_bsun		# yes
17076 ftrapcc_lt_done:
17077 	rts					# no; do nothing
17078 
17079 #
17080 # not less than:
17081 #	       _
17082 #	NANv(ZvN)
17083 #
17084 ftrapcc_nlt:
17085 	fbnlt.w		ftrapcc_nlt_yes		# not less than?
17086 ftrapcc_nlt_no:
17087 	rts					# do nothing
17088 ftrapcc_nlt_yes:
17089 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17090 	beq.w		ftrapcc_trap		# no; go take trap
17091 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17092 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17093 	bne.w		ftrapcc_bsun		# yes
17094 	bra.w		ftrapcc_trap		# no; go take trap
17095 
17096 #
17097 # less than or equal:
17098 #	     ___
17099 #	Zv(N^NAN)
17100 #
17101 ftrapcc_le:
17102 	fble.w		ftrapcc_le_yes		# less than or equal?
17103 ftrapcc_le_no:
17104 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17105 	beq.b		ftrapcc_le_done		# no; go finish
17106 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17107 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17108 	bne.w		ftrapcc_bsun		# yes
17109 ftrapcc_le_done:
17110 	rts					# no; do nothing
17111 ftrapcc_le_yes:
17112 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17113 	beq.w		ftrapcc_trap		# no; go take trap
17114 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17115 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17116 	bne.w		ftrapcc_bsun		# yes
17117 	bra.w		ftrapcc_trap		# no; go take trap
17118 
17119 #
17120 # not (less than or equal):
17121 #	     ___
17122 #	NANv(NvZ)
17123 #
17124 ftrapcc_nle:
17125 	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
17126 ftrapcc_nle_no:
17127 	rts					# do nothing
17128 ftrapcc_nle_yes:
17129 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17130 	beq.w		ftrapcc_trap		# no; go take trap
17131 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17132 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17133 	bne.w		ftrapcc_bsun		# yes
17134 	bra.w		ftrapcc_trap		# no; go take trap
17135 
17136 #
17137 # greater or less than:
17138 #	_____
17139 #	NANvZ
17140 #
17141 ftrapcc_gl:
17142 	fbgl.w		ftrapcc_trap		# greater or less than?
17143 ftrapcc_gl_no:
17144 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17145 	beq.b		ftrapcc_gl_done		# no; go finish
17146 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17147 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17148 	bne.w		ftrapcc_bsun		# yes
17149 ftrapcc_gl_done:
17150 	rts					# no; do nothing
17151 
17152 #
17153 # not (greater or less than):
17154 #
17155 #	NANvZ
17156 #
17157 ftrapcc_ngl:
17158 	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
17159 ftrapcc_ngl_no:
17160 	rts					# do nothing
17161 ftrapcc_ngl_yes:
17162 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17163 	beq.w		ftrapcc_trap		# no; go take trap
17164 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17165 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17166 	bne.w		ftrapcc_bsun		# yes
17167 	bra.w		ftrapcc_trap		# no; go take trap
17168 
17169 #
17170 # greater, less, or equal:
17171 #	___
17172 #	NAN
17173 #
17174 ftrapcc_gle:
17175 	fbgle.w		ftrapcc_trap		# greater, less, or equal?
17176 ftrapcc_gle_no:
17177 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17178 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17179 	bne.w		ftrapcc_bsun		# yes
17180 	rts					# no; do nothing
17181 
17182 #
17183 # not (greater, less, or equal):
17184 #
17185 #	NAN
17186 #
17187 ftrapcc_ngle:
17188 	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
17189 ftrapcc_ngle_no:
17190 	rts					# do nothing
17191 ftrapcc_ngle_yes:
17192 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17193 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17194 	bne.w		ftrapcc_bsun		# yes
17195 	bra.w		ftrapcc_trap		# no; go take trap
17196 
17197 #########################################################################
17198 #									#
17199 # Miscellaneous tests							#
17200 #									#
17201 # For the IEEE aware tests, we only have to set the result based on the	#
17202 # floating point condition codes. The BSUN exception will not be	#
17203 # set for any of these tests.						#
17204 #									#
17205 #########################################################################
17206 
17207 #
17208 # false:
17209 #
17210 #	False
17211 #
17212 ftrapcc_f:
17213 	rts					# do nothing
17214 
17215 #
17216 # true:
17217 #
17218 #	True
17219 #
17220 ftrapcc_t:
17221 	bra.w		ftrapcc_trap		# go take trap
17222 
17223 #
17224 # signalling false:
17225 #
17226 #	False
17227 #
17228 ftrapcc_sf:
17229 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
17230 	beq.b		ftrapcc_sf_done		# no; go finish
17231 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17232 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17233 	bne.w		ftrapcc_bsun		# yes
17234 ftrapcc_sf_done:
17235 	rts					# no; do nothing
17236 
17237 #
17238 # signalling true:
17239 #
17240 #	True
17241 #
17242 ftrapcc_st:
17243 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
17244 	beq.w		ftrapcc_trap		# no; go take trap
17245 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17246 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17247 	bne.w		ftrapcc_bsun		# yes
17248 	bra.w		ftrapcc_trap		# no; go take trap
17249 
17250 #
17251 # signalling equal:
17252 #
17253 #	Z
17254 #
17255 ftrapcc_seq:
17256 	fbseq.w		ftrapcc_seq_yes		# signalling equal?
17257 ftrapcc_seq_no:
17258 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
17259 	beq.w		ftrapcc_seq_done	# no; go finish
17260 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17261 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17262 	bne.w		ftrapcc_bsun		# yes
17263 ftrapcc_seq_done:
17264 	rts					# no; do nothing
17265 ftrapcc_seq_yes:
17266 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
17267 	beq.w		ftrapcc_trap		# no; go take trap
17268 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17269 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17270 	bne.w		ftrapcc_bsun		# yes
17271 	bra.w		ftrapcc_trap		# no; go take trap
17272 
17273 #
17274 # signalling not equal:
17275 #	_
17276 #	Z
17277 #
17278 ftrapcc_sneq:
17279 	fbsneq.w	ftrapcc_sneq_yes	# signalling equal?
17280 ftrapcc_sneq_no:
17281 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
17282 	beq.w		ftrapcc_sneq_no_done	# no; go finish
17283 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17284 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17285 	bne.w		ftrapcc_bsun		# yes
17286 ftrapcc_sneq_no_done:
17287 	rts					# do nothing
17288 ftrapcc_sneq_yes:
17289 	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
17290 	beq.w		ftrapcc_trap		# no; go take trap
17291 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17292 	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17293 	bne.w		ftrapcc_bsun		# yes
17294 	bra.w		ftrapcc_trap		# no; go take trap
17295 
17296 #########################################################################
17297 #									#
17298 # IEEE Aware tests							#
17299 #									#
17300 # For the IEEE aware tests, we only have to set the result based on the	#
17301 # floating point condition codes. The BSUN exception will not be	#
17302 # set for any of these tests.						#
17303 #									#
17304 #########################################################################
17305 
17306 #
17307 # ordered greater than:
17308 #	_______
17309 #	NANvZvN
17310 #
17311 ftrapcc_ogt:
17312 	fbogt.w		ftrapcc_trap		# ordered greater than?
17313 ftrapcc_ogt_no:
17314 	rts					# do nothing
17315 
17316 #
17317 # unordered or less or equal:
17318 #	_______
17319 #	NANvZvN
17320 #
17321 ftrapcc_ule:
17322 	fbule.w		ftrapcc_trap		# unordered or less or equal?
17323 ftrapcc_ule_no:
17324 	rts					# do nothing
17325 
17326 #
17327 # ordered greater than or equal:
17328 #	   _____
17329 #	Zv(NANvN)
17330 #
17331 ftrapcc_oge:
17332 	fboge.w		ftrapcc_trap		# ordered greater than or equal?
17333 ftrapcc_oge_no:
17334 	rts					# do nothing
17335 
17336 #
17337 # unordered or less than:
17338 #	       _
17339 #	NANv(N^Z)
17340 #
17341 ftrapcc_ult:
17342 	fbult.w		ftrapcc_trap		# unordered or less than?
17343 ftrapcc_ult_no:
17344 	rts					# do nothing
17345 
17346 #
17347 # ordered less than:
17348 #	   _____
17349 #	N^(NANvZ)
17350 #
17351 ftrapcc_olt:
17352 	fbolt.w		ftrapcc_trap		# ordered less than?
17353 ftrapcc_olt_no:
17354 	rts					# do nothing
17355 
17356 #
17357 # unordered or greater or equal:
17358 #
17359 #	NANvZvN
17360 #
17361 ftrapcc_uge:
17362 	fbuge.w		ftrapcc_trap		# unordered or greater than?
17363 ftrapcc_uge_no:
17364 	rts					# do nothing
17365 
17366 #
17367 # ordered less than or equal:
17368 #	     ___
17369 #	Zv(N^NAN)
17370 #
17371 ftrapcc_ole:
17372 	fbole.w		ftrapcc_trap		# ordered greater or less than?
17373 ftrapcc_ole_no:
17374 	rts					# do nothing
17375 
17376 #
17377 # unordered or greater than:
17378 #	     ___
17379 #	NANv(NvZ)
17380 #
17381 ftrapcc_ugt:
17382 	fbugt.w		ftrapcc_trap		# unordered or greater than?
17383 ftrapcc_ugt_no:
17384 	rts					# do nothing
17385 
17386 #
17387 # ordered greater or less than:
17388 #	_____
17389 #	NANvZ
17390 #
17391 ftrapcc_ogl:
17392 	fbogl.w		ftrapcc_trap		# ordered greater or less than?
17393 ftrapcc_ogl_no:
17394 	rts					# do nothing
17395 
17396 #
17397 # unordered or equal:
17398 #
17399 #	NANvZ
17400 #
17401 ftrapcc_ueq:
17402 	fbueq.w		ftrapcc_trap		# unordered or equal?
17403 ftrapcc_ueq_no:
17404 	rts					# do nothing
17405 
17406 #
17407 # ordered:
17408 #	___
17409 #	NAN
17410 #
17411 ftrapcc_or:
17412 	fbor.w		ftrapcc_trap		# ordered?
17413 ftrapcc_or_no:
17414 	rts					# do nothing
17415 
17416 #
17417 # unordered:
17418 #
17419 #	NAN
17420 #
17421 ftrapcc_un:
17422 	fbun.w		ftrapcc_trap		# unordered?
17423 ftrapcc_un_no:
17424 	rts					# do nothing
17425 
17426 #######################################################################
17427 
# the predicate was true and no enabled BSUN is being reported
# (the BSUN/AIOP bits may nevertheless have been set in USER_FPSR).
# we will need to jump to the ftrapcc vector. the stack frame
# is the same size as that of the fp unimp instruction. the
# only difference is that the <ea> field should hold the PC
# of the ftrapcc instruction and the vector offset field
# should denote the ftrapcc trap.
# this routine itself only records ftrapcc_flg in SPCOND_FLG and
# returns; the frame changes described above are left to the caller.
ftrapcc_trap:
	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)
	rts
17437 
# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
# this routine itself only records fbsun_flg in SPCOND_FLG and returns.
ftrapcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts
17445 
17446 #########################################################################
17447 # fscc(): routine to emulate the fscc instruction			#
17448 #									#
17449 # XDEF **************************************************************** #
17450 #	_fscc()								#
17451 #									#
17452 # XREF **************************************************************** #
17453 #	store_dreg_b() - store result to data register file		#
17454 #	dec_areg() - decrement an areg for -(an) mode			#
17455 #	inc_areg() - increment an areg for (an)+ mode			#
17456 #	_dmem_write_byte() - store result to memory			#
17457 #									#
17458 # INPUT ***************************************************************	#
17459 #	none								#
17460 #									#
17461 # OUTPUT ************************************************************** #
17462 #	none								#
17463 #									#
17464 # ALGORITHM ***********************************************************	#
17465 #	This routine checks which conditional predicate is specified by	#
17466 # the stacked fscc instruction opcode and then branches to a routine	#
17467 # for that predicate. The corresponding fbcc instruction is then used	#
17468 # to see whether the condition (specified by the stacked FPSR) is true	#
17469 # or false.								#
17470 #	If a BSUN exception should be indicated, the BSUN and ABSUN	#
17471 # bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
17472 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
17473 # enabled BSUN should not be flagged and the predicate is true, then	#
17474 # the result is stored to the data register file or memory		#
17475 #									#
17476 #########################################################################
17477 
17478 	global		_fscc
17479 _fscc:
17480 	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
17481 
17482 	clr.l		%d1			# clear scratch reg
17483 	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
17484 	ror.l		&0x8,%d1		# rotate to top byte
17485 	fmov.l		%d1,%fpsr		# insert into FPSR
17486 
17487 	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1 # load table
17488 	jmp		(tbl_fscc.b,%pc,%d1.w)	# jump to fscc routine
17489 
17490 tbl_fscc:
17491 	short		fscc_f		-	tbl_fscc	# 00
17492 	short		fscc_eq		-	tbl_fscc	# 01
17493 	short		fscc_ogt	-	tbl_fscc	# 02
17494 	short		fscc_oge	-	tbl_fscc	# 03
17495 	short		fscc_olt	-	tbl_fscc	# 04
17496 	short		fscc_ole	-	tbl_fscc	# 05
17497 	short		fscc_ogl	-	tbl_fscc	# 06
17498 	short		fscc_or		-	tbl_fscc	# 07
17499 	short		fscc_un		-	tbl_fscc	# 08
17500 	short		fscc_ueq	-	tbl_fscc	# 09
17501 	short		fscc_ugt	-	tbl_fscc	# 10
17502 	short		fscc_uge	-	tbl_fscc	# 11
17503 	short		fscc_ult	-	tbl_fscc	# 12
17504 	short		fscc_ule	-	tbl_fscc	# 13
17505 	short		fscc_neq	-	tbl_fscc	# 14
17506 	short		fscc_t		-	tbl_fscc	# 15
17507 	short		fscc_sf		-	tbl_fscc	# 16
17508 	short		fscc_seq	-	tbl_fscc	# 17
17509 	short		fscc_gt		-	tbl_fscc	# 18
17510 	short		fscc_ge		-	tbl_fscc	# 19
17511 	short		fscc_lt		-	tbl_fscc	# 20
17512 	short		fscc_le		-	tbl_fscc	# 21
17513 	short		fscc_gl		-	tbl_fscc	# 22
17514 	short		fscc_gle	-	tbl_fscc	# 23
17515 	short		fscc_ngle	-	tbl_fscc	# 24
17516 	short		fscc_ngl	-	tbl_fscc	# 25
17517 	short		fscc_nle	-	tbl_fscc	# 26
17518 	short		fscc_nlt	-	tbl_fscc	# 27
17519 	short		fscc_nge	-	tbl_fscc	# 28
17520 	short		fscc_ngt	-	tbl_fscc	# 29
17521 	short		fscc_sneq	-	tbl_fscc	# 30
17522 	short		fscc_st		-	tbl_fscc	# 31
17523 
17524 #########################################################################
17525 #									#
17526 # IEEE Nonaware tests							#
17527 #									#
17528 # For the IEEE nonaware tests, we set the result based on the		#
17529 # floating point condition codes. In addition, we check to see		#
17530 # if the NAN bit is set, in which case BSUN and AIOP will be set.	#
17531 #									#
17532 # The cases EQ and NE are shared by the Aware and Nonaware groups	#
17533 # and are incapable of setting the BSUN exception bit.			#
17534 #									#
17535 # Typically, only one of the two possible branch directions could	#
17536 # have the NAN bit set.							#
17537 #									#
17538 #########################################################################
17539 
17540 #
17541 # equal (EQ): true when Z is set. this routine never flags BSUN.
17542 #
17543 #	Z
17544 #
17545 fscc_eq:
17546 	fbeq.w		fscc_eq_yes		# equal?
17547 fscc_eq_no:
17548 	clr.b		%d0			# result byte = 0x00 (false)
17549 	bra.w		fscc_done		# go store the result
17550 fscc_eq_yes:
17551 	st		%d0			# result byte = 0xff (true)
17552 	bra.w		fscc_done		# go store the result
17553 
17554 #
17555 # not equal (NE): true when Z is clear. this routine never flags BSUN.
17556 #	_
17557 #	Z
17558 #
17559 fscc_neq:
17560 	fbneq.w		fscc_neq_yes		# not equal?
17561 fscc_neq_no:
17562 	clr.b		%d0			# result byte = 0x00 (false)
17563 	bra.w		fscc_done		# go store the result
17564 fscc_neq_yes:
17565 	st		%d0			# result byte = 0xff (true)
17566 	bra.w		fscc_done		# go store the result
17567 
17568 #
17569 # greater than (GT): a false result with NAN set flags BSUN and AIOP.
17570 #	_______
17571 #	NANvZvN
17572 #
17573 fscc_gt:
17574 	fbgt.w		fscc_gt_yes		# greater than?
17575 fscc_gt_no:
17576 	clr.b		%d0			# result byte = 0x00 (false)
17577 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17578 	beq.w		fscc_done		# no; go store the result
17579 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17580 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17581 fscc_gt_yes:
17582 	st		%d0			# result byte = 0xff (true)
17583 	bra.w		fscc_done		# go store the result
17584 
17585 #
17586 # not greater than (NGT): a true result with NAN set flags BSUN and AIOP.
17587 #
17588 #	NANvZvN
17589 #
17590 fscc_ngt:
17591 	fbngt.w		fscc_ngt_yes		# not greater than?
17592 fscc_ngt_no:
17593 	clr.b		%d0			# result byte = 0x00 (false)
17594 	bra.w		fscc_done		# go store the result
17595 fscc_ngt_yes:
17596 	st		%d0			# result byte = 0xff (true)
17597 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17598 	beq.w		fscc_done		# no; go store the result
17599 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17600 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17601 
#
# greater than or equal (GE, IEEE nonaware):
#	   _____
#	Zv(NANvN)
#
# whichever way the test goes, a set NAN condition code flags BSUN and
# AIOP, so both arms share a single NAN check.
#
fscc_ge:
	fbge.w		fscc_ge_yes		# greater than or equal?
fscc_ge_no:
	clr.b		%d0			# result byte = 0x00 (false)
	bra.b		fscc_ge_nan_chk		# join the common NAN check
fscc_ge_yes:
	st		%d0			# result byte = 0xff (true)
fscc_ge_nan_chk:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in cc?
	beq.w		fscc_done		# no; go store the result
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17621 
17622 #
17623 # not (greater than or equal) (NGE): true with NAN set flags BSUN and AIOP.
17624 #	       _
17625 #	NANv(N^Z)
17626 #
17627 fscc_nge:
17628 	fbnge.w		fscc_nge_yes		# not (greater than or equal)?
17629 fscc_nge_no:
17630 	clr.b		%d0			# result byte = 0x00 (false)
17631 	bra.w		fscc_done		# go store the result
17632 fscc_nge_yes:
17633 	st		%d0			# result byte = 0xff (true)
17634 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17635 	beq.w		fscc_done		# no; go store the result
17636 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17637 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17638 
17639 #
17640 # less than (LT): a false result with NAN set flags BSUN and AIOP.
17641 #	   _____
17642 #	N^(NANvZ)
17643 #
17644 fscc_lt:
17645 	fblt.w		fscc_lt_yes		# less than?
17646 fscc_lt_no:
17647 	clr.b		%d0			# result byte = 0x00 (false)
17648 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17649 	beq.w		fscc_done		# no; go store the result
17650 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17651 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17652 fscc_lt_yes:
17653 	st		%d0			# result byte = 0xff (true)
17654 	bra.w		fscc_done		# go store the result
17655 
17656 #
17657 # not less than (NLT): a true result with NAN set flags BSUN and AIOP.
17658 #	       _
17659 #	NANv(ZvN)
17660 #
17661 fscc_nlt:
17662 	fbnlt.w		fscc_nlt_yes		# not less than?
17663 fscc_nlt_no:
17664 	clr.b		%d0			# result byte = 0x00 (false)
17665 	bra.w		fscc_done		# go store the result
17666 fscc_nlt_yes:
17667 	st		%d0			# result byte = 0xff (true)
17668 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17669 	beq.w		fscc_done		# no; go store the result
17670 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17671 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17672 
#
# less than or equal (LE, IEEE nonaware):
#	     ___
#	Zv(N^NAN)
#
# whichever way the test goes, a set NAN condition code flags BSUN and
# AIOP, so both arms share a single NAN check.
#
fscc_le:
	fble.w		fscc_le_yes		# less than or equal?
fscc_le_no:
	clr.b		%d0			# result byte = 0x00 (false)
	bra.b		fscc_le_nan_chk		# join the common NAN check
fscc_le_yes:
	st		%d0			# result byte = 0xff (true)
fscc_le_nan_chk:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in cc?
	beq.w		fscc_done		# no; go store the result
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17692 
17693 #
17694 # not (less than or equal) (NLE): true with NAN set flags BSUN and AIOP.
17695 #	     ___
17696 #	NANv(NvZ)
17697 #
17698 fscc_nle:
17699 	fbnle.w		fscc_nle_yes		# not (less than or equal)?
17700 fscc_nle_no:
17701 	clr.b		%d0			# result byte = 0x00 (false)
17702 	bra.w		fscc_done		# go store the result
17703 fscc_nle_yes:
17704 	st		%d0			# result byte = 0xff (true)
17705 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17706 	beq.w		fscc_done		# no; go store the result
17707 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17708 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17709 
17710 #
17711 # greater or less than (GL): a false result with NAN set flags BSUN and AIOP.
17712 #	_____
17713 #	NANvZ
17714 #
17715 fscc_gl:
17716 	fbgl.w		fscc_gl_yes		# greater or less than?
17717 fscc_gl_no:
17718 	clr.b		%d0			# result byte = 0x00 (false)
17719 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17720 	beq.w		fscc_done		# no; go store the result
17721 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17722 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17723 fscc_gl_yes:
17724 	st		%d0			# result byte = 0xff (true)
17725 	bra.w		fscc_done		# go store the result
17726 
17727 #
17728 # not (greater or less than) (NGL): true with NAN set flags BSUN and AIOP.
17729 #
17730 #	NANvZ
17731 #
17732 fscc_ngl:
17733 	fbngl.w		fscc_ngl_yes		# not (greater or less than)?
17734 fscc_ngl_no:
17735 	clr.b		%d0			# result byte = 0x00 (false)
17736 	bra.w		fscc_done		# go store the result
17737 fscc_ngl_yes:
17738 	st		%d0			# result byte = 0xff (true)
17739 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17740 	beq.w		fscc_done		# no; go store the result
17741 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17742 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17743 
17744 #
17745 # greater, less, or equal (GLE): the false arm flags BSUN/AIOP with no NAN test.
17746 #	___
17747 #	NAN
17748 #
17749 fscc_gle:
17750 	fbgle.w		fscc_gle_yes		# greater, less, or equal?
17751 fscc_gle_no:
17752 	clr.b		%d0			# result byte = 0x00 (false)
17753 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17754 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17755 fscc_gle_yes:
17756 	st		%d0			# result byte = 0xff (true)
17757 	bra.w		fscc_done		# go store the result
17758 
17759 #
17760 # not (greater, less, or equal) (NGLE): the true arm flags BSUN/AIOP with no NAN test.
17761 #
17762 #	NAN
17763 #
17764 fscc_ngle:
17765 	fbngle.w		fscc_ngle_yes	# not (greater, less, or equal)?
17766 fscc_ngle_no:
17767 	clr.b		%d0			# result byte = 0x00 (false)
17768 	bra.w		fscc_done		# go store the result
17769 fscc_ngle_yes:
17770 	st		%d0			# result byte = 0xff (true)
17771 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17772 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17773 
17774 #########################################################################
17775 #									#
17776 # Miscellaneous tests							#
17777 #									#
17778 # False (F) and true (T) give a constant result and never set BSUN. The	#
17779 # signalling tests (SF, ST, SEQ, SNE) set BSUN and AIOP whenever the	#
17780 # NAN condition code bit is set.					#
17781 #									#
17782 #########################################################################
17783 
17784 #
17785 # false (F): the result is always false; no condition codes are examined.
17786 #
17787 #	False
17788 #
17789 fscc_f:
17790 	clr.b		%d0			# result byte = 0x00 (false)
17791 	bra.w		fscc_done		# go store the result
17792 
17793 #
17794 # true (T): the result is always true; no condition codes are examined.
17795 #
17796 #	True
17797 #
17798 fscc_t:
17799 	st		%d0			# result byte = 0xff (true)
17800 	bra.w		fscc_done		# go store the result
17801 
17802 #
17803 # signalling false (SF): always false; flags BSUN and AIOP if NAN is set.
17804 #
17805 #	False
17806 #
17807 fscc_sf:
17808 	clr.b		%d0			# result byte = 0x00 (false)
17809 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17810 	beq.w		fscc_done		# no; go store the result
17811 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17812 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17813 
17814 #
17815 # signalling true (ST): always true; flags BSUN and AIOP if NAN is set.
17816 #
17817 #	True
17818 #
17819 fscc_st:
17820 	st		%d0			# result byte = 0xff (true)
17821 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
17822 	beq.w		fscc_done		# no; go store the result
17823 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17824 	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17825 
#
# signalling equal (SEQ):
#
#	Z
#
# whichever way the test goes, a set NAN condition code flags BSUN and
# AIOP, so both arms share a single NAN check.
#
fscc_seq:
	fbseq.w		fscc_seq_yes		# signalling equal?
fscc_seq_no:
	clr.b		%d0			# result byte = 0x00 (false)
	bra.b		fscc_seq_nan_chk	# join the common NAN check
fscc_seq_yes:
	st		%d0			# result byte = 0xff (true)
fscc_seq_nan_chk:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in cc?
	beq.w		fscc_done		# no; go store the result
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17845 
#
# signalling not equal (SNE):
#	_
#	Z
#
# whichever way the test goes, a set NAN condition code flags BSUN and
# AIOP, so both arms share a single NAN check.
#
fscc_sneq:
	fbsneq.w	fscc_sneq_yes		# signalling not equal?
fscc_sneq_no:
	clr.b		%d0			# result byte = 0x00 (false)
	bra.b		fscc_sneq_nan_chk	# join the common NAN check
fscc_sneq_yes:
	st		%d0			# result byte = 0xff (true)
fscc_sneq_nan_chk:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in cc?
	beq.w		fscc_done		# no; go store the result
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN and AIOP
	bra.w		fscc_chk_bsun		# go see if BSUN is enabled
17865 
17866 #########################################################################
17867 #									#
17868 # IEEE Aware tests							#
17869 #									#
17870 # For the IEEE aware tests, we only have to set the result based on the	#
17871 # floating point condition codes. The BSUN exception will not be	#
17872 # set for any of these tests.						#
17873 #									#
17874 #########################################################################
17875 
17876 #
17877 # ordered greater than (OGT): IEEE aware; result only, BSUN is not touched.
17878 #	_______
17879 #	NANvZvN
17880 #
17881 fscc_ogt:
17882 	fbogt.w		fscc_ogt_yes		# ordered greater than?
17883 fscc_ogt_no:
17884 	clr.b		%d0			# result byte = 0x00 (false)
17885 	bra.w		fscc_done		# go store the result
17886 fscc_ogt_yes:
17887 	st		%d0			# result byte = 0xff (true)
17888 	bra.w		fscc_done		# go store the result
17889 
17890 #
17891 # unordered or less or equal (ULE): IEEE aware; BSUN is not touched.
17892 #
17893 #	NANvZvN
17894 #
17895 fscc_ule:
17896 	fbule.w		fscc_ule_yes		# unordered or less or equal?
17897 fscc_ule_no:
17898 	clr.b		%d0			# result byte = 0x00 (false)
17899 	bra.w		fscc_done		# go store the result
17900 fscc_ule_yes:
17901 	st		%d0			# result byte = 0xff (true)
17902 	bra.w		fscc_done		# go store the result
17903 
17904 #
17905 # ordered greater than or equal (OGE): IEEE aware; BSUN is not touched.
17906 #	   _____
17907 #	Zv(NANvN)
17908 #
17909 fscc_oge:
17910 	fboge.w		fscc_oge_yes		# ordered greater than or equal?
17911 fscc_oge_no:
17912 	clr.b		%d0			# result byte = 0x00 (false)
17913 	bra.w		fscc_done		# go store the result
17914 fscc_oge_yes:
17915 	st		%d0			# result byte = 0xff (true)
17916 	bra.w		fscc_done		# go store the result
17917 
17918 #
17919 # unordered or less than (ULT): IEEE aware; BSUN is not touched.
17920 #	       _
17921 #	NANv(N^Z)
17922 #
17923 fscc_ult:
17924 	fbult.w		fscc_ult_yes		# unordered or less than?
17925 fscc_ult_no:
17926 	clr.b		%d0			# result byte = 0x00 (false)
17927 	bra.w		fscc_done		# go store the result
17928 fscc_ult_yes:
17929 	st		%d0			# result byte = 0xff (true)
17930 	bra.w		fscc_done		# go store the result
17931 
17932 #
17933 # ordered less than (OLT): IEEE aware; result only, BSUN is not touched.
17934 #	   _____
17935 #	N^(NANvZ)
17936 #
17937 fscc_olt:
17938 	fbolt.w		fscc_olt_yes		# ordered less than?
17939 fscc_olt_no:
17940 	clr.b		%d0			# result byte = 0x00 (false)
17941 	bra.w		fscc_done		# go store the result
17942 fscc_olt_yes:
17943 	st		%d0			# result byte = 0xff (true)
17944 	bra.w		fscc_done		# go store the result
17945 
17946 #
17947 # unordered or greater or equal (UGE): IEEE aware; BSUN is not touched.
17948 #	      _
17949 #	NANvZvN
17950 #
17951 fscc_uge:
17952 	fbuge.w		fscc_uge_yes		# unordered or greater or equal?
17953 fscc_uge_no:
17954 	clr.b		%d0			# result byte = 0x00 (false)
17955 	bra.w		fscc_done		# go store the result
17956 fscc_uge_yes:
17957 	st		%d0			# result byte = 0xff (true)
17958 	bra.w		fscc_done		# go store the result
17959 
17960 #
17961 # ordered less than or equal (OLE): IEEE aware; BSUN is not touched.
17962 #	     ___
17963 #	Zv(N^NAN)
17964 #
17965 fscc_ole:
17966 	fbole.w		fscc_ole_yes		# ordered less than or equal?
17967 fscc_ole_no:
17968 	clr.b		%d0			# result byte = 0x00 (false)
17969 	bra.w		fscc_done		# go store the result
17970 fscc_ole_yes:
17971 	st		%d0			# result byte = 0xff (true)
17972 	bra.w		fscc_done		# go store the result
17973 
17974 #
17975 # unordered or greater than (UGT): IEEE aware; BSUN is not touched.
17976 #	     ___
17977 #	NANv(NvZ)
17978 #
17979 fscc_ugt:
17980 	fbugt.w		fscc_ugt_yes		# unordered or greater than?
17981 fscc_ugt_no:
17982 	clr.b		%d0			# result byte = 0x00 (false)
17983 	bra.w		fscc_done		# go store the result
17984 fscc_ugt_yes:
17985 	st		%d0			# result byte = 0xff (true)
17986 	bra.w		fscc_done		# go store the result
17987 
17988 #
17989 # ordered greater or less than (OGL): IEEE aware; BSUN is not touched.
17990 #	_____
17991 #	NANvZ
17992 #
17993 fscc_ogl:
17994 	fbogl.w		fscc_ogl_yes		# ordered greater or less than?
17995 fscc_ogl_no:
17996 	clr.b		%d0			# result byte = 0x00 (false)
17997 	bra.w		fscc_done		# go store the result
17998 fscc_ogl_yes:
17999 	st		%d0			# result byte = 0xff (true)
18000 	bra.w		fscc_done		# go store the result
18001 
18002 #
18003 # unordered or equal (UEQ): IEEE aware; result only, BSUN is not touched.
18004 #
18005 #	NANvZ
18006 #
18007 fscc_ueq:
18008 	fbueq.w		fscc_ueq_yes		# unordered or equal?
18009 fscc_ueq_no:
18010 	clr.b		%d0			# result byte = 0x00 (false)
18011 	bra.w		fscc_done		# go store the result
18012 fscc_ueq_yes:
18013 	st		%d0			# result byte = 0xff (true)
18014 	bra.w		fscc_done		# go store the result
18015 
18016 #
18017 # ordered (OR): true when NAN is clear. IEEE aware; BSUN is not touched.
18018 #	___
18019 #	NAN
18020 #
18021 fscc_or:
18022 	fbor.w		fscc_or_yes		# ordered?
18023 fscc_or_no:
18024 	clr.b		%d0			# result byte = 0x00 (false)
18025 	bra.w		fscc_done		# go store the result
18026 fscc_or_yes:
18027 	st		%d0			# result byte = 0xff (true)
18028 	bra.w		fscc_done		# go store the result
18029 
18030 #
18031 # unordered (UN): true when NAN is set. IEEE aware; BSUN is not touched.
18032 #
18033 #	NAN
18034 #
18035 fscc_un:
18036 	fbun.w		fscc_un_yes		# unordered?
18037 fscc_un_no:
18038 	clr.b		%d0			# result byte = 0x00 (false)
18039 	bra.w		fscc_done		# go store the result
18040 fscc_un_yes:
18041 	st		%d0			# result byte = 0xff (true)
18042 	bra.w		fscc_done		# go store the result
18043 
18044 #######################################################################
18045 
18046 #
18047 # the bsun exception bit was set. now, check to see if BSUN
18048 # is enabled. if so, don't store result and correct stack frame
18049 # for a bsun exception.
18050 #
18051 fscc_chk_bsun:
18052 	btst		&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
18053 	bne.w		fscc_bsun		# yes; skip the store
18054 
18055 #
18056 # either bsun was not set, or it was set but BSUN is not enabled.
18057 # the result has been selected (lo byte of d0).
18058 # now, check to see if the result is to be stored in the data register
18059 # file or in memory.
18060 #
18061 fscc_done:
18062 	mov.l		%d0,%a0			# save result for a moment
18063 
18064 	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
18065 	mov.l		%d1,%d0			# make a copy
18066 	andi.b		&0x38,%d1		# extract <ea> mode bits
18067 
18068 	bne.b		fscc_mem_op		# it's a memory operation
18069 
18070 	mov.l		%d0,%d1			# mode is Dn: get lo opword back
18071 	andi.w		&0x7,%d1		# pass index in d1
18072 	mov.l		%a0,%d0			# pass result in d0
18073 	bsr.l		store_dreg_b		# save result in regfile
18074 	rts
18075 
18076 #
18077 # the stacked <ea> is correct with the exception of:
18078 #	-> Dn : <ea> is garbage
18079 #
18080 # if the addressing mode is post-increment or pre-decrement,
18081 # then the address registers have not been updated.
18082 # on entry: d1.b = opword & 0x38 (mode bits), a0 = result from fscc_done.
18083 fscc_mem_op:
18084 	cmpi.b		%d1,&0x18		# is <ea> (An)+ ? (mode 3 << 3)
18085 	beq.b		fscc_mem_inc		# yes
18086 	cmpi.b		%d1,&0x20		# is <ea> -(An) ? (mode 4 << 3)
18087 	beq.b		fscc_mem_dec		# yes
18088 
18089 	mov.l		%a0,%d0			# pass result in d0
18090 	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
18091 	bsr.l		_dmem_write_byte	# write result byte
18092 
18093 	tst.l		%d1			# did dstore fail? (d1 != 0)
18094 	bne.w		fscc_err		# yes
18095 
18096 	rts
18097 
18098 # addressing mode is post-increment. write the result byte. if the write
18099 # fails then don't update the address register. if write passes then
18100 # call inc_areg() to update the address register by 1.
18101 fscc_mem_inc:
18102 	mov.l		%a0,%d0			# pass result in d0
18103 	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
18104 	bsr.l		_dmem_write_byte	# write result byte
18105 
18106 	tst.l		%d1			# did dstore fail? (d1 != 0)
18107 	bne.w		fscc_err		# yes; An is left unchanged
18108 
18109 	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch lo opword
18110 	andi.w		&0x7,%d1		# pass An index in d1
18111 	movq.l		&0x1,%d0		# pass amt to inc by
18112 	bsr.l		inc_areg		# increment address register
18113 
18114 	rts
18115 
18116 # addressing mode is pre-decrement. write the result byte. if the write
18117 # fails then don't update the address register. if the write passes then
18118 # call dec_areg() to update the address register by 1.
18119 fscc_mem_dec:
18120 	mov.l		%a0,%d0			# pass result in d0
18121 	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
18122 	bsr.l		_dmem_write_byte	# write result byte
18123 
18124 	tst.l		%d1			# did dstore fail? (d1 != 0)
18125 	bne.w		fscc_err		# yes; An is left unchanged
18126 
18127 	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch lo opword
18128 	andi.w		&0x7,%d1		# pass An index in d1
18129 	movq.l		&0x1,%d0		# pass amt to dec by
18130 	bsr.l		dec_areg		# decrement address register
18131 
18132 	rts
18133 
18134 # the emulation routine set bsun and BSUN was enabled. have to
18135 # fix stack and jump to the bsun handler. nothing is stored here;
18136 # let the caller of this routine shift the stack frame up to
18137 # eliminate the effective address field.
18138 fscc_bsun:
18139 	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # tell the caller via SPCOND_FLG
18140 	rts
18141 
18142 # the byte write to memory has failed. pass the failing effective address
18143 # and a FSLW to funimp_dacc().
18144 fscc_err:
18145 	mov.w		&0x00a1,EXC_VOFF(%a6)	# store 0x00a1 in the frame's EXC_VOFF
18146 	bra.l		facc_finish		# NOTE(review): header names funimp_dacc(); exit is via facc_finish - confirm
18147 
18148 #########################################################################
18149 # XDEF ****************************************************************	#
18150 #	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
18151 #									#
18152 # XREF ****************************************************************	#
18153 #	fetch_dreg() - fetch data register				#
18154 #	{i,d,}mem_read() - fetch data from memory			#
18155 #	_mem_write() - write data to memory				#
18156 #	iea_iacc() - instruction memory access error occurred		#
18157 #	iea_dacc() - data memory access error occurred			#
18158 #	restore() - restore An index regs if access error occurred	#
18159 #									#
18160 # INPUT ***************************************************************	#
18161 #	None								#
18162 #									#
18163 # OUTPUT **************************************************************	#
18164 #	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
18165 #		d0 = size of dump					#
18166 #		d1 = Dn							#
18167 #	Else if instruction access error,				#
18168 #		d0 = FSLW						#
18169 #	Else if data access error,					#
18170 #		d0 = FSLW						#
18171 #		a0 = address of fault					#
18172 #	Else								#
18173 #		none.							#
18174 #									#
18175 # ALGORITHM ***********************************************************	#
18176 #	The effective address must be calculated since this is entered	#
18177 # from an "Unimplemented Effective Address" exception handler. So, we	#
18178 # have our own fcalc_ea() routine here. If an access error is flagged	#
18179 # by a _{i,d,}mem_read() call, we must exit through the special		#
18180 # handler.								#
18181 #	The data register is determined and its value loaded to get the	#
18182 # string of FP registers affected. This value is used as an index into	#
18183 # a lookup table such that we can determine the number of bytes		#
18184 # involved.								#
18185 #	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
18186 # to read in all FP values. Again, _mem_read() may fail and require a	#
18187 # special exit.								#
18188 #	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
18189 # to write all FP values. _mem_write() may also fail.			#
18190 #	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
18191 # then we return the size of the dump and the string to the caller	#
18192 # so that the move can occur outside of this routine. This special	#
18193 # case is required so that moves to the system stack are handled	#
18194 # correctly.								#
18195 #									#
18196 # DYNAMIC:								#
18197 #	fmovm.x	dn, <ea>						#
18198 #	fmovm.x	<ea>, dn						#
18199 #									#
18200 #	      <WORD 1>		      <WORD2>				#
18201 #	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
18202 #									#
18203 #	& = (0): predecrement addressing mode				#
18204 #	    (1): postincrement or control addressing mode		#
18205 #	@ = (0): move listed regs from memory to the FPU		#
18206 #	    (1): move listed regs from the FPU to memory		#
18207 #	$$$    : index of data register holding reg select mask		#
18208 #									#
18209 # NOTES:								#
18210 #	If the data register holds a zero, then the			#
18211 #	instruction is a nop.						#
18212 #									#
18213 #########################################################################
18214 
18215 	global		fmovm_dynamic
18216 fmovm_dynamic:
18217 
18218 # extract the data register in which the bit string resides...
18219 	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch lo byte of extword
18220 	andi.w		&0x70,%d1		# extract reg bits
18221 	lsr.b		&0x4,%d1		# shift into lo bits
18222 
18223 # fetch the bit string into d0...
18224 	bsr.l		fetch_dreg		# fetch reg string
18225 
18226 	andi.l		&0x000000ff,%d0		# keep only lo byte
18227 
18228 	mov.l		%d0,-(%sp)		# save strg
18229 	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = # of bytes for strg
18230 	mov.l		%d0,-(%sp)		# save size
18231 	bsr.l		fmovm_calc_ea		# calculate <ea>
18232 	mov.l		(%sp)+,%d0		# restore size
18233 	mov.l		(%sp)+,%d1		# restore strg (sets Z if strg is zero)
18234 
18235 # if the bit string is a zero, then the operation is a no-op
18236 # but, make sure that we've calculated ea and advanced the opword pointer
18237 	beq.w		fmovm_data_done		# Z comes from the strg restore above
18238 
18239 # separate move ins from move outs...
18240 	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
18241 	beq.w		fmovm_data_in		# bit clear: it's a move in
18242 
18243 #############
18244 # MOVE OUT: #
18245 #############
18246 fmovm_data_out:
18247 	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
18248 	bne.w		fmovm_out_ctrl		# bit set: control/postincrement
18249 
18250 ############################
18251 fmovm_out_predec:
18252 # for predecrement mode, the bit string is the opposite of both control
18253 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18254 # here, we convert it to be just like the others...
18255 	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1 # bit-reverse strg
18256 
18257 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
18258 	beq.b		fmovm_out_ctrl		# user
18259 
18260 fmovm_out_predec_s:
18261 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18262 	bne.b		fmovm_out_ctrl		# no; do the normal move out
18263 
18264 # the operation was unfortunately an: fmovm.x dn,-(sp)
18265 # called from supervisor mode.
18266 # we're also passing "size" and "strg" back to the calling routine
18267 	rts					# d0 = size, d1 = converted strg
18268 
18269 ############################
18270 fmovm_out_ctrl:
18271 	mov.l		%a0,%a1			# move <ea> to a1
18272 
18273 	sub.l		%d0,%sp			# subtract size of dump
18274 	lea		(%sp),%a0		# a0 = start of the stack buffer
18275 
18276 	tst.b		%d1			# should FP0 be moved? (strg bit 7)
18277 	bpl.b		fmovm_out_ctrl_fp1	# no
18278 
18279 	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes; copy from the EXC_FP0 frame slot
18280 	mov.l		0x4+EXC_FP0(%a6),(%a0)+
18281 	mov.l		0x8+EXC_FP0(%a6),(%a0)+
18282 
18283 fmovm_out_ctrl_fp1:
18284 	lsl.b		&0x1,%d1		# should FP1 be moved?
18285 	bpl.b		fmovm_out_ctrl_fp2	# no
18286 
18287 	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes; copy from the EXC_FP1 frame slot
18288 	mov.l		0x4+EXC_FP1(%a6),(%a0)+
18289 	mov.l		0x8+EXC_FP1(%a6),(%a0)+
18290 
18291 fmovm_out_ctrl_fp2:
18292 	lsl.b		&0x1,%d1		# should FP2 be moved?
18293 	bpl.b		fmovm_out_ctrl_fp3	# no
18294 
18295 	fmovm.x		&0x20,(%a0)		# yes
18296 	add.l		&0xc,%a0		# step past the 12 bytes written
18297 
18298 fmovm_out_ctrl_fp3:
18299 	lsl.b		&0x1,%d1		# should FP3 be moved?
18300 	bpl.b		fmovm_out_ctrl_fp4	# no
18301 
18302 	fmovm.x		&0x10,(%a0)		# yes
18303 	add.l		&0xc,%a0		# step past the 12 bytes written
18304 
18305 fmovm_out_ctrl_fp4:
18306 	lsl.b		&0x1,%d1		# should FP4 be moved?
18307 	bpl.b		fmovm_out_ctrl_fp5	# no
18308 
18309 	fmovm.x		&0x08,(%a0)		# yes
18310 	add.l		&0xc,%a0		# step past the 12 bytes written
18311 
18312 fmovm_out_ctrl_fp5:
18313 	lsl.b		&0x1,%d1		# should FP5 be moved?
18314 	bpl.b		fmovm_out_ctrl_fp6	# no
18315 
18316 	fmovm.x		&0x04,(%a0)		# yes
18317 	add.l		&0xc,%a0		# step past the 12 bytes written
18318 
18319 fmovm_out_ctrl_fp6:
18320 	lsl.b		&0x1,%d1		# should FP6 be moved?
18321 	bpl.b		fmovm_out_ctrl_fp7	# no
18322 
18323 	fmovm.x		&0x02,(%a0)		# yes
18324 	add.l		&0xc,%a0		# step past the 12 bytes written
18325 
18326 fmovm_out_ctrl_fp7:
18327 	lsl.b		&0x1,%d1		# should FP7 be moved?
18328 	bpl.b		fmovm_out_ctrl_done	# no
18329 
18330 	fmovm.x		&0x01,(%a0)		# yes
18331 	add.l		&0xc,%a0		# step past the 12 bytes written
18332 
18333 fmovm_out_ctrl_done:
18334 	mov.l		%a1,L_SCR1(%a6)		# keep <ea> in L_SCR1
18335 
18336 	lea		(%sp),%a0		# pass: supervisor src
18337 	mov.l		%d0,-(%sp)		# save size
18338 	bsr.l		_dmem_write		# copy data to user mem
18339 
18340 	mov.l		(%sp)+,%d0		# restore size
18341 	add.l		%d0,%sp			# clear fpreg data from stack
18342 
18343 	tst.l		%d1			# did dstore err?
18344 	bne.w		fmovm_out_err		# yes
18345 
18346 	rts
18347 
18348 ############
18349 # MOVE IN: #
18350 ############
18351 fmovm_data_in:
18352 	mov.l		%a0,L_SCR1(%a6)		# keep <ea> in L_SCR1
18353 
18354 	sub.l		%d0,%sp			# make room for fpregs
18355 	lea		(%sp),%a1		# a1 = start of the stack buffer
18356 
18357 	mov.l		%d1,-(%sp)		# save bit string for later
18358 	mov.l		%d0,-(%sp)		# save # of bytes
18359 
18360 	bsr.l		_dmem_read		# copy data from user mem
18361 
18362 	mov.l		(%sp)+,%d0		# retrieve # of bytes
18363 
18364 	tst.l		%d1			# did dfetch fail?
18365 	bne.w		fmovm_in_err		# yes (bit string is still on the stack)
18366 
18367 	mov.l		(%sp)+,%d1		# load bit string
18368 
18369 	lea		(%sp),%a0		# addr of stack
18370 
18371 	tst.b		%d1			# should FP0 be moved? (strg bit 7)
18372 	bpl.b		fmovm_data_in_fp1	# no
18373 
18374 	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes; load into the EXC_FP0 frame slot
18375 	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
18376 	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
18377 
18378 fmovm_data_in_fp1:
18379 	lsl.b		&0x1,%d1		# should FP1 be moved?
18380 	bpl.b		fmovm_data_in_fp2	# no
18381 
18382 	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes; load into the EXC_FP1 frame slot
18383 	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
18384 	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
18385 
18386 fmovm_data_in_fp2:
18387 	lsl.b		&0x1,%d1		# should FP2 be moved?
18388 	bpl.b		fmovm_data_in_fp3	# no
18389 
18390 	fmovm.x		(%a0)+,&0x20		# yes
18391 
18392 fmovm_data_in_fp3:
18393 	lsl.b		&0x1,%d1		# should FP3 be moved?
18394 	bpl.b		fmovm_data_in_fp4	# no
18395 
18396 	fmovm.x		(%a0)+,&0x10		# yes
18397 
18398 fmovm_data_in_fp4:
18399 	lsl.b		&0x1,%d1		# should FP4 be moved?
18400 	bpl.b		fmovm_data_in_fp5	# no
18401 
18402 	fmovm.x		(%a0)+,&0x08		# yes
18403 
18404 fmovm_data_in_fp5:
18405 	lsl.b		&0x1,%d1		# should FP5 be moved?
18406 	bpl.b		fmovm_data_in_fp6	# no
18407 
18408 	fmovm.x		(%a0)+,&0x04		# yes
18409 
18410 fmovm_data_in_fp6:
18411 	lsl.b		&0x1,%d1		# should FP6 be moved?
18412 	bpl.b		fmovm_data_in_fp7	# no
18413 
18414 	fmovm.x		(%a0)+,&0x02		# yes
18415 
18416 fmovm_data_in_fp7:
18417 	lsl.b		&0x1,%d1		# should FP7 be moved?
18418 	bpl.b		fmovm_data_in_done	# no
18419 
18420 	fmovm.x		(%a0)+,&0x01		# yes
18421 
18422 fmovm_data_in_done:
18423 	add.l		%d0,%sp			# remove fpregs from stack
18424 	rts
18425 
18426 #####################################
18427 
18428 fmovm_data_done:
18429 	rts					# zero bit string: nothing to move
18430 
18431 ##############################################################################
18432 
18433 #
18434 # table of 256 byte entries, indexed by the operation's 8-bit bit string,
18435 # that gives the number of bytes that will be moved.
18436 #
18437 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18438 #
18439 tbl_fmovm_size:
18440 	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24	# strg 0x00-0x07
18441 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x08-0x0f
18442 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x10-0x17
18443 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x18-0x1f
18444 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x20-0x27
18445 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x28-0x2f
18446 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x30-0x37
18447 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x38-0x3f
18448 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x40-0x47
18449 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x48-0x4f
18450 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x50-0x57
18451 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x58-0x5f
18452 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x60-0x67
18453 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x68-0x6f
18454 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x70-0x77
18455 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0x78-0x7f
18456 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x80-0x87
18457 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x88-0x8f
18458 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x90-0x97
18459 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x98-0x9f
18460 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0xa0-0xa7
18461 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xa8-0xaf
18462 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xb0-0xb7
18463 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xb8-0xbf
18464 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0xc0-0xc7
18465 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xc8-0xcf
18466 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xd0-0xd7
18467 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xd8-0xdf
18468 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xe0-0xe7
18469 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xe8-0xef
18470 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xf0-0xf7
18471 	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60	# strg 0xf8-0xff
18472 
18473 #
18474 # table to convert a pre-decrement bit string into a post-increment
18475 # or control bit string. each entry is its index with the 8 bits reversed.
18476 # ex:	0x00	==>	0x00
18477 #	0x01	==>	0x80
18478 #	0x02	==>	0x40
18479 #		.
18480 #		.
18481 #	0xfd	==>	0xbf
18482 #	0xfe	==>	0x7f
18483 #	0xff	==>	0xff
18484 #
18485 tbl_fmovm_convert:
18486 	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0	# strg 0x00-0x07
18487 	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0	# strg 0x08-0x0f
18488 	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8	# strg 0x10-0x17
18489 	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8	# strg 0x18-0x1f
18490 	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4	# strg 0x20-0x27
18491 	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4	# strg 0x28-0x2f
18492 	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec	# strg 0x30-0x37
18493 	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc	# strg 0x38-0x3f
18494 	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2	# strg 0x40-0x47
18495 	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2	# strg 0x48-0x4f
18496 	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea	# strg 0x50-0x57
18497 	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa	# strg 0x58-0x5f
18498 	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6	# strg 0x60-0x67
18499 	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6	# strg 0x68-0x6f
18500 	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee	# strg 0x70-0x77
18501 	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe	# strg 0x78-0x7f
18502 	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1	# strg 0x80-0x87
18503 	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1	# strg 0x88-0x8f
18504 	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9	# strg 0x90-0x97
18505 	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9	# strg 0x98-0x9f
18506 	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5	# strg 0xa0-0xa7
18507 	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5	# strg 0xa8-0xaf
18508 	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed	# strg 0xb0-0xb7
18509 	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd	# strg 0xb8-0xbf
18510 	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3	# strg 0xc0-0xc7
18511 	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3	# strg 0xc8-0xcf
18512 	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb	# strg 0xd0-0xd7
18513 	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb	# strg 0xd8-0xdf
18514 	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7	# strg 0xe0-0xe7
18515 	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7	# strg 0xe8-0xef
18516 	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef	# strg 0xf0-0xf7
18517 	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff	# strg 0xf8-0xff
18518 
18519 	global		fmovm_calc_ea
18520 ###############################################
18521 # _fmovm_calc_ea: calculate effective address #
18522 ###############################################
18523 fmovm_calc_ea:
18524 	mov.l		%d0,%a0			# move # bytes to a0
18525 
18526 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
18527 # easily changed if they were inputs passed in registers.
18528 	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
18529 	mov.w		%d0,%d1			# make a copy
18530 
18531 	andi.w		&0x3f,%d0		# extract mode field
18532 	andi.l		&0x7,%d1		# extract reg  field
18533 
18534 # jump to the corresponding function for each {MODE,REG} pair.
18535 	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18536 	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18537 
	swbeg		&64			# 64-entry switch table follows
tbl_fea_mode:
# one 16-bit offset (handler - tbl_fea_mode) per {mode,reg} value
# 0x00-0x3f, eight entries (reg 0-7) per mode. a zero offset means no
# handler exists for that encoding; jumping through it would land on
# the table itself, so those encodings are presumably rejected before
# fmovm_calc_ea is reached.

# mode 000 - Dn: no handler
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

# mode 001 - An: no handler
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

# mode 010 - (An)
	short		faddr_ind_a0	-	tbl_fea_mode
	short		faddr_ind_a1	-	tbl_fea_mode
	short		faddr_ind_a2	-	tbl_fea_mode
	short		faddr_ind_a3	-	tbl_fea_mode
	short		faddr_ind_a4	-	tbl_fea_mode
	short		faddr_ind_a5	-	tbl_fea_mode
	short		faddr_ind_a6	-	tbl_fea_mode
	short		faddr_ind_a7	-	tbl_fea_mode

# mode 011 - (An)+
	short		faddr_ind_p_a0	-	tbl_fea_mode
	short		faddr_ind_p_a1	-	tbl_fea_mode
	short		faddr_ind_p_a2	-	tbl_fea_mode
	short		faddr_ind_p_a3	-	tbl_fea_mode
	short		faddr_ind_p_a4	-	tbl_fea_mode
	short		faddr_ind_p_a5	-	tbl_fea_mode
	short		faddr_ind_p_a6	-	tbl_fea_mode
	short		faddr_ind_p_a7	-	tbl_fea_mode

# mode 100 - -(An)
	short		faddr_ind_m_a0	-	tbl_fea_mode
	short		faddr_ind_m_a1	-	tbl_fea_mode
	short		faddr_ind_m_a2	-	tbl_fea_mode
	short		faddr_ind_m_a3	-	tbl_fea_mode
	short		faddr_ind_m_a4	-	tbl_fea_mode
	short		faddr_ind_m_a5	-	tbl_fea_mode
	short		faddr_ind_m_a6	-	tbl_fea_mode
	short		faddr_ind_m_a7	-	tbl_fea_mode

# mode 101 - (d16,An)
	short		faddr_ind_disp_a0	-	tbl_fea_mode
	short		faddr_ind_disp_a1	-	tbl_fea_mode
	short		faddr_ind_disp_a2	-	tbl_fea_mode
	short		faddr_ind_disp_a3	-	tbl_fea_mode
	short		faddr_ind_disp_a4	-	tbl_fea_mode
	short		faddr_ind_disp_a5	-	tbl_fea_mode
	short		faddr_ind_disp_a6	-	tbl_fea_mode
	short		faddr_ind_disp_a7	-	tbl_fea_mode

# mode 110 - indexed / memory indirect on An; one shared handler that
# picks the base register from d1
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode

# mode 111 - reg 0: (xxx).W, 1: (xxx).L, 2: (d16,PC), 3: PC indexed,
# reg 4-7: no handler
	short		fabs_short	-	tbl_fea_mode
	short		fabs_long	-	tbl_fea_mode
	short		fpc_ind		-	tbl_fea_mode
	short		fpc_ind_ext	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
18611 
18612 ###################################
18613 # Address register indirect: (An) #
18614 ###################################
# (A0): the <ea> is the a0 value saved in the exception frame.
# the byte count that arrived in a0 is not needed and is overwritten.
faddr_ind_a0:
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts
18618 
# (A1): the <ea> is the a1 value saved in the exception frame.
faddr_ind_a1:
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts
18622 
# (A2): the <ea> is taken from the live a2 register (a2 is read
# directly rather than from the exception frame).
faddr_ind_a2:
	mov.l		%a2,%a0			# Get current a2
	rts
18626 
# (A3): the <ea> is taken from the live a3 register.
faddr_ind_a3:
	mov.l		%a3,%a0			# Get current a3
	rts
18630 
# (A4): the <ea> is taken from the live a4 register.
faddr_ind_a4:
	mov.l		%a4,%a0			# Get current a4
	rts
18634 
# (A5): the <ea> is taken from the live a5 register.
faddr_ind_a5:
	mov.l		%a5,%a0			# Get current a5
	rts
18638 
# (A6): a6 is the handler's frame pointer here, so the interrupted
# program's a6 is read from the longword at (%a6) (presumably the
# value stored there when the frame was linked).
faddr_ind_a6:
	mov.l		(%a6),%a0		# Get current a6
	rts
18642 
# (A7): the <ea> is the stack pointer value kept in EXC_A7(%a6).
faddr_ind_a7:
	mov.l		EXC_A7(%a6),%a0		# Get current a7
	rts
18646 
18647 #####################################################
18648 # Address register indirect w/ postincrement: (An)+ #
18649 #####################################################
# (A0)+: in  a0 = # bytes to step by.
#        out a0 = original (pre-increment) a0 = <ea>; the saved a0 in
#        the exception frame is advanced by # bytes. d0/d1 are scratch.
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18657 
# (A1)+: in  a0 = # bytes to step by.
#        out a0 = original a1 = <ea>; the saved a1 in the exception
#        frame is advanced by # bytes. d0/d1 are scratch.
faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18665 
# (A2)+: in  a0 = # bytes to step by.
#        out a0 = original a2 = <ea>; the live a2 register is advanced
#        by # bytes. d0/d1 are scratch.
faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18673 
# (A3)+: in  a0 = # bytes to step by.
#        out a0 = original a3 = <ea>; the live a3 register is advanced
#        by # bytes. d0/d1 are scratch.
faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18681 
# (A4)+: in  a0 = # bytes to step by.
#        out a0 = original a4 = <ea>; the live a4 register is advanced
#        by # bytes. d0/d1 are scratch.
faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18689 
# (A5)+: in  a0 = # bytes to step by.
#        out a0 = original a5 = <ea>; the live a5 register is advanced
#        by # bytes. d0/d1 are scratch.
faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18697 
# (A6)+: in  a0 = # bytes to step by.
#        out a0 = original a6 = <ea>; the a6 copy held at (%a6) is
#        advanced by # bytes. d0/d1 are scratch.
faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18705 
# (A7)+: in  a0 = # bytes to step by.
#        out a0 = original a7 = <ea>; EXC_A7(%a6) is advanced by
#        # bytes and SPCOND_FLG(%a6) is set to mia7_flg so that later
#        code can recognise the (a7)+ case (presumably to fix up the
#        real stack pointer on exit - confirm against the consumer of
#        SPCOND_FLG). d0/d1 are scratch.
faddr_ind_p_a7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1			# d1 = working copy to bump
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0			# <ea> = value before increment
	rts
18715 
18716 ####################################################
18717 # Address register indirect w/ predecrement: -(An) #
18718 ####################################################
# -(A0): in  a0 = # bytes to step back by.
#        out a0 = a0 - # bytes = <ea>; the same value is written back
#        to the saved a0 in the exception frame. d0 is scratch.
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18725 
# -(A1): in  a0 = # bytes to step back by.
#        out a0 = a1 - # bytes = <ea>; the same value is written back
#        to the saved a1 in the exception frame. d0 is scratch.
faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18732 
# -(A2): in  a0 = # bytes to step back by.
#        out a0 = a2 - # bytes = <ea>; the live a2 register receives
#        the same value. d0 is scratch.
faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18739 
# -(A3): in  a0 = # bytes to step back by.
#        out a0 = a3 - # bytes = <ea>; the live a3 register receives
#        the same value. d0 is scratch.
faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18746 
# -(A4): in  a0 = # bytes to step back by.
#        out a0 = a4 - # bytes = <ea>; the live a4 register receives
#        the same value. d0 is scratch.
faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18753 
# -(A5): in  a0 = # bytes to step back by.
#        out a0 = a5 - # bytes = <ea>; the live a5 register receives
#        the same value. d0 is scratch.
faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18760 
# -(A6): in  a0 = # bytes to step back by.
#        out a0 = a6 - # bytes = <ea>; the a6 copy held at (%a6)
#        receives the same value. d0 is scratch.
faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18767 
# -(A7): in  a0 = # bytes to step back by.
#        out a0 = a7 - # bytes = <ea>; EXC_A7(%a6) receives the same
#        value and SPCOND_FLG(%a6) is set to mda7_flg so that later
#        code can recognise the -(a7) case (presumably to fix up the
#        real stack pointer on exit - confirm against the consumer of
#        SPCOND_FLG). d0 is scratch.
faddr_ind_m_a7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0			# <ea> = value after decrement
	rts
18776 
18777 ########################################################
18778 # Address register indirect w/ displacement: (d16, An) #
18779 ########################################################
# (d16,A0): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = saved a0 + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts
18792 
# (d16,A1): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = saved a1 + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts
18805 
# (d16,A2): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = live a2 + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts
18818 
# (d16,A3): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = live a3 + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts
18831 
# (d16,A4): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = live a4 + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts
18844 
# (d16,A5): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = live a5 + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts
18857 
# (d16,A6): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = a6 copy held at (%a6) +
# sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts
18870 
# (d16,A7): read the 16-bit displacement at EXC_EXTWPTR (advancing the
# pointer past it) and return a0 = EXC_A7(%a6) + sign-extended d16.
# a failed instruction fetch (d1 != 0) exits through iea_iacc.
faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# d0 = d16, d1 = fetch status

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts
18883 
18884 ########################################################################
18885 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18886 #    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
18887 # Memory indirect postindexed: ([bd, An], Xn, od)		       #
18888 # Memory indirect preindexed: ([bd, An, Xn], od)		       #
18889 ########################################################################
# In:	d1 = base address register number (0-7)
# Out:	a0 = <ea>
# Fetches the base An through fetch_dreg (register index 8+n), reads
# the extension word, then either
#   - bit 8 set  : hands off to fcalc_mem_ind with a0 = base and
#                  d0 = extension word, or
#   - bit 8 clear: computes An + (Xn.size * scale) + d8 here.
# an instruction fetch failure exits through iea_iacc.
faddr_ind_ext:
	addq.l		&0x8,%d1		# d1 = 8 + n selects An
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)		# park base across the ifetch

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
# NOTE(review): the parked base is still on the stack when this branch
# is taken; presumably iea_iacc discards the handler stack - confirm.
	bne.l		iea_iacc		# yes

	mov.l		(%sp)+,%a0		# a0 = base An

	btst		&0x8,%d0		# full-format extension word?
	bne.w		fcalc_mem_ind		# yes; handled separately

	mov.l		%d0,L_SCR1(%a6)		# hold opword

	mov.l		%d0,%d1
	rol.w		&0x4,%d1		# extword bits 15-12 -> bits 3-0
	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	bne.b		faii8_long
	ext.l		%d0			# sign extend word index
faii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# extword bits 10-9 -> bits 1-0
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
	rts
18935 
18936 ###########################
18937 # Absolute short: (XXX).W #
18938 ###########################
# (xxx).W: read the 16-bit address at EXC_EXTWPTR (advancing the
# pointer past it) and return it sign-extended to 32 bits in a0.
# an instruction fetch failure (d1 != 0) exits through iea_iacc.
fabs_short:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0 (word move to
						# an address reg sign-extends)
	rts
18949 
18950 ##########################
18951 # Absolute long: (XXX).L #
18952 ##########################
# (xxx).L: read the 32-bit address at EXC_EXTWPTR (advancing the
# pointer past it) and return it in a0.
# an instruction fetch failure (d1 != 0) exits through iea_iacc.
fabs_long:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0
	rts
18963 
18964 #######################################################
18965 # Program counter indirect w/ displacement: (d16, PC) #
18966 #######################################################
# (d16,PC): read the 16-bit displacement at EXC_EXTWPTR and return
# a0 = (address of the displacement word) + sign-extended d16.
# EXC_EXTWPTR is left pointing past the displacement.
# an instruction fetch failure (d1 != 0) exits through iea_iacc.
fpc_ind:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# EXC_EXTWPTR was advanced by 2 above (the addq before the read), but
# the PC base is the address of the displacement word itself, so back
# the result up by 2.
	subq.l		&0x2,%a0		# adjust <ea>
	rts
18982 
18983 ##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
18986 # PC memory indirect postindexed: ([bd, PC], Xn, od)     #
18987 # PC memory indirect preindexed: ([bd, PC, Xn], od)      #
18988 ##########################################################
# Out:	a0 = <ea>
# Reads the extension word at EXC_EXTWPTR; the PC base is the address
# of that extension word. Then either
#   - bit 8 set  : hands off to fcalc_mem_ind with a0 = base and
#                  d0 = extension word, or
#   - bit 8 clear: computes PC + (Xn.size * scale) + d8 here.
# an instruction fetch failure (d1 != 0) exits through iea_iacc.
fpc_ind_ext:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base (undo the +2 above)

	btst		&0x8,%d0		# full-format extension word?
	bne.w		fcalc_mem_ind		# calc memory indirect

	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index
fpii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# PC + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	rts
19031 
# fcalc_mem_ind: <ea> calculation for a full-format extension word.
# In:	d0 = extension word, a0 = base address (An or PC)
# Out:	a0 = <ea>; d2-d5 are saved on entry and restored on exit.
# Instruction fetch failures exit through fcea_iacc and a failed read
# of the intermediate memory pointer exits through fcea_err (both
# restore d2-d5 from the stack first).
# Working registers:
# d2 = index
# d3 = base
# d4 = od
# d5 = extword
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# extword bits 15-12: index regno
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5
	mov.l		%a0,%d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2			# sign extend word index

fno_ext:
	bfextu		%d5{&21:&2},%d0		# extword bits 10-9: scale
	lsl.l		%d0,%d2			# index <<= scale

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the base register suppressed?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

# bd size: below 2 = no displacement to fetch, 2 = word, 3 = long
	cmpi.b		%d0,&0x2
	blt.b		fno_bd
	beq.b		fget_word_bd

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
# the two low bits of the extension word select it. bfextu sets Z when
# the field is zero, in which case no memory indirection is performed
# and the <ea> is simply base + bd + index.
fno_bd:
	bfextu		%d5{&30:&2},%d0		# low 2 bits zero: no indirection
	beq.w		faii_bd

# remaining values: 1 = null od, 2 = word od, 3 = long od
	cmpi.b		%d0,&0x2
	blt.b		fnull_od
	beq.b		fword_od

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0

fadd_them:
	mov.l		%d0,%d4			# d4 = od

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

# post-indexed: <ea> = mem[base + bd] + index + od
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

# pre-indexed: <ea> = mem[base + bd + index] + od
fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0			# return <ea> in a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts
19172 
19173 #########################################################
# fcea_err: the read of the intermediate memory pointer in
# fcalc_mem_ind failed. passes the address that was being read (d3)
# in a0 and the constant 0x0101 in d0 to iea_dacc after restoring
# d2-d5. (0x0101 is presumably the fault status iea_dacc reports for
# this access - confirm against iea_dacc.)
fcea_err:
	mov.l		%d3,%a0			# a0 = address of failed read

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	mov.w		&0x0101,%d0
	bra.l		iea_dacc
19180 
# fcea_iacc: an instruction fetch inside fcalc_mem_ind failed. d2-d5
# were pushed by fcalc_mem_ind, so restore them before joining the
# common instruction-access error exit.
fcea_iacc:
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	bra.l		iea_iacc
19184 
# fmovm_out_err / fmovm_in_err: data access error exits. both call
# restore(), load a constant into d0 (0x00e1 for the "out" case,
# 0x0161 for the "in" case), then pass the address held in
# L_SCR1(%a6) in a0 to iea_dacc.
# NOTE(review): the d0 constants are presumably fault-status values
# distinguishing write from read faults, and L_SCR1 presumably holds
# the faulting <ea> saved by the caller - confirm at the call sites.
fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0

fmovm_err:
	mov.l		L_SCR1(%a6),%a0
	bra.l		iea_dacc
19197 
19198 #########################################################################
19199 # XDEF ****************************************************************	#
19200 #	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
19201 #									#
19202 # XREF ****************************************************************	#
19203 #	_imem_read_long() - read longword from memory			#
19204 #	iea_iacc() - _imem_read_long() failed; error recovery		#
19205 #									#
19206 # INPUT ***************************************************************	#
19207 #	None								#
19208 #									#
19209 # OUTPUT **************************************************************	#
19210 #	If _imem_read_long() doesn't fail:				#
19211 #		USER_FPCR(a6)  = new FPCR value				#
19212 #		USER_FPSR(a6)  = new FPSR value				#
19213 #		USER_FPIAR(a6) = new FPIAR value			#
19214 #									#
19215 # ALGORITHM ***********************************************************	#
19216 #	Decode the instruction type by looking at the extension word	#
19217 # in order to see how many control registers to fetch from memory.	#
19218 # Fetch them using _imem_read_long(). If this fetch fails, exit through	#
19219 # the special access error exit handler iea_iacc().			#
19220 #									#
19221 # Instruction word decoding:						#
19222 #									#
19223 #	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
19224 #									#
19225 #		WORD1			WORD2				#
19226 #	1111 0010 00 111100	100$ $$00 0000 0000			#
19227 #									#
19228 #	$$$ (100): FPCR							#
19229 #	    (010): FPSR							#
19230 #	    (001): FPIAR						#
19231 #	    (000): FPIAR						#
19232 #									#
19233 #########################################################################
19234 
	global		fmovm_ctrl
# dispatch on the first byte of the extension word (opclass bits plus
# the three register-select bits). three patterns are tested
# explicitly; every other value falls through to fctrl_in_3.
# NOTE(review): that includes any single-register select pattern;
# presumably those forms never reach this routine - confirm.
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
# two immediate longwords follow in the instruction stream: the FPSR
# value first, then the FPIAR value.
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts
19264 
# fmovem.l #<data>, fpcr/fpiar
# two immediate longwords follow in the instruction stream: the FPCR
# value first, then the FPIAR value. each is stored into its USER_*
# slot in the frame; a failed fetch exits through iea_iacc.
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts
19284 
# fmovem.l #<data>, fpcr/fpsr
# two immediate longwords follow in the instruction stream: the FPCR
# value first, then the FPSR value. each is stored into its USER_*
# slot in the frame; a failed fetch exits through iea_iacc.
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	rts
19304 
# fmovem.l #<data>, fpcr/fpsr/fpiar
# three immediate longwords follow in the instruction stream, in the
# order FPCR, FPSR, FPIAR. each is stored into its USER_* slot in the
# frame; a failed fetch exits through iea_iacc (values already stored
# by then stay stored).
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts
19332 
19333 #########################################################################
19334 # XDEF ****************************************************************	#
19335 #	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
19336 #									#
19337 # XREF ****************************************************************	#
19338 #	inc_areg() - increment an address register			#
19339 #	dec_areg() - decrement an address register			#
19340 #									#
19341 # INPUT ***************************************************************	#
19342 #	d0 = number of bytes to adjust <ea> by				#
19343 #									#
# OUTPUT **************************************************************	#
#	a0 = correct <ea>						#
19346 #									#
19347 # ALGORITHM ***********************************************************	#
19348 # "Dummy" CALCulate Effective Address:					#
19349 #	The stacked <ea> for FP unimplemented instructions and opclass	#
19350 #	two packed instructions is correct with the exception of...	#
19351 #									#
19352 #	1) -(An)   : The register is not updated regardless of size.	#
19353 #		     Also, for extended precision and packed, the	#
19354 #		     stacked <ea> value is 8 bytes too big		#
19355 #	2) (An)+   : The register is not updated.			#
19356 #	3) #<data> : The upper longword of the immediate operand is	#
19357 #		     stacked b,w,l and s sizes are completely stacked.	#
19358 #		     d,x, and p are not.				#
19359 #									#
19360 #########################################################################
19361 
	global		_dcalc_ea
# In:	d0 = number of bytes to adjust An by for (An)+ / -(An)
# Out:	a0 = <ea>. for (An)+ and -(An) the address register is updated
#	through inc_areg/dec_areg; for -(An) with a 12-byte operand the
#	stacked EXC_EA is also corrected; for #<data> SPCOND_FLG is set
#	to immed_flg.
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch low byte of opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

# every other mode: the stacked <ea> is used as is.
	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
# the <ea> returned is 4 bytes past the address stored in the
# USER_FPIAR slot (memory-indirect lea: a0 = [USER_FPIAR+a6] + 4).
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> of immediate data
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
# (d1 still holds the register number for inc_areg.)
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

# d0 is expected to still hold the byte count here, i.e. this relies
# on dec_areg() leaving d0 untouched (TODO confirm in dec_areg).
	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts
19419 
19420 #########################################################################
19421 # XDEF ****************************************************************	#
19422 #	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
19423 #			 and packed data opclass 3 operations.		#
19424 #									#
19425 # XREF ****************************************************************	#
19426 #	None								#
19427 #									#
19428 # INPUT ***************************************************************	#
19429 #	None								#
19430 #									#
19431 # OUTPUT **************************************************************	#
19432 #	a0 = return correct effective address				#
19433 #									#
19434 # ALGORITHM ***********************************************************	#
19435 #	For opclass 3 extended and packed data operations, the <ea>	#
19436 # stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the address register itself must get	#
19438 # updated.								#
19439 #	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
19440 # and return that value as the correct <ea> and store that value in An.	#
19441 # For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
19442 #									#
19443 #########################################################################
19444 
# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
# Out:	a0 = <ea>. only (An)+ and -(An) need work; they continue at
#	ceaf_pi / ceaf_pd with d1 = register number. every other mode
#	returns the stacked EXC_EA unchanged.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts
19463 
# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# In:	d1 = address register number (0-7)
# Out:	a0 = stacked <ea>; An has been advanced by 12 (0xc) bytes.
#	dispatches through tbl_ceaf_pi to the per-register fix-up.
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# fetch jmp distance
	mov.l		EXC_EA(%a6),%a0		# return value for every case
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

# a0/a1: bump the copies saved in the exception frame
ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
	rts
# a2-a5: bump the live registers
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
# a6/a7: bump the frame slots EXC_A6 / EXC_A7; a7 also records the
# (a7)+ special case in SPCOND_FLG
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	addi.l		&0xc,EXC_A7(%a6)
	rts
19508 
# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
# In:	d1 = address register number (0-7)
# Out:	a0 = stacked <ea> - 8; the same corrected value is written back
#	to EXC_EA and stored into An through tbl_ceaf_pd.
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# fetch jmp distance
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0		# a0 = corrected <ea>
	sub.l		&0x8,EXC_EA(%a6)	# correct the stacked copy too
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

# a0/a1: store into the copies saved in the exception frame
ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)
	rts
# a2-a5: store into the live registers
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
# a6/a7: store into the frame slots EXC_A6 / EXC_A7; a7 also records
# the -(a7) special case in SPCOND_FLG
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	rts
19555 
19556 #########################################################################
19557 # XDEF ****************************************************************	#
19558 #	_load_fop(): load operand for unimplemented FP exception	#
19559 #									#
19560 # XREF ****************************************************************	#
19561 #	set_tag_x() - determine ext prec optype tag			#
19562 #	set_tag_s() - determine sgl prec optype tag			#
19563 #	set_tag_d() - determine dbl prec optype tag			#
19564 #	unnorm_fix() - convert unnormalized number to denorm or zero	#
19565 #	norm() - normalize a denormalized number			#
19566 #	get_packed() - fetch a packed operand from memory		#
19567 #	_dcalc_ea() - calculate <ea>, fixing An in process		#
19568 #									#
19569 #	_imem_read_{word,long}() - read from instruction memory		#
19570 #	_dmem_read() - read from data memory				#
19571 #	_dmem_read_{byte,word,long}() - read from data memory		#
19572 #									#
19573 #	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
19574 #									#
19575 # INPUT ***************************************************************	#
19576 #	None								#
19577 #									#
19578 # OUTPUT **************************************************************	#
19579 #	If memory access doesn't fail:					#
19580 #		FP_SRC(a6) = source operand in extended precision	#
19581 #		FP_DST(a6) = destination operand in extended precision	#
19582 #									#
19583 # ALGORITHM ***********************************************************	#
19584 #	This is called from the Unimplemented FP exception handler in	#
19585 # order to load the source and maybe destination operand into		#
19586 # FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
19587 # the source and destination from the FP register file. Set the optype	#
19588 # tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
19589 # convert it to a DENORM or a ZERO.					#
19590 #	If the instruction is opclass two (memory->reg), then fetch	#
19591 # the destination from the register file and the source operand from	#
19592 # memory. Tag and fix both as above w/ opclass zero instructions.	#
19593 #	If the source operand is byte,word,long, or single, it may be	#
19594 # in the data register file. If it's actually out in memory, use one of	#
19595 # the mem_read() routines to fetch it. If the mem_read() access returns	#
19596 # a failing value, exit through the special facc_in() routine which	#
19597 # will create an access error exception frame from the current exception #
19598 # frame.								#
19599 #	Immediate data and regular data accesses are separated because	#
19600 # if an immediate data access fails, the resulting fault status		#
19601 # longword stacked for the access error exception must have the		#
19602 # instruction bit set.							#
19603 #									#
19604 #########################################################################
19605 
19606 	global		_load_fop
# _load_fop: entry point. Decides between the reg->reg (opclass '000) and
# <ea>->reg (opclass '010) operand loaders based on the command word saved
# at EXC_CMDREG(%a6). Control falls through into op000 when the tested bit
# is clear.
19607 _load_fop:
19608
19609 #  15     13 12 10  9 7  6       0
19610 # /        \ /   \ /  \ /         \
19611 # ---------------------------------
19612 # | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
19613 # ---------------------------------
19614 #
19615
19616 #	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19617 #	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
19618 #	beq.w		op010			# handle <ea> -> fpn
19619 #	bgt.w		op011			# handle fpn -> <ea>
19620
19621 # we're not using op011 for now...
# btst on memory tests a bit of the byte at that address: bit 6 of the
# first (high) byte is bit 14 of the command word, which is the only
# opclass bit that differs between '000 and '010.
19622 	btst		&0x6,EXC_CMDREG(%a6)
19623 	bne.b		op010			# bit set => opclass '010
19624 
19625 ############################
19626 # OPCLASS '000: reg -> reg #
19627 ############################
# Both operands come from the FP register file. The extension field is
# examined to decide whether a destination operand is needed:
#	bit 5 == 0			=> monadic: load src only
#	bit 5 == 1, bit 4 == 0		=> dyadic:  load dst, then src
#	bit 5 == 1, bit 4 == 1		=> dyadic only if extension == 0x38
#					   (fcmp); otherwise load src only
# On return STAG(%a6) (and DTAG(%a6) when a dst was loaded) hold the
# operand type tags; UNNORMs are passed through unnorm_fix() first.
19628 op000:
19629 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
19630 	btst		&0x5,%d0		# testing extension bits
19631 	beq.b		op000_src		# (bit 5 == 0) => monadic
19632 	btst		&0x4,%d0		# (bit 5 == 1)
19633 	beq.b		op000_dst		# (bit 4 == 0) => dyadic
19634 	and.w		&0x007f,%d0		# extract extension bits {6:0}
19635 	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
19636 	bne.b		op000_src		# no; only the src is needed
# fall through: it's an fcmp, so the dst operand is needed as well
19637
19638 op000_dst:
19639 	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19640 	bsr.l		load_fpn2		# fetch dst fpreg into FP_DST
19641
19642 	bsr.l		set_tag_x		# get dst optype tag
19643
19644 	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
19645 	beq.b		op000_dst_unnorm	# yes
19646 op000_dst_cont:
19647 	mov.b		%d0, DTAG(%a6)		# store the dst optype tag
19648
19649 op000_src:
19650 	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19651 	bsr.l		load_fpn1		# fetch src fpreg into FP_SRC
19652
19653 	bsr.l		set_tag_x		# get src optype tag
19654
19655 	cmpi.b		%d0, &UNNORM		# is src fpreg an UNNORM?
19656 	beq.b		op000_src_unnorm	# yes
19657 op000_src_cont:
19658 	mov.b		%d0, STAG(%a6)		# store the src optype tag
19659 	rts
19660
# out-of-line UNNORM fixups; each rejoins the main path with the tag
# returned by unnorm_fix() in %d0
19661 op000_dst_unnorm:
19662 	bsr.l		unnorm_fix		# fix the dst UNNORM
19663 	bra.b		op000_dst_cont
19664 op000_src_unnorm:
19665 	bsr.l		unnorm_fix		# fix the src UNNORM
19666 	bra.b		op000_src_cont
19667 
19668 #############################
19669 # OPCLASS '010: <ea> -> reg #
19670 #############################
# The destination (if the instruction needs one) comes from the FP
# register file; the source comes from a data register or from memory.
# The monadic/dyadic decision uses the same extension-bit test as op000:
#	bit 5 == 0			=> monadic: load src only
#	bit 5 == 1, bit 4 == 0		=> dyadic:  load dst, then src
#	bit 5 == 1, bit 4 == 1		=> dyadic only if extension == 0x38
#					   (fcmp); otherwise load src only
19671 op010:
19672 	mov.w		EXC_CMDREG(%a6),%d0	# fetch extension word
19673 	btst		&0x5,%d0		# testing extension bits
19674 	beq.b		op010_src		# (bit 5 == 0) => monadic
19675 	btst		&0x4,%d0		# (bit 5 == 1)
19676 	beq.b		op010_dst		# (bit 4 == 0) => dyadic
19677 	and.w		&0x007f,%d0		# extract extension bits {6:0}
19678 	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
19679 	bne.b		op010_src		# no; only the src is needed
# fall through: it's an fcmp, so the dst operand is needed as well
19680
19681 op010_dst:
19682 	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19683 	bsr.l		load_fpn2		# fetch dst fpreg ptr
19684
19685 	bsr.l		set_tag_x		# get dst type tag
19686
19687 	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
19688 	beq.b		op010_dst_unnorm	# yes
19689 op010_dst_cont:
19690 	mov.b		%d0, DTAG(%a6)		# store the dst optype tag
19691
19692 op010_src:
19693 	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19694
# the bne below tests the condition codes set by this bfextu: a non-zero
# <ea> mode field means the source is not a data register
19695 	bfextu		EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19696 	bne.w		fetch_from_mem		# src op is in memory
19697
# source is in a data register: %d0 = src type field, %d1 = register number
19698 op010_dreg:
19699 	clr.b		STAG(%a6)		# either NORM or ZERO
19700 	bfextu		EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19701
19702 	mov.w		(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19703 	jmp		(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19704
19705 op010_dst_unnorm:
19706 	bsr.l		unnorm_fix		# fix the dst UNNORM
19707 	bra.b		op010_dst_cont
19708
19709 	swbeg		&0x8
# Indexed by the src type field. Only long(0), sgl(1), word(4) and byte(6)
# have a handler; the remaining entries hold offset 0, i.e. they point at
# the table itself.
# NOTE(review): presumably those types never reach here with a data
# register source -- confirm against the callers.
19710 tbl_op010_dreg:
19711 	short		opd_long	- tbl_op010_dreg
19712 	short		opd_sgl		- tbl_op010_dreg
19713 	short		tbl_op010_dreg	- tbl_op010_dreg
19714 	short		tbl_op010_dreg	- tbl_op010_dreg
19715 	short		opd_word	- tbl_op010_dreg
19716 	short		tbl_op010_dreg	- tbl_op010_dreg
19717 	short		opd_byte	- tbl_op010_dreg
19718 	short		tbl_op010_dreg	- tbl_op010_dreg
19719 
19720 #
19721 # LONG: can be either NORM or ZERO...
19722 #
# Source is a long in a data register (%d1 = register number on entry).
# The value is converted to extended precision with fmov.l and saved in
# FP_SRC(%a6). STAG(%a6) was cleared by the caller and is only changed
# (to ZERO) when the converted value is zero.
19723 opd_long:
19724 	bsr.l		fetch_dreg		# fetch long in d0
19725 	fmov.l		%d0, %fp0		# load a long
19726 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
# fbeq tests the condition codes left by the fmov.l above
19727 	fbeq.w		opd_long_zero		# long is a ZERO
19728 	rts
19729 opd_long_zero:
19730 	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
19731 	rts
19732 
19733 #
19734 # WORD: can be either NORM or ZERO...
19735 #
# Source is a word in a data register (%d1 = register number on entry).
# The value is converted to extended precision with fmov.w and saved in
# FP_SRC(%a6). STAG(%a6) was cleared by the caller and is only changed
# (to ZERO) when the converted value is zero.
19736 opd_word:
19737 	bsr.l		fetch_dreg		# fetch word in d0
19738 	fmov.w		%d0, %fp0		# load a word
19739 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
# fbeq tests the condition codes left by the fmov.w above
19740 	fbeq.w		opd_word_zero		# WORD is a ZERO
19741 	rts
19742 opd_word_zero:
19743 	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
19744 	rts
19745 
19746 #
19747 # BYTE: can be either NORM or ZERO...
19748 #
# Source is a byte in a data register (%d1 = register number on entry).
# The value is converted to extended precision with fmov.b and saved in
# FP_SRC(%a6). STAG(%a6) was cleared by the caller and is only changed
# (to ZERO) when the converted value is zero.
19749 opd_byte:
19750 	bsr.l		fetch_dreg		# fetch byte in d0
19751 	fmov.b		%d0, %fp0		# load a byte
19752 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
# fbeq tests the condition codes left by the fmov.b above
19753 	fbeq.w		opd_byte_zero		# byte is a ZERO
19754 	rts
19755 opd_byte_zero:
19756 	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
19757 	rts
19758 
19759 #
19760 # SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19761 #
19762 # separate SNANs and DENORMs so they can be loaded w/ special care.
19763 # all others can simply be moved "in" using fmove.
19764 #
# Source is a single-precision value in a data register (%d1 = register
# number on entry). It is parked in L_SCR1(%a6) so that it can be tagged
# and loaded through a pointer. get_sgl_snan/get_sgl_denorm are entered
# with %a0 pointing at L_SCR1(%a6) and return to our caller.
19765 opd_sgl:
19766 	bsr.l		fetch_dreg		# fetch sgl in d0
19767 	mov.l		%d0,L_SCR1(%a6)		# park it in scratch memory
19768
19769 	lea		L_SCR1(%a6), %a0	# pass: ptr to the sgl
19770 	bsr.l		set_tag_s		# determine sgl type
19771 	mov.b		%d0, STAG(%a6)		# save the src tag
19772
19773 	cmpi.b		%d0, &SNAN		# is it an SNAN?
19774 	beq.w		get_sgl_snan		# yes
19775
19776 	cmpi.b		%d0, &DENORM		# is it a DENORM?
19777 	beq.w		get_sgl_denorm		# yes
19778
19779 	fmov.s		(%a0), %fp0		# no, so can load it regular
19780 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19781 	rts
19782 
19783 ##############################################################################
19784 
19785 #########################################################################
19786 # fetch_from_mem():							#
19787 # - src is out in memory. must:						#
19788 #	(1) calc ea - must read AFTER you know the src type since	#
19789 #		      if the ea is -() or ()+, need to know # of bytes.	#
19790 #	(2) read it in from either user or supervisor space		#
19791 #	(3) if (b || w || l) then simply read in			#
19792 #	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
19793 #	    if (packed) then punt for now				#
19794 # INPUT:								#
19795 #	%d0 : src type field						#
19796 #########################################################################
# Clears STAG(%a6) and dispatches through tbl_fp_type to the loader for
# the source format; each loader returns directly to _load_fop's caller.
19797 fetch_from_mem:
19798 	clr.b		STAG(%a6)		# either NORM or ZERO
19799
19800 	mov.w		(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19801 	jmp		(tbl_fp_type.b,%pc,%d0.w*1)
19802
19803 	swbeg		&0x8
# Indexed by the src type field. Entry 7 has no loader: it holds offset 0,
# i.e. it points at the table itself.
# NOTE(review): presumably type 7 never reaches here -- confirm.
19804 tbl_fp_type:
19805 	short		load_long	- tbl_fp_type
19806 	short		load_sgl	- tbl_fp_type
19807 	short		load_ext	- tbl_fp_type
19808 	short		load_packed	- tbl_fp_type
19809 	short		load_word	- tbl_fp_type
19810 	short		load_dbl	- tbl_fp_type
19811 	short		load_byte	- tbl_fp_type
19812 	short		tbl_fp_type	- tbl_fp_type
19813 
19814 #########################################
19815 # load a LONG into %fp0:		#
19816 #	-number can't fault		#
19817 #	(1) calc ea			#
19818 #	(2) read 4 bytes into L_SCR1	#
19819 #	(3) fmov.l into %fp0		#
19820 #########################################
# NOTE: as coded, the long is read into %d0 and converted from there;
# L_SCR1 is not written on this path.
# The read routines return the data in %d0 and a status in %d1 (non-zero
# means the access failed). Immediate operands are read through
# _imem_read_long and fail through funimp_iacc; all others are read
# through _dmem_read_long and fail through facc_in_l.
# STAG(%a6) is left as set by fetch_from_mem unless the value is zero.
19821 load_long:
19822 	movq.l		&0x4, %d0		# pass: 4 (bytes)
19823 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19824
19825 	cmpi.b		SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
19826 	beq.b		load_long_immed		# yes; read from instr stream
19827
19828 	bsr.l		_dmem_read_long		# fetch src operand from memory
19829
19830 	tst.l		%d1			# did dfetch fail?
19831 	bne.l		facc_in_l		# yes
19832
19833 load_long_cont:
19834 	fmov.l		%d0, %fp0		# read into %fp0;convert to xprec
19835 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19836
# fbeq tests the condition codes left by the fmov.l above
19837 	fbeq.w		load_long_zero		# src op is a ZERO
19838 	rts
19839 load_long_zero:
19840 	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
19841 	rts
19842
19843 load_long_immed:
19844 	bsr.l		_imem_read_long		# fetch src operand immed data
19845
19846 	tst.l		%d1			# did ifetch fail?
19847 	bne.l		funimp_iacc		# yes
19848 	bra.b		load_long_cont
19849 
19850 #########################################
19851 # load a WORD into %fp0:		#
19852 #	-number can't fault		#
19853 #	(1) calc ea			#
19854 #	(2) read 2 bytes into L_SCR1	#
19855 #	(3) fmov.w into %fp0		#
19856 #########################################
# NOTE: as coded, the word is read into %d0 and converted from there;
# L_SCR1 is not written on this path.
# The read routines return the data in %d0 and a status in %d1 (non-zero
# means the access failed). Immediate operands are read through
# _imem_read_word and fail through funimp_iacc; all others are read
# through _dmem_read_word and fail through facc_in_w.
# STAG(%a6) is left as set by fetch_from_mem unless the value is zero.
19857 load_word:
19858 	movq.l		&0x2, %d0		# pass: 2 (bytes)
19859 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19860
19861 	cmpi.b		SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
19862 	beq.b		load_word_immed		# yes; read from instr stream
19863
19864 	bsr.l		_dmem_read_word		# fetch src operand from memory
19865
19866 	tst.l		%d1			# did dfetch fail?
19867 	bne.l		facc_in_w		# yes
19868
19869 load_word_cont:
19870 	fmov.w		%d0, %fp0		# read into %fp0;convert to xprec
19871 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19872
# fbeq tests the condition codes left by the fmov.w above
19873 	fbeq.w		load_word_zero		# src op is a ZERO
19874 	rts
19875 load_word_zero:
19876 	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
19877 	rts
19878
19879 load_word_immed:
19880 	bsr.l		_imem_read_word		# fetch src operand immed data
19881
19882 	tst.l		%d1			# did ifetch fail?
19883 	bne.l		funimp_iacc		# yes
19884 	bra.b		load_word_cont
19885 
19886 #########################################
19887 # load a BYTE into %fp0:		#
19888 #	-number can't fault		#
19889 #	(1) calc ea			#
19890 #	(2) read 1 byte into L_SCR1	#
19891 #	(3) fmov.b into %fp0		#
19892 #########################################
# NOTE: as coded, the byte is read into %d0 and converted from there;
# L_SCR1 is not written on this path.
# The read routines return the data in %d0 and a status in %d1 (non-zero
# means the access failed). Immediate operands are read through
# _imem_read_word and fail through funimp_iacc; all others are read
# through _dmem_read_byte and fail through facc_in_b.
# STAG(%a6) is left as set by fetch_from_mem unless the value is zero.
19893 load_byte:
19894 	movq.l		&0x1, %d0		# pass: 1 (byte)
19895 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19896
19897 	cmpi.b		SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
19898 	beq.b		load_byte_immed		# yes; read from instr stream
19899
19900 	bsr.l		_dmem_read_byte		# fetch src operand from memory
19901
19902 	tst.l		%d1			# did dfetch fail?
19903 	bne.l		facc_in_b		# yes
19904
19905 load_byte_cont:
19906 	fmov.b		%d0, %fp0		# read into %fp0;convert to xprec
19907 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19908
# fbeq tests the condition codes left by the fmov.b above
19909 	fbeq.w		load_byte_zero		# src op is a ZERO
19910 	rts
19911 load_byte_zero:
19912 	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
19913 	rts
19914
# a byte immediate is fetched as a whole word; the fmov.b at
# load_byte_cont then converts only the low byte of %d0.
# (presumably because byte immediate data occupies a full extension
# word in the instruction stream -- confirm)
19915 load_byte_immed:
19916 	bsr.l		_imem_read_word		# fetch src operand immed data
19917
19918 	tst.l		%d1			# did ifetch fail?
19919 	bne.l		funimp_iacc		# yes
19920 	bra.b		load_byte_cont
19921 
19922 #########################################
19923 # load a SGL into %fp0:			#
19924 #	-number can't fault		#
19925 #	(1) calc ea			#
19926 #	(2) read 4 bytes into L_SCR1	#
19927 #	(3) fmov.s into %fp0		#
19928 #########################################
# The read routines return the data in %d0 and a status in %d1 (non-zero
# means the access failed). Both the data-memory path and the immediate
# path must park the operand in L_SCR1(%a6) before joining at
# load_sgl_cont, because everything from there on (set_tag_s, the
# DENORM/SNAN converters and the final fmov.s) reads the operand from
# L_SCR1(%a6) rather than from %d0.
# DENORMs and SNANs are converted by hand (get_sgl_denorm/get_sgl_snan,
# entered with %a0 -> L_SCR1); everything else is loaded with fmov.s.
19929 load_sgl:
19930 	movq.l		&0x4, %d0		# pass: 4 (bytes)
19931 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19932
19933 	cmpi.b		SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
19934 	beq.b		load_sgl_immed		# yes; read from instr stream
19935
19936 	bsr.l		_dmem_read_long		# fetch src operand from memory
19937 	mov.l		%d0, L_SCR1(%a6)	# store src op on stack
19938
19939 	tst.l		%d1			# did dfetch fail?
19940 	bne.l		facc_in_l		# yes
19941
19942 load_sgl_cont:
19943 	lea		L_SCR1(%a6), %a0	# pass: ptr to sgl src op
19944 	bsr.l		set_tag_s		# determine src type tag
19945 	mov.b		%d0, STAG(%a6)		# save src optype tag on stack
19946
19947 	cmpi.b		%d0, &DENORM		# is it a sgl DENORM?
19948 	beq.w		get_sgl_denorm		# yes
19949
19950 	cmpi.b		%d0, &SNAN		# is it a sgl SNAN?
19951 	beq.w		get_sgl_snan		# yes
19952
19953 	fmov.s		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
19954 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19955 	rts
19956
19957 load_sgl_immed:
19958 	bsr.l		_imem_read_long		# fetch src operand immed data
# the immediate operand must also be parked in L_SCR1(%a6); without this
# store load_sgl_cont would tag and load whatever was left in the scratch
# location. (mov.l changes the condition codes, so it is placed before
# the tst.l, exactly as in the data-memory path above.)
	mov.l		%d0, L_SCR1(%a6)	# store src op on stack
19959
19960 	tst.l		%d1			# did ifetch fail?
19961 	bne.l		funimp_iacc		# yes
19962 	bra.b		load_sgl_cont
19963 
19964 # must convert sgl denorm format to an Xprec denorm fmt suitable for
19965 # normalization...
19966 # %a0 : points to sgl denorm
#
# Builds an extended-precision image of the denorm in FP_SRC(%a6):
#	- sign/exponent word cleared (the sign is re-inserted below)
#	- the 23 fraction bits placed in bits 30..8 of the hi mantissa
#	  (bit 31, the explicit integer bit, stays clear)
# then normalizes it with norm(), which returns the shift count in %d0,
# and sets the exponent to 0x3f81 - shift. STAG(%a6) is rewritten to NORM.
# (The sign/exponent word only needs to be cleared once: nothing writes
# it between the clear and the sign test, so a second clr.w that used to
# follow the mantissa stores has been dropped.)
19967 get_sgl_denorm:
19968 	clr.w		FP_SRC_EX(%a6)		# start with {sgn,exp} = 0
19969 	bfextu		(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
19970 	lsl.l		&0x8, %d0		# left-justify below the j-bit
19971 	mov.l		%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
19972 	clr.l		FP_SRC_LO(%a6)		# set ext lo(_mantissa)
19973
19975 	btst		&0x7, (%a0)		# is sgn bit set?
19976 	beq.b		sgl_dnrm_norm
19977 	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value
19978
19979 sgl_dnrm_norm:
19980 	lea		FP_SRC(%a6), %a0
19981 	bsr.l		norm			# normalize number
19982 	mov.w		&0x3f81, %d1		# xprec exp = 0x3f81
19983 	sub.w		%d0, %d1		# exp = 0x3f81 - shft amt.
19984 	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}
19985
19986 	mov.b		&NORM, STAG(%a6)	# fix src type tag
19987 	rts
19988 
19989 # convert sgl to ext SNAN
19990 # %a0 : points to sgl SNAN
#
# Builds the extended-precision SNAN in FP_SRC(%a6) by hand: exponent
# 0x7fff, the 23 fraction bits placed in bits 30..8 of the hi mantissa,
# lo mantissa zero, and the sign copied from the single. STAG(%a6) is
# not touched here (the caller already stored the SNAN tag).
19991 get_sgl_snan:
19992 	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
19993 	bfextu		(%a0){&9:&23}, %d0
19994 	lsl.l		&0x8, %d0		# extract and insert hi(man)
19995 	mov.l		%d0, FP_SRC_HI(%a6)
19996 	clr.l		FP_SRC_LO(%a6)
19997
19998 	btst		&0x7, (%a0)		# see if sign of SNAN is set
19999 	beq.b		no_sgl_snan_sgn
20000 	bset		&0x7, FP_SRC_EX(%a6)	# copy sign into xprec value
20001 no_sgl_snan_sgn:
20002 	rts
20003 
20004 #########################################
20005 # load a DBL into %fp0:			#
20006 #	-number can't fault		#
20007 #	(1) calc ea			#
20008 #	(2) read 8 bytes into L_SCR(1,2)#
20009 #	(3) fmov.d into %fp0		#
20010 #########################################
# The 8-byte operand is read into the scratch area starting at
# L_SCR1(%a6), either from data memory (_dmem_read, fails through
# facc_in_d) or from the instruction stream (_imem_read, fails through
# funimp_iacc). Both read routines take %a0 = source, %a1 = destination,
# %d0 = byte count and return a status in %d1 (non-zero = failed).
# DENORMs and SNANs are converted by hand (get_dbl_denorm/get_dbl_snan,
# entered with %a0 -> L_SCR1); everything else is loaded with fmov.d.
20011 load_dbl:
20012 	movq.l		&0x8, %d0		# pass: 8 (bytes)
20013 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
20014
20015 	cmpi.b		SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
20016 	beq.b		load_dbl_immed		# yes; read from instr stream
20017
20018 	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
20019 	movq.l		&0x8, %d0		# pass: # bytes to read
20020 	bsr.l		_dmem_read		# fetch src operand from memory
20021
20022 	tst.l		%d1			# did dfetch fail?
20023 	bne.l		facc_in_d		# yes
20024
20025 load_dbl_cont:
20026 	lea		L_SCR1(%a6), %a0	# pass: ptr to input dbl
20027 	bsr.l		set_tag_d		# determine src type tag
20028 	mov.b		%d0, STAG(%a6)		# set src optype tag
20029
20030 	cmpi.b		%d0, &DENORM		# is it a dbl DENORM?
20031 	beq.w		get_dbl_denorm		# yes
20032
20033 	cmpi.b		%d0, &SNAN		# is it a dbl SNAN?
20034 	beq.w		get_dbl_snan		# yes
20035
20036 	fmov.d		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
20037 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
20038 	rts
20039
20040 load_dbl_immed:
20041 	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
20042 	movq.l		&0x8, %d0		# pass: # bytes to read
20043 	bsr.l		_imem_read		# fetch src operand immed data
20044
20045 	tst.l		%d1			# did ifetch fail?
20046 	bne.l		funimp_iacc		# yes
20047 	bra.b		load_dbl_cont
20048 
20049 # must convert dbl denorm format to an Xprec denorm fmt suitable for
20050 # normalization...
20051 # %a0 : loc. of dbl denorm
#
# Builds an extended-precision image of the denorm in FP_SRC(%a6):
#	- sign/exponent word cleared (the sign is re-inserted below)
#	- hi mantissa = 31 fraction bits taken from bit offset 12 of the
#	  double (right-justified, so bit 31, the integer bit, is clear)
#	- lo mantissa = the remaining 21 fraction bits, shifted to the
#	  top of the longword
# then normalizes it with norm(), which returns the shift count in %d0,
# and sets the exponent to 0x3c01 - shift. STAG(%a6) is rewritten to NORM.
20052 get_dbl_denorm:
20053 	clr.w		FP_SRC_EX(%a6)
20054 	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
20055 	mov.l		%d0, FP_SRC_HI(%a6)
20056 	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
20057 	mov.l		&0xb, %d1
20058 	lsl.l		%d1, %d0		# left-justify the low 21 bits
20059 	mov.l		%d0, FP_SRC_LO(%a6)
20060
20061 	btst		&0x7, (%a0)		# is sgn bit set?
20062 	beq.b		dbl_dnrm_norm
20063 	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value
20064
20065 dbl_dnrm_norm:
20066 	lea		FP_SRC(%a6), %a0
20067 	bsr.l		norm			# normalize number
20068 	mov.w		&0x3c01, %d1		# xprec exp = 0x3c01
20069 	sub.w		%d0, %d1		# exp = 0x3c01 - shft amt.
20070 	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}
20071
20072 	mov.b		&NORM, STAG(%a6)	# fix src type tag
20073 	rts
20074 
20075 # convert dbl to ext SNAN
20076 # %a0 : points to dbl SNAN
#
# Builds the extended-precision SNAN in FP_SRC(%a6) by hand: exponent
# 0x7fff, the 52 fraction bits split across the hi mantissa (31 bits,
# right-justified) and the lo mantissa (21 bits, left-justified), and
# the sign copied from the double. STAG(%a6) is not touched here (the
# caller already stored the SNAN tag).
20077 get_dbl_snan:
20078 	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20079
20080 	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
20081 	mov.l		%d0, FP_SRC_HI(%a6)
20082 	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
20083 	mov.l		&0xb, %d1
20084 	lsl.l		%d1, %d0		# left-justify the low 21 bits
20085 	mov.l		%d0, FP_SRC_LO(%a6)
20086
20087 	btst		&0x7, (%a0)		# see if sign of SNAN is set
20088 	beq.b		no_dbl_snan_sgn
20089 	bset		&0x7, FP_SRC_EX(%a6)	# copy sign into xprec value
20090 no_dbl_snan_sgn:
20091 	rts
20092 
20093 #################################################
20094 # load an Xprec operand into FP_SRC:		#
20095 #	-number can't fault			#
20096 #	(1) calc ea				#
20097 #	(2) read 12 bytes into FP_SRC		#
20098 #	(3) tag it; fix it if it's an UNNORM	#
20099 #################################################
# The operand is copied straight from data memory into FP_SRC(%a6) with
# _dmem_read (%a0 = source, %a1 = destination, %d0 = byte count; status
# returned in %d1, non-zero = failed -> facc_in_x). It is never loaded
# into %fp0 here. There is no immediate-operand path in this routine.
# NOTE(review): presumably extended immediates are handled elsewhere
# before _load_fop is reached -- confirm.
20100 load_ext:
20101 	mov.l		&0xc, %d0		# pass: 12 (bytes)
20102 	bsr.l		_dcalc_ea		# calc <ea>
20103
20104 	lea		FP_SRC(%a6), %a1	# pass: ptr to input ext tmp space
20105 	mov.l		&0xc, %d0		# pass: # of bytes to read
20106 	bsr.l		_dmem_read		# fetch src operand from memory
20107
20108 	tst.l		%d1			# did dfetch fail?
20109 	bne.l		facc_in_x		# yes
20110
20111 	lea		FP_SRC(%a6), %a0	# pass: ptr to src op
20112 	bsr.l		set_tag_x		# determine src type tag
20113
20114 	cmpi.b		%d0, &UNNORM		# is the src op an UNNORM?
20115 	beq.b		load_ext_unnorm		# yes
20116
20117 	mov.b		%d0, STAG(%a6)		# store the src optype tag
20118 	rts
20119
20120 load_ext_unnorm:
20121 	bsr.l		unnorm_fix		# fix the src UNNORM
20122 	mov.b		%d0, STAG(%a6)		# store the src optype tag
20123 	rts
20124 
20125 #################################################
20126 # load a packed operand into FP_SRC:		#
20127 #	-number can't fault			#
20128 #	(1) get_packed() fetches/converts it	#
20129 #	(2) tag the result in FP_SRC		#
20130 #	(3) fix it if it's an UNNORM		#
20131 #################################################
# All of the fetching is delegated to get_packed(); this routine only
# tags the extended-precision value found in FP_SRC(%a6) afterwards and
# stores the tag in STAG(%a6). Per the comments below, the UNNORM case
# here is an "UNNORM ZERO", which unnorm_fix() re-tags.
20132 load_packed:
20133 	bsr.l		get_packed
20134
20135 	lea		FP_SRC(%a6),%a0		# pass ptr to src op
20136 	bsr.l		set_tag_x		# determine src type tag
20137 	cmpi.b		%d0,&UNNORM		# is the src op an UNNORM ZERO?
20138 	beq.b		load_packed_unnorm	# yes
20139
20140 	mov.b		%d0,STAG(%a6)		# store the src optype tag
20141 	rts
20142
20143 load_packed_unnorm:
20144 	bsr.l		unnorm_fix		# fix the UNNORM ZERO
20145 	mov.b		%d0,STAG(%a6)		# store the src optype tag
20146 	rts
20147 
20148 #########################################################################
20149 # XDEF ****************************************************************	#
20150 #	fout(): move from fp register to memory or data register	#
20151 #									#
20152 # XREF ****************************************************************	#
20153 #	_round() - needed to create EXOP for sgl/dbl precision		#
20154 #	norm() - needed to create EXOP for extended precision		#
20155 #	ovf_res() - create default overflow result for sgl/dbl precision#
20156 #	unf_res() - create default underflow result for sgl/dbl prec.	#
20157 #	dst_dbl() - create rounded dbl precision result.		#
20158 #	dst_sgl() - create rounded sgl precision result.		#
20159 #	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
20160 #	bindec() - convert FP binary number to packed number.		#
20161 #	_mem_write() - write data to memory.				#
20162 #	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
20163 #	_dmem_write_{byte,word,long}() - write data to memory.		#
20164 #	store_dreg_{b,w,l}() - store data to data register file.	#
20165 #	facc_out_{b,w,l,d,x}() - data access error occurred.		#
20166 #									#
20167 # INPUT ***************************************************************	#
20168 #	a0 = pointer to extended precision source operand		#
20169 #	d0 = round prec,mode						#
20170 #									#
20171 # OUTPUT **************************************************************	#
20172 #	fp0 : intermediate underflow or overflow result if		#
20173 #	      OVFL/UNFL occurred for a sgl or dbl operand		#
20174 #									#
20175 # ALGORITHM ***********************************************************	#
20176 #	This routine is accessed by many handlers that need to do an	#
20177 # opclass three move of an operand out to memory.			#
20178 #	Decode an fmove out (opclass 3) instruction to determine if	#
20179 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
20180 # register or memory. The algorithm uses a standard "fmove" to create	#
20181 # the rounded result. Also, since exceptions are disabled, this also	#
20182 # creates the correct OPERR default result if appropriate.		#
20183 #	For sgl or dbl precision, overflow or underflow can occur. If	#
20184 # either occurs and is enabled, the EXOP must be created.		#
20185 #	For extended precision, the stacked <ea> must be fixed along	#
20186 # w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
20187 # the source is a denorm and if underflow is enabled, an EXOP must be	#
20188 # created.								#
20189 #	For packed, the k-factor must be fetched from the instruction	#
20190 # word or a data register. The <ea> must be fixed as w/ extended	#
20191 # precision. Then, bindec() is called to create the appropriate		#
20192 # packed result.							#
20193 #	If at any time an access error is flagged by one of the move-	#
20194 # to-memory routines, then a special exit must be made so that the	#
20195 # access error can be handled properly.					#
20196 #									#
20197 #########################################################################
20198 
20199 	global		fout
# fout: dispatch on the destination format field of the command word.
#	in:	%a0 = ptr to extended precision source operand
#		%d0 = rnd prec,mode
# %d0 and %a0 are passed through untouched to the selected routine; %d1
# and %a1 are used as scratch for the table lookup.
20200 fout:
20201 	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20202 	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20203 	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
20204
20205 	swbeg		&0x8
# Indexed by the dst format field; formats 3 and 7 both go to fout_pack.
20206 tbl_fout:
20207 	short		fout_long	-	tbl_fout
20208 	short		fout_sgl	-	tbl_fout
20209 	short		fout_ext	-	tbl_fout
20210 	short		fout_pack	-	tbl_fout
20211 	short		fout_word	-	tbl_fout
20212 	short		fout_dbl	-	tbl_fout
20213 	short		fout_byte	-	tbl_fout
20214 	short		fout_pack	-	tbl_fout
20215 
20216 #################################################################
20217 # fmove.b out ###################################################
20218 #################################################################
20219
20220 # Only "Unimplemented Data Type" exceptions enter here. The operand
20221 # is either a DENORM or a NORM.
#
#	in:	%a0 = ptr to extended precision source operand
#		%d0 = rnd prec,mode (written to the FPCR for the convert)
# The conversion itself is done by a real "fmov.b %fp0,%d0" under the
# caller's rounding mode; the FPSR bits it raises are OR-ed into the low
# word of USER_FPSR(%a6). The result is then written to a data register
# (store_dreg_b) or to memory at EXC_EA(%a6) (_dmem_write_byte, which
# returns a status in %d1; non-zero exits through facc_out_b).
20222 fout_byte:
20223 	tst.b		STAG(%a6)		# is operand normalized?
20224 	bne.b		fout_byte_denorm	# no
20225
20226 	fmovm.x		SRC(%a0),&0x80		# load value
20227
20228 fout_byte_norm:
20229 	fmov.l		%d0,%fpcr		# insert rnd prec,mode
20230
20231 	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
20232
20233 	fmov.l		&0x0,%fpcr		# clear FPCR
20234 	fmov.l		%fpsr,%d1		# fetch FPSR
20235 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20236
20237 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20238 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20239 	beq.b		fout_byte_dn		# must save to integer regfile
20240
20241 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20242 	bsr.l		_dmem_write_byte	# write byte
20243
20244 	tst.l		%d1			# did dstore fail?
20245 	bne.l		facc_out_b		# yes
20246
20247 	rts
20248
20249 fout_byte_dn:
20250 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20251 	andi.w		&0x7,%d1
20252 	bsr.l		store_dreg_b
20253 	rts
20254
# a DENORM is replaced by a single-precision stand-in that keeps only
# its sign (0x00800000 with the source sign bit), and that value is
# converted instead of the real operand
20255 fout_byte_denorm:
20256 	mov.l		SRC_EX(%a0),%d1
20257 	andi.l		&0x80000000,%d1		# keep DENORM sign
20258 	ori.l		&0x00800000,%d1		# make smallest sgl
20259 	fmov.s		%d1,%fp0
20260 	bra.b		fout_byte_norm
20261 
20262 #################################################################
20263 # fmove.w out ###################################################
20264 #################################################################
20265
20266 # Only "Unimplemented Data Type" exceptions enter here. The operand
20267 # is either a DENORM or a NORM.
#
#	in:	%a0 = ptr to extended precision source operand
#		%d0 = rnd prec,mode (written to the FPCR for the convert)
# The conversion itself is done by a real "fmov.w %fp0,%d0" under the
# caller's rounding mode; the FPSR bits it raises are OR-ed into the low
# word of USER_FPSR(%a6). The result is then written to a data register
# (store_dreg_w) or to memory at EXC_EA(%a6) (_dmem_write_word, which
# returns a status in %d1; non-zero exits through facc_out_w).
20268 fout_word:
20269 	tst.b		STAG(%a6)		# is operand normalized?
20270 	bne.b		fout_word_denorm	# no
20271
20272 	fmovm.x		SRC(%a0),&0x80		# load value
20273
20274 fout_word_norm:
20275 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
20276
20277 	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
20278
20279 	fmov.l		&0x0,%fpcr		# clear FPCR
20280 	fmov.l		%fpsr,%d1		# fetch FPSR
20281 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20282
20283 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20284 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20285 	beq.b		fout_word_dn		# must save to integer regfile
20286
20287 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20288 	bsr.l		_dmem_write_word	# write word
20289
20290 	tst.l		%d1			# did dstore fail?
20291 	bne.l		facc_out_w		# yes
20292
20293 	rts
20294
20295 fout_word_dn:
20296 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20297 	andi.w		&0x7,%d1
20298 	bsr.l		store_dreg_w
20299 	rts
20300
# a DENORM is replaced by a single-precision stand-in that keeps only
# its sign (0x00800000 with the source sign bit), and that value is
# converted instead of the real operand
20301 fout_word_denorm:
20302 	mov.l		SRC_EX(%a0),%d1
20303 	andi.l		&0x80000000,%d1		# keep DENORM sign
20304 	ori.l		&0x00800000,%d1		# make smallest sgl
20305 	fmov.s		%d1,%fp0
20306 	bra.b		fout_word_norm
20307 
20308 #################################################################
20309 # fmove.l out ###################################################
20310 #################################################################
20311
20312 # Only "Unimplemented Data Type" exceptions enter here. The operand
20313 # is either a DENORM or a NORM.
#
#	in:	%a0 = ptr to extended precision source operand
#		%d0 = rnd prec,mode (written to the FPCR for the convert)
# The conversion itself is done by a real "fmov.l %fp0,%d0" under the
# caller's rounding mode; the FPSR bits it raises are OR-ed into the low
# word of USER_FPSR(%a6). The result is then written to a data register
# (store_dreg_l) or to memory at EXC_EA(%a6) (_dmem_write_long, which
# returns a status in %d1; non-zero exits through facc_out_l).
20314 fout_long:
20315 	tst.b		STAG(%a6)		# is operand normalized?
20316 	bne.b		fout_long_denorm	# no
20317
20318 	fmovm.x		SRC(%a0),&0x80		# load value
20319
20320 fout_long_norm:
20321 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
20322
20323 	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
20324
20325 	fmov.l		&0x0,%fpcr		# clear FPCR
20326 	fmov.l		%fpsr,%d1		# fetch FPSR
20327 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20328
20329 fout_long_write:
20330 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20331 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20332 	beq.b		fout_long_dn		# must save to integer regfile
20333
20334 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20335 	bsr.l		_dmem_write_long	# write long
20336
20337 	tst.l		%d1			# did dstore fail?
20338 	bne.l		facc_out_l		# yes
20339
20340 	rts
20341
20342 fout_long_dn:
20343 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20344 	andi.w		&0x7,%d1
20345 	bsr.l		store_dreg_l
20346 	rts
20347
# a DENORM is replaced by a single-precision stand-in that keeps only
# its sign (0x00800000 with the source sign bit), and that value is
# converted instead of the real operand
20348 fout_long_denorm:
20349 	mov.l		SRC_EX(%a0),%d1
20350 	andi.l		&0x80000000,%d1		# keep DENORM sign
20351 	ori.l		&0x00800000,%d1		# make smallest sgl
20352 	fmov.s		%d1,%fp0
20353 	bra.b		fout_long_norm
20354 
20355 #################################################################
20356 # fmove.x out ###################################################
20357 #################################################################
20358 
20359 # Only "Unimplemented Data Type" exceptions enter here. The operand
20360 # is either a DENORM or a NORM.
20361 # The DENORM causes an Underflow exception.
20362 fout_ext:
20363 
20364 # we copy the extended precision result to FP_SCR0 so that the reserved
20365 # 16-bit field gets zeroed. we do this since we promise not to disturb
20366 # what's at SRC(a0).
20367 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20368 	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
20369 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20370 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20371 
20372 	fmovm.x		SRC(%a0),&0x80		# return result
20373 
20374 	bsr.l		_calc_ea_fout		# fix stacked <ea>
20375 
20376 	mov.l		%a0,%a1			# pass: dst addr
20377 	lea		FP_SCR0(%a6),%a0	# pass: src addr
20378 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
20379 
20380 # we must not yet write the extended precision data to the stack
20381 # in the pre-decrement case from supervisor mode or else we'll corrupt
20382 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
20383 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
20384 	beq.b		fout_ext_a7
20385 
20386 	bsr.l		_dmem_write		# write ext prec number to memory
20387 
20388 	tst.l		%d1			# did dstore fail?
20389 	bne.w		fout_ext_err		# yes
20390 
20391 	tst.b		STAG(%a6)		# is operand normalized?
20392 	bne.b		fout_ext_denorm		# no
20393 	rts
20394 
20395 # the number is a DENORM. must set the underflow exception bit
20396 fout_ext_denorm:
20397 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20398 
20399 	mov.b		FPCR_ENABLE(%a6),%d0
20400 	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
20401 	bne.b		fout_ext_exc		# yes
20402 	rts
20403 
20404 # we don't want to do the write if the exception occurred in supervisor mode
20405 # so _mem_write2() handles this for us.
20406 fout_ext_a7:
20407 	bsr.l		_mem_write2		# write ext prec number to memory
20408 
20409 	tst.l		%d1			# did dstore fail?
20410 	bne.w		fout_ext_err		# yes
20411 
20412 	tst.b		STAG(%a6)		# is operand normalized?
20413 	bne.b		fout_ext_denorm		# no
20414 	rts
20415 
#
# fout_ext_exc: build the exceptional operand (EXOP) for an extended
# precision move out of a DENORM when one of the tested traps is enabled.
# the copy of the operand in FP_SCR0 is handed to norm(); the value norm()
# leaves in d0 is negated and masked to 15 bits to form the new exponent,
# the old sign bit is kept, and the rebuilt operand is loaded into fp1.
#
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand copy
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0		# keep only the 15 exponent bits
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
20425 
#
# fout_ext_err: the 12-byte memory write reported a failure (d1 != 0).
# copy EXC_A6 over the longword at (a6) and hand off to facc_out_x.
# this label is shared by the fout_ext and fout_pack write paths.
#
fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x
20429 
20430 #########################################################################
20431 # fmove.s out ###########################################################
20432 #########################################################################
#
# fout_sgl: move a value out in single precision.
# as used below: d0 = rounding info byte on entry, a0 = ptr to the
# extended-precision source operand.
# the rounding info is masked with 0x30, the single-precision code
# (s_mode*0x10) is or'ed in, and the result is parked in L_SCR3(%a6)
# where the later stages pick it up.
#
fout_sgl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
# dispatch on the biased exponent (sign stripped):
#	exp >  SGL_HI	-> fout_sgl_ovfl
#	exp == SGL_HI	-> fout_sgl_may_ovfl
#	exp <  SGL_LO	-> fout_sgl_unfl
#	otherwise	-> fall through into fout_sgl_exg
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&SGL_HI		# will operand overflow?
	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO		# will operand underflow?
	blt.w		fout_sgl_unfl		# yes; go handle underflow
20452 
20453 #
20454 # NORMs(in range) can be stored out by a simple "fmov.s"
20455 # Unnormalized inputs can come through this point.
20456 #
20457 fout_sgl_exg:
20458 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
20459 
20460 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20461 	fmov.l		&0x0,%fpsr		# clear FPSR
20462 
20463 	fmov.s		%fp0,%d0		# store does convert and round
20464 
20465 	fmov.l		&0x0,%fpcr		# clear FPCR
20466 	fmov.l		%fpsr,%d1		# save FPSR
20467 
20468 	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
20469 
20470 fout_sgl_exg_write:
20471 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20472 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20473 	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
20474 
20475 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20476 	bsr.l		_dmem_write_long	# write long
20477 
20478 	tst.l		%d1			# did dstore fail?
20479 	bne.l		facc_out_l		# yes
20480 
20481 	rts
20482 
20483 fout_sgl_exg_write_dn:
20484 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20485 	andi.w		&0x7,%d1
20486 	bsr.l		store_dreg_l
20487 	rts
20488 
20489 #
20490 # here, we know that the operand would UNFL if moved out to single prec,
20491 # so, denorm and round and then use generic store single routine to
20492 # write the value to memory.
20493 #
20494 fout_sgl_unfl:
20495 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20496 
20497 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20498 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20499 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20500 	mov.l		%a0,-(%sp)
20501 
20502 	clr.l		%d0			# pass: S.F. = 0
20503 
20504 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
20505 	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
20506 
20507 	lea		FP_SCR0(%a6),%a0
20508 	bsr.l		norm			# normalize the DENORM
20509 
20510 fout_sgl_unfl_cont:
20511 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
20512 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
20513 	bsr.l		unf_res			# calc default underflow result
20514 
20515 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
20516 	bsr.l		dst_sgl			# convert to single prec
20517 
20518 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20519 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20520 	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
20521 
20522 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20523 	bsr.l		_dmem_write_long	# write long
20524 
20525 	tst.l		%d1			# did dstore fail?
20526 	bne.l		facc_out_l		# yes
20527 
20528 	bra.b		fout_sgl_unfl_chkexc
20529 
20530 fout_sgl_unfl_dn:
20531 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20532 	andi.w		&0x7,%d1
20533 	bsr.l		store_dreg_l
20534 
20535 fout_sgl_unfl_chkexc:
20536 	mov.b		FPCR_ENABLE(%a6),%d1
20537 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20538 	bne.w		fout_sd_exc_unfl	# yes
20539 	addq.l		&0x4,%sp
20540 	rts
20541 
20542 #
20543 # it's definitely an overflow so call ovf_res to get the correct answer
20544 #
20545 fout_sgl_ovfl:
20546 	tst.b		3+SRC_HI(%a0)		# is result inexact?
20547 	bne.b		fout_sgl_ovfl_inex2
20548 	tst.l		SRC_LO(%a0)		# is result inexact?
20549 	bne.b		fout_sgl_ovfl_inex2
20550 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20551 	bra.b		fout_sgl_ovfl_cont
20552 fout_sgl_ovfl_inex2:
20553 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20554 
20555 fout_sgl_ovfl_cont:
20556 	mov.l		%a0,-(%sp)
20557 
20558 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20559 # overflow result. DON'T save the returned ccodes from ovf_res() since
20560 # fmove out doesn't alter them.
20561 	tst.b		SRC_EX(%a0)		# is operand negative?
20562 	smi		%d1			# set if so
20563 	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
20564 	bsr.l		ovf_res			# calc OVFL result
20565 	fmovm.x		(%a0),&0x80		# load default overflow result
20566 	fmov.s		%fp0,%d0		# store to single
20567 
20568 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20569 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20570 	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
20571 
20572 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20573 	bsr.l		_dmem_write_long	# write long
20574 
20575 	tst.l		%d1			# did dstore fail?
20576 	bne.l		facc_out_l		# yes
20577 
20578 	bra.b		fout_sgl_ovfl_chkexc
20579 
20580 fout_sgl_ovfl_dn:
20581 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20582 	andi.w		&0x7,%d1
20583 	bsr.l		store_dreg_l
20584 
20585 fout_sgl_ovfl_chkexc:
20586 	mov.b		FPCR_ENABLE(%a6),%d1
20587 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20588 	bne.w		fout_sd_exc_ovfl	# yes
20589 	addq.l		&0x4,%sp
20590 	rts
20591 
20592 #
20593 # move out MAY overflow:
20594 # (1) force the exp to 0x3fff
20595 # (2) do a move w/ appropriate rnd mode
20596 # (3) if exp still equals zero, then insert original exponent
20597 #	for the correct result.
20598 #     if exp now equals one, then it overflowed so call ovf_res.
20599 #
20600 fout_sgl_may_ovfl:
20601 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
20602 	andi.w		&0x8000,%d1		# keep it,clear exp
20603 	ori.w		&0x3fff,%d1		# insert exp = 0
20604 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
20605 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20606 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20607 
20608 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20609 
20610 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
20611 	fmov.l		&0x0,%fpcr		# clear FPCR
20612 
20613 	fabs.x		%fp0			# need absolute value
20614 	fcmp.b		%fp0,&0x2		# did exponent increase?
20615 	fblt.w		fout_sgl_exg		# no; go finish NORM
20616 	bra.w		fout_sgl_ovfl		# yes; go handle overflow
20617 
20618 ################
20619 
#
# fout_sd_exc_unfl: start of EXOP creation for a single or double
# precision move out that underflowed (reached from fout_sgl_unfl and
# fout_dbl_unfl). expects the pointer to the original source operand on
# top of the stack (below the return address is not involved: the callers
# branch here, they do not bsr) and the rnd prec/mode in L_SCR3(%a6).
# the operand is re-copied to FP_SCR0; if the source was a DENORM it is
# normalized and the negated value returned by norm() in d0 is inserted
# as the 15-bit exponent. execution then joins fout_sd_exc_cont.
#
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0		# restore ptr to src operand

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM copy
	neg.l		%d0			# new exp = -(value from norm)
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert exp; sign bit untouched
	bra.b		fout_sd_exc_cont
20636 
#
# fout_sd_exc / fout_sd_exc_ovfl: EXOP creation for a single or double
# precision move out that overflowed (reached from fout_sgl_ovfl and
# fout_dbl_ovfl). expects the pointer to the original source operand on
# top of the stack and the rnd prec/mode in L_SCR3(%a6).
#
# fout_sd_exc_cont is the part shared with fout_sd_exc_unfl: the sign is
# moved out of the exponent word into the byte at 2+FP_SCR0_EX, _round()
# is called on FP_SCR0 with zero g,r,s, the sign is put back, and the
# rounded operand is returned in fp1.
#
fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# build the _round() argument in d1 from the low byte of L_SCR3:
# upper word = bits 7:6 of that byte moved to bits 3:2 (the precision
# code inserted by fout_sgl/fout_dbl), lower word = bits 5:4 moved to
# bits 1:0 (the bits kept by the 0x30 mask in fout_sgl/fout_dbl).
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
20667 
20668 #################################################################
20669 # fmove.d out ###################################################
20670 #################################################################
#
# fout_dbl: move a value out in double precision.
# as used below: d0 = rounding info byte on entry, a0 = ptr to the
# extended-precision source operand.
# the rounding info is masked with 0x30, the double-precision code
# (d_mode*0x10) is or'ed in, and the result is parked in L_SCR3(%a6)
# where the later stages pick it up.
#
fout_dbl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
# dispatch on the biased exponent (sign stripped):
#	exp >  DBL_HI	-> fout_dbl_ovfl
#	exp == DBL_HI	-> fout_dbl_may_ovfl
#	exp <  DBL_LO	-> fout_dbl_unfl
#	otherwise	-> fall through into fout_dbl_exg
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&DBL_HI		# will operand overflow?
	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO		# will operand underflow?
	blt.w		fout_dbl_unfl		# yes; go handle underflow
20690 
20691 #
20692 # NORMs(in range) can be stored out by a simple "fmov.d"
20693 # Unnormalized inputs can come through this point.
20694 #
20695 fout_dbl_exg:
20696 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
20697 
20698 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20699 	fmov.l		&0x0,%fpsr		# clear FPSR
20700 
20701 	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
20702 
20703 	fmov.l		&0x0,%fpcr		# clear FPCR
20704 	fmov.l		%fpsr,%d0		# save FPSR
20705 
20706 	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
20707 
20708 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20709 	lea		L_SCR1(%a6),%a0		# pass: src addr
20710 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20711 	bsr.l		_dmem_write		# store dbl fop to memory
20712 
20713 	tst.l		%d1			# did dstore fail?
20714 	bne.l		facc_out_d		# yes
20715 
20716 	rts					# no; so we're finished
20717 
20718 #
20719 # here, we know that the operand would UNFL if moved out to double prec,
20720 # so, denorm and round and then use generic store double routine to
20721 # write the value to memory.
20722 #
20723 fout_dbl_unfl:
20724 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20725 
20726 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20727 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20728 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20729 	mov.l		%a0,-(%sp)
20730 
20731 	clr.l		%d0			# pass: S.F. = 0
20732 
20733 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
20734 	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
20735 
20736 	lea		FP_SCR0(%a6),%a0
20737 	bsr.l		norm			# normalize the DENORM
20738 
20739 fout_dbl_unfl_cont:
20740 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
20741 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
20742 	bsr.l		unf_res			# calc default underflow result
20743 
20744 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
20745 	bsr.l		dst_dbl			# convert to single prec
20746 	mov.l		%d0,L_SCR1(%a6)
20747 	mov.l		%d1,L_SCR2(%a6)
20748 
20749 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20750 	lea		L_SCR1(%a6),%a0		# pass: src addr
20751 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20752 	bsr.l		_dmem_write		# store dbl fop to memory
20753 
20754 	tst.l		%d1			# did dstore fail?
20755 	bne.l		facc_out_d		# yes
20756 
20757 	mov.b		FPCR_ENABLE(%a6),%d1
20758 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20759 	bne.w		fout_sd_exc_unfl	# yes
20760 	addq.l		&0x4,%sp
20761 	rts
20762 
20763 #
20764 # it's definitely an overflow so call ovf_res to get the correct answer
20765 #
20766 fout_dbl_ovfl:
20767 	mov.w		2+SRC_LO(%a0),%d0
20768 	andi.w		&0x7ff,%d0
20769 	bne.b		fout_dbl_ovfl_inex2
20770 
20771 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20772 	bra.b		fout_dbl_ovfl_cont
20773 fout_dbl_ovfl_inex2:
20774 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20775 
20776 fout_dbl_ovfl_cont:
20777 	mov.l		%a0,-(%sp)
20778 
20779 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20780 # overflow result. DON'T save the returned ccodes from ovf_res() since
20781 # fmove out doesn't alter them.
20782 	tst.b		SRC_EX(%a0)		# is operand negative?
20783 	smi		%d1			# set if so
20784 	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
20785 	bsr.l		ovf_res			# calc OVFL result
20786 	fmovm.x		(%a0),&0x80		# load default overflow result
20787 	fmov.d		%fp0,L_SCR1(%a6)	# store to double
20788 
20789 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20790 	lea		L_SCR1(%a6),%a0		# pass: src addr
20791 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20792 	bsr.l		_dmem_write		# store dbl fop to memory
20793 
20794 	tst.l		%d1			# did dstore fail?
20795 	bne.l		facc_out_d		# yes
20796 
20797 	mov.b		FPCR_ENABLE(%a6),%d1
20798 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20799 	bne.w		fout_sd_exc_ovfl	# yes
20800 	addq.l		&0x4,%sp
20801 	rts
20802 
20803 #
20804 # move out MAY overflow:
20805 # (1) force the exp to 0x3fff
20806 # (2) do a move w/ appropriate rnd mode
20807 # (3) if exp still equals zero, then insert original exponent
20808 #	for the correct result.
20809 #     if exp now equals one, then it overflowed so call ovf_res.
20810 #
20811 fout_dbl_may_ovfl:
20812 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
20813 	andi.w		&0x8000,%d1		# keep it,clear exp
20814 	ori.w		&0x3fff,%d1		# insert exp = 0
20815 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
20816 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20817 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20818 
20819 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20820 
20821 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
20822 	fmov.l		&0x0,%fpcr		# clear FPCR
20823 
20824 	fabs.x		%fp0			# need absolute value
20825 	fcmp.b		%fp0,&0x2		# did exponent increase?
20826 	fblt.w		fout_dbl_exg		# no; go finish NORM
20827 	bra.w		fout_dbl_ovfl		# yes; go handle overflow
20828 
20829 #########################################################################
20830 # XDEF ****************************************************************	#
20831 #	dst_dbl(): create double precision value from extended prec.	#
20832 #									#
20833 # XREF ****************************************************************	#
20834 #	None								#
20835 #									#
20836 # INPUT ***************************************************************	#
20837 #	a0 = pointer to source operand in extended precision		#
20838 #									#
20839 # OUTPUT **************************************************************	#
20840 #	d0 = hi(double precision result)				#
20841 #	d1 = lo(double precision result)				#
20842 #									#
20843 # ALGORITHM ***********************************************************	#
20844 #									#
20845 #  Changes extended precision to double precision.			#
20846 #  Note: no attempt is made to round the extended value to double.	#
20847 #	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)		#
#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:11}					#
20851 #									#
20852 #		---------------   ---------------    ---------------	#
20853 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
20854 #		---------------   ---------------    ---------------	#
20855 #		 95	    64    63 62	      32      31     11	  0	#
20856 #				     |			     |		#
20857 #				     |			     |		#
20858 #				     |			     |		#
20859 #			             v			     v		#
20860 #			      ---------------   ---------------		#
20861 #  double   ->		      |s|exp| mant  |   |  mant       |		#
20862 #			      ---------------   ---------------		#
20863 #			      63     51   32   31	       0	#
20864 #									#
20865 #########################################################################
20866 
# in:  a0 = ptr to extended-precision operand (FTEMP layout)
# out: d0 = hi longword, d1 = lo longword of the double-precision image
# no rounding is done; mantissa bits below the kept field are dropped.
# L_SCR1(%a6) and L_SCR2(%a6) are overwritten as scratch storage.
dst_dbl:
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_dupper		# no; top mantissa bit is set
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms (top bit skipped)
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get upper 21 bits of ls mantissa
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0		# reload hi longword as return value
	rts
20896 
20897 #########################################################################
20898 # XDEF ****************************************************************	#
20899 #	dst_sgl(): create single precision value from extended prec	#
20900 #									#
20901 # XREF ****************************************************************	#
20902 #									#
20903 # INPUT ***************************************************************	#
20904 #	a0 = pointer to source operand in extended precision		#
20905 #									#
20906 # OUTPUT **************************************************************	#
20907 #	d0 = single precision result					#
20908 #									#
20909 # ALGORITHM ***********************************************************	#
20910 #									#
20911 # Changes extended precision to single precision.			#
20912 #	sgl_sign = ext_sign						#
20913 #	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
20914 #	get rid of ext integer bit					#
#	sgl_mant = ext_mant{62:40}					#
20916 #									#
20917 #		---------------   ---------------    ---------------	#
20918 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
20919 #		---------------   ---------------    ---------------	#
20920 #		 95	    64    63 62	   40 32      31     12	  0	#
20921 #				     |	   |				#
20922 #				     |	   |				#
20923 #				     |	   |				#
20924 #			             v     v				#
20925 #			      ---------------				#
20926 #  single   ->		      |s|exp| mant  |				#
20927 #			      ---------------				#
20928 #			      31     22     0				#
20929 #									#
20930 #########################################################################
20931 
# in:  a0 = ptr to extended-precision operand (FTEMP layout)
# out: d0 = single-precision image; d1 is used as scratch
# no rounding is done; mantissa bits below the kept 23 are dropped.
dst_sgl:
	clr.l		%d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0		# add single precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_supper		# no; top mantissa bit is set
	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# put exp in upper word of d0
	lsl.l		&0x7,%d0		# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_sman		# if positive, continue
	bset		&0x1f,%d0		# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms (top bit dropped)
	lsr.l		&0x8,%d1		# and put them flush right
	or.l		%d1,%d0			# put these bits in ms word of single
	rts
20952 
20953 ##############################################################################
#
# fout_pack: move a value out in packed decimal format.
# _calc_ea_fout() is called first and the a0 it returns is pushed; it is
# popped later (fout_pack_write) as the destination address.
# a non-zero STAG goes to fout_pack_not_norm, which sends DENORMs back to
# fout_pack_norm and writes everything else out without conversion.
#
# for the conversion the k-factor is taken either from the static field
# of the command word or from the data register it names, sign-extended
# from 7 bits, and kept on the stack across bindec(). bindec() is called
# with a0 = FP_SRC; the code below then treats FP_SCR0 as the packed
# result.
#
fout_pack:
	bsr.l		_calc_ea_fout		# fetch the <ea>
	mov.l		%a0,-(%sp)		# save dst addr for fout_pack_write

	mov.b		STAG(%a6),%d0		# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b		fout_pack_s		# static

# dynamic k-factor: register number is bits 6:4 of the low cmdreg byte.
fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor (low 7 bits, sign-extended)
	mov.l	%d0,-(%sp)			# save k-factor across bindec

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l	(%sp)+,%d0			# restore k-factor

# is every digit of the packed "mantissa" zero?
	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)	# keep top 4 bits, clear low 12 bits of 1st word
21008 
#
# fout_pack_set: the converted packed value in FP_SCR0 is the source.
# fout_pack_write: common 12-byte write. a0 = src addr (set above or by
# fout_pack_not_norm/fout_pack_snan); the destination address pushed at
# the top of fout_pack is popped into a1.
# if SPCOND_FLG equals mda7_flg the write goes through fout_pack_a7
# instead of _dmem_write. a non-zero d1 after the write means failure.
#
fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write packed number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts
21026 
#
# fout_pack_a7: packed write for the case where SPCOND_FLG equals
# mda7_flg. registers are as set up by fout_pack_write:
# a0 = src addr, a1 = dst addr, d0 = 12 (byte count).
# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
#
fout_pack_a7:
	bsr.l		_mem_write2		# write packed number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts
21036 
#
# fout_pack_not_norm: d0 = STAG value loaded at the top of fout_pack.
# DENORMs are sent back to the conversion path. every other type is
# written out directly from FP_SRC (no bindec call) after the word at
# 2+FP_SRC_EX is cleared. for an SNAN the SNAN/AIOP status is recorded
# and bit 6 of the first mantissa byte is set before the write.
#
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes
	lea		FP_SRC(%a6),%a0		# pass: src addr is the operand itself
	clr.w		2+FP_SRC_EX(%a6)	# clear word following the exponent
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write
21050 
21051 #########################################################################
21052 # XDEF ****************************************************************	#
21053 #	fetch_dreg(): fetch register according to index in d1		#
21054 #									#
21055 # XREF ****************************************************************	#
21056 #	None								#
21057 #									#
21058 # INPUT ***************************************************************	#
21059 #	d1 = index of register to fetch from				#
21060 #									#
21061 # OUTPUT **************************************************************	#
21062 #	d0 = value of register fetched					#
21063 #									#
21064 # ALGORITHM ***********************************************************	#
21065 #	According to the index value in d1 which can range from zero	#
21066 # to fifteen, load the corresponding register file value (where		#
21067 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
21068 # stack. The rest should still be in their original places.		#
21069 #									#
21070 #########################################################################
21071 
# this routine leaves d1 intact for subsequent store_dreg calls.
#
# dispatch: the word at tbl_fdreg[d1] is the offset of the handler from
# tbl_fdreg; it is loaded into d0 (which is about to be overwritten by
# the result anyway) and jumped through. the table has 16 entries and no
# range check is made, so d1.w must be 0-15.
# the table must stay directly behind the jmp: both instructions address
# it with a byte-sized pc-relative displacement.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)

tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

# index 0-1: d0/d1 are read from their save slots in EXC_DREGS
fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0
	rts
# index 2-7: d2-d7 are read from the live registers
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
# index 8-9: a0/a1 are read from their save slots in EXC_DREGS
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0
	rts
# index 10-13: a2-a5 are read from the live registers
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
# index 14: a6 is read from the longword that the current a6 points to
fdrege:
	mov.l		(%a6),%d0
	rts
# index 15: a7 is read from its save slot EXC_A7
fdregf:
	mov.l		EXC_A7(%a6),%d0
	rts
21144 
21145 #########################################################################
21146 # XDEF ****************************************************************	#
21147 #	store_dreg_l(): store longword to data register specified by d1	#
21148 #									#
21149 # XREF ****************************************************************	#
21150 #	None								#
21151 #									#
21152 # INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index of data register to store to			#
21155 #									#
21156 # OUTPUT **************************************************************	#
21157 #	(data register is updated)					#
21158 #									#
21159 # ALGORITHM ***********************************************************	#
21160 #	According to the index value in d1, store the longword value	#
21161 # in d0 to the corresponding data register. D0/D1 are on the stack	#
21162 # while the rest are in their initial places.				#
21163 #									#
21164 #########################################################################
21165 
# in:  d0 = longword to store, d1.w = data register index
# d1 is destroyed: it is reloaded with the handler offset read from
# tbl_sdregl and used for the jump. the table has 8 entries and no range
# check is made, so d1.w must be 0-7.
# the table must stay directly behind the jmp: both instructions address
# it with a byte-sized pc-relative displacement.
	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

# d0/d1: update the save slots in EXC_DREGS
sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)
	rts
# d2-d7: update the live registers
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts
21205 
21206 #########################################################################
21207 # XDEF ****************************************************************	#
21208 #	store_dreg_w(): store word to data register specified by d1	#
21209 #									#
21210 # XREF ****************************************************************	#
21211 #	None								#
21212 #									#
21213 # INPUT ***************************************************************	#
21214 #	d0 = word value to store					#
#	d1 = index of data register to store to			#
21216 #									#
21217 # OUTPUT **************************************************************	#
21218 #	(data register is updated)					#
21219 #									#
21220 # ALGORITHM ***********************************************************	#
21221 #	According to the index value in d1, store the word value	#
21222 # in d0 to the corresponding data register. D0/D1 are on the stack	#
21223 # while the rest are in their initial places.				#
21224 #									#
21225 #########################################################################
21226 
# in:  d0 = word to store (low word), d1.w = data register index
# only the low word of the target register is changed.
# d1 is destroyed: it is reloaded with the handler offset read from
# tbl_sdregw and used for the jump. the table has 8 entries and no range
# check is made, so d1.w must be 0-7.
# the table must stay directly behind the jmp: both instructions address
# it with a byte-sized pc-relative displacement.
	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)

tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

# d0/d1: update the low word (offset 2) of the save slots in EXC_DREGS
sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
	rts
# d2-d7: update the live registers
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts
21266 
21267 #########################################################################
21268 # XDEF ****************************************************************	#
21269 #	store_dreg_b(): store byte to data register specified by d1	#
21270 #									#
21271 # XREF ****************************************************************	#
21272 #	None								#
21273 #									#
21274 # INPUT ***************************************************************	#
21275 #	d0 = byte value to store					#
#	d1 = index of data register to store to			#
21277 #									#
21278 # OUTPUT **************************************************************	#
21279 #	(data register is updated)					#
21280 #									#
21281 # ALGORITHM ***********************************************************	#
21282 #	According to the index value in d1, store the byte value	#
21283 # in d0 to the corresponding data register. D0/D1 are on the stack	#
21284 # while the rest are in their initial places.				#
21285 #									#
21286 #########################################################################
21287 
# in:  d0 = byte to store (low byte), d1.w = data register index
# only the low byte of the target register is changed.
# d1 is destroyed: it is reloaded with the handler offset read from
# tbl_sdregb and used for the jump. the table has 8 entries and no range
# check is made, so d1.w must be 0-7.
# the table must stay directly behind the jmp: both instructions address
# it with a byte-sized pc-relative displacement.
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)

tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

# d0/d1: update the low byte (offset 3) of the save slots in EXC_DREGS
sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
	rts
# d2-d7: update the live registers
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts
21327 
21328 #########################################################################
21329 # XDEF ****************************************************************	#
21330 #	inc_areg(): increment an address register by the value in d0	#
21331 #									#
21332 # XREF ****************************************************************	#
21333 #	None								#
21334 #									#
21335 # INPUT ***************************************************************	#
21336 #	d0 = amount to increment by					#
21337 #	d1 = index of address register to increment			#
21338 #									#
21339 # OUTPUT **************************************************************	#
21340 #	(address register is updated)					#
21341 #									#
21342 # ALGORITHM ***********************************************************	#
21343 #	Typically used for an instruction w/ a post-increment <ea>,	#
21344 # this routine adds the increment value in d0 to the address register	#
21345 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
21346 # in their original places.						#
21347 #	For a7, if the increment amount is one, then we have to		#
21348 # increment by two. For any a7 update, set the mia7_flag so that if	#
21349 # an access error exception occurs later in emulation, this address	#
21350 # register update can be undone.					#
21351 #									#
21352 #########################################################################
21353 
21354 	global		inc_areg
21355 inc_areg:
21356 	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
21357 	jmp		(tbl_iareg.b,%pc,%d1.w*1)
21358 
21359 tbl_iareg:
21360 	short		iareg0 - tbl_iareg
21361 	short		iareg1 - tbl_iareg
21362 	short		iareg2 - tbl_iareg
21363 	short		iareg3 - tbl_iareg
21364 	short		iareg4 - tbl_iareg
21365 	short		iareg5 - tbl_iareg
21366 	short		iareg6 - tbl_iareg
21367 	short		iareg7 - tbl_iareg
21368 
21369 iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
21370 	rts
21371 iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
21372 	rts
21373 iareg2:	add.l		%d0,%a2
21374 	rts
21375 iareg3:	add.l		%d0,%a3
21376 	rts
21377 iareg4:	add.l		%d0,%a4
21378 	rts
21379 iareg5:	add.l		%d0,%a5
21380 	rts
21381 iareg6:	add.l		%d0,(%a6)
21382 	rts
21383 iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
21384 	cmpi.b		%d0,&0x1
21385 	beq.b		iareg7b
21386 	add.l		%d0,EXC_A7(%a6)
21387 	rts
21388 iareg7b:
21389 	addq.l		&0x2,EXC_A7(%a6)
21390 	rts
21391 
21392 #########################################################################
21393 # XDEF ****************************************************************	#
21394 #	dec_areg(): decrement an address register by the value in d0	#
21395 #									#
21396 # XREF ****************************************************************	#
21397 #	None								#
21398 #									#
21399 # INPUT ***************************************************************	#
21400 #	d0 = amount to decrement by					#
21401 #	d1 = index of address register to decrement			#
21402 #									#
21403 # OUTPUT **************************************************************	#
21404 #	(address register is updated)					#
21405 #									#
21406 # ALGORITHM ***********************************************************	#
21407 #	Typically used for an instruction w/ a pre-decrement <ea>,	#
# this routine subtracts the value in d0 from the address register	#
21409 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
21410 # in their original places.						#
21411 #	For a7, if the decrement amount is one, then we have to		#
21412 # decrement by two. For any a7 update, set the mda7_flag so that if	#
21413 # an access error exception occurs later in emulation, this address	#
21414 # register update can be undone.					#
21415 #									#
21416 #########################################################################
21417 
21418 	global		dec_areg
21419 dec_areg:
21420 	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1
21421 	jmp		(tbl_dareg.b,%pc,%d1.w*1)
21422 
21423 tbl_dareg:
21424 	short		dareg0 - tbl_dareg
21425 	short		dareg1 - tbl_dareg
21426 	short		dareg2 - tbl_dareg
21427 	short		dareg3 - tbl_dareg
21428 	short		dareg4 - tbl_dareg
21429 	short		dareg5 - tbl_dareg
21430 	short		dareg6 - tbl_dareg
21431 	short		dareg7 - tbl_dareg
21432 
21433 dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
21434 	rts
21435 dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
21436 	rts
21437 dareg2:	sub.l		%d0,%a2
21438 	rts
21439 dareg3:	sub.l		%d0,%a3
21440 	rts
21441 dareg4:	sub.l		%d0,%a4
21442 	rts
21443 dareg5:	sub.l		%d0,%a5
21444 	rts
21445 dareg6:	sub.l		%d0,(%a6)
21446 	rts
21447 dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
21448 	cmpi.b		%d0,&0x1
21449 	beq.b		dareg7b
21450 	sub.l		%d0,EXC_A7(%a6)
21451 	rts
21452 dareg7b:
21453 	subq.l		&0x2,EXC_A7(%a6)
21454 	rts
21455 
21456 ##############################################################################
21457 
21458 #########################################################################
21459 # XDEF ****************************************************************	#
21460 #	load_fpn1(): load FP register value into FP_SRC(a6).		#
21461 #									#
21462 # XREF ****************************************************************	#
21463 #	None								#
21464 #									#
21465 # INPUT ***************************************************************	#
21466 #	d0 = index of FP register to load				#
21467 #									#
21468 # OUTPUT **************************************************************	#
21469 #	FP_SRC(a6) = value loaded from FP register file			#
21470 #									#
21471 # ALGORITHM ***********************************************************	#
21472 #	Using the index in d0, load FP_SRC(a6) with a number from the	#
21473 # FP register file.							#
21474 #									#
21475 #########################################################################
21476 
21477 	global		load_fpn1
21478 load_fpn1:
21479 	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
21480 	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)
21481 
21482 tbl_load_fpn1:
21483 	short		load_fpn1_0 - tbl_load_fpn1
21484 	short		load_fpn1_1 - tbl_load_fpn1
21485 	short		load_fpn1_2 - tbl_load_fpn1
21486 	short		load_fpn1_3 - tbl_load_fpn1
21487 	short		load_fpn1_4 - tbl_load_fpn1
21488 	short		load_fpn1_5 - tbl_load_fpn1
21489 	short		load_fpn1_6 - tbl_load_fpn1
21490 	short		load_fpn1_7 - tbl_load_fpn1
21491 
21492 load_fpn1_0:
21493 	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
21494 	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
21495 	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
21496 	lea		FP_SRC(%a6), %a0
21497 	rts
21498 load_fpn1_1:
21499 	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
21500 	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
21501 	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
21502 	lea		FP_SRC(%a6), %a0
21503 	rts
21504 load_fpn1_2:
21505 	fmovm.x		&0x20, FP_SRC(%a6)
21506 	lea		FP_SRC(%a6), %a0
21507 	rts
21508 load_fpn1_3:
21509 	fmovm.x		&0x10, FP_SRC(%a6)
21510 	lea		FP_SRC(%a6), %a0
21511 	rts
21512 load_fpn1_4:
21513 	fmovm.x		&0x08, FP_SRC(%a6)
21514 	lea		FP_SRC(%a6), %a0
21515 	rts
21516 load_fpn1_5:
21517 	fmovm.x		&0x04, FP_SRC(%a6)
21518 	lea		FP_SRC(%a6), %a0
21519 	rts
21520 load_fpn1_6:
21521 	fmovm.x		&0x02, FP_SRC(%a6)
21522 	lea		FP_SRC(%a6), %a0
21523 	rts
21524 load_fpn1_7:
21525 	fmovm.x		&0x01, FP_SRC(%a6)
21526 	lea		FP_SRC(%a6), %a0
21527 	rts
21528 
21529 #############################################################################
21530 
21531 #########################################################################
21532 # XDEF ****************************************************************	#
21533 #	load_fpn2(): load FP register value into FP_DST(a6).		#
21534 #									#
21535 # XREF ****************************************************************	#
21536 #	None								#
21537 #									#
21538 # INPUT ***************************************************************	#
21539 #	d0 = index of FP register to load				#
21540 #									#
21541 # OUTPUT **************************************************************	#
21542 #	FP_DST(a6) = value loaded from FP register file			#
21543 #									#
21544 # ALGORITHM ***********************************************************	#
21545 #	Using the index in d0, load FP_DST(a6) with a number from the	#
21546 # FP register file.							#
21547 #									#
21548 #########################################################################
21549 
21550 	global		load_fpn2
21551 load_fpn2:
21552 	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
21553 	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)
21554 
21555 tbl_load_fpn2:
21556 	short		load_fpn2_0 - tbl_load_fpn2
21557 	short		load_fpn2_1 - tbl_load_fpn2
21558 	short		load_fpn2_2 - tbl_load_fpn2
21559 	short		load_fpn2_3 - tbl_load_fpn2
21560 	short		load_fpn2_4 - tbl_load_fpn2
21561 	short		load_fpn2_5 - tbl_load_fpn2
21562 	short		load_fpn2_6 - tbl_load_fpn2
21563 	short		load_fpn2_7 - tbl_load_fpn2
21564 
21565 load_fpn2_0:
21566 	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
21567 	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
21568 	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
21569 	lea		FP_DST(%a6), %a0
21570 	rts
21571 load_fpn2_1:
21572 	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
21573 	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
21574 	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
21575 	lea		FP_DST(%a6), %a0
21576 	rts
21577 load_fpn2_2:
21578 	fmovm.x		&0x20, FP_DST(%a6)
21579 	lea		FP_DST(%a6), %a0
21580 	rts
21581 load_fpn2_3:
21582 	fmovm.x		&0x10, FP_DST(%a6)
21583 	lea		FP_DST(%a6), %a0
21584 	rts
21585 load_fpn2_4:
21586 	fmovm.x		&0x08, FP_DST(%a6)
21587 	lea		FP_DST(%a6), %a0
21588 	rts
21589 load_fpn2_5:
21590 	fmovm.x		&0x04, FP_DST(%a6)
21591 	lea		FP_DST(%a6), %a0
21592 	rts
21593 load_fpn2_6:
21594 	fmovm.x		&0x02, FP_DST(%a6)
21595 	lea		FP_DST(%a6), %a0
21596 	rts
21597 load_fpn2_7:
21598 	fmovm.x		&0x01, FP_DST(%a6)
21599 	lea		FP_DST(%a6), %a0
21600 	rts
21601 
21602 #############################################################################
21603 
21604 #########################################################################
21605 # XDEF ****************************************************************	#
21606 #	store_fpreg(): store an fp value to the fpreg designated d0.	#
21607 #									#
21608 # XREF ****************************************************************	#
21609 #	None								#
21610 #									#
21611 # INPUT ***************************************************************	#
21612 #	fp0 = extended precision value to store				#
21613 #	d0  = index of floating-point register				#
21614 #									#
21615 # OUTPUT **************************************************************	#
21616 #	None								#
21617 #									#
21618 # ALGORITHM ***********************************************************	#
21619 #	Store the value in fp0 to the FP register designated by the	#
21620 # value in d0. The FP number can be DENORM or SNAN so we have to be	#
21621 # careful that we don't take an exception here.				#
21622 #									#
21623 #########################################################################
21624 
21625 	global		store_fpreg
21626 store_fpreg:
21627 	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0
21628 	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)
21629 
21630 tbl_store_fpreg:
21631 	short		store_fpreg_0 - tbl_store_fpreg
21632 	short		store_fpreg_1 - tbl_store_fpreg
21633 	short		store_fpreg_2 - tbl_store_fpreg
21634 	short		store_fpreg_3 - tbl_store_fpreg
21635 	short		store_fpreg_4 - tbl_store_fpreg
21636 	short		store_fpreg_5 - tbl_store_fpreg
21637 	short		store_fpreg_6 - tbl_store_fpreg
21638 	short		store_fpreg_7 - tbl_store_fpreg
21639 
21640 store_fpreg_0:
21641 	fmovm.x		&0x80, EXC_FP0(%a6)
21642 	rts
21643 store_fpreg_1:
21644 	fmovm.x		&0x80, EXC_FP1(%a6)
21645 	rts
21646 store_fpreg_2:
21647 	fmovm.x		&0x01, -(%sp)
21648 	fmovm.x		(%sp)+, &0x20
21649 	rts
21650 store_fpreg_3:
21651 	fmovm.x		&0x01, -(%sp)
21652 	fmovm.x		(%sp)+, &0x10
21653 	rts
21654 store_fpreg_4:
21655 	fmovm.x		&0x01, -(%sp)
21656 	fmovm.x		(%sp)+, &0x08
21657 	rts
21658 store_fpreg_5:
21659 	fmovm.x		&0x01, -(%sp)
21660 	fmovm.x		(%sp)+, &0x04
21661 	rts
21662 store_fpreg_6:
21663 	fmovm.x		&0x01, -(%sp)
21664 	fmovm.x		(%sp)+, &0x02
21665 	rts
21666 store_fpreg_7:
21667 	fmovm.x		&0x01, -(%sp)
21668 	fmovm.x		(%sp)+, &0x01
21669 	rts
21670 
21671 #########################################################################
21672 # XDEF ****************************************************************	#
21673 #	_denorm(): denormalize an intermediate result			#
21674 #									#
21675 # XREF ****************************************************************	#
21676 #	None								#
21677 #									#
21678 # INPUT *************************************************************** #
21679 #	a0 = points to the operand to be denormalized			#
21680 #		(in the internal extended format)			#
21681 #									#
21682 #	d0 = rounding precision						#
21683 #									#
21684 # OUTPUT **************************************************************	#
21685 #	a0 = pointer to the denormalized result				#
21686 #		(in the internal extended format)			#
21687 #									#
21688 #	d0 = guard,round,sticky						#
21689 #									#
21690 # ALGORITHM ***********************************************************	#
21691 #	According to the exponent underflow threshold for the given	#
# precision, shift the mantissa bits to the right in order to raise the	#
21693 # exponent of the operand to the threshold value. While shifting the	#
21694 # mantissa bits right, maintain the value of the guard, round, and	#
21695 # sticky bits.								#
21696 # other notes:								#
21697 #	(1) _denorm() is called by the underflow routines		#
21698 #	(2) _denorm() does NOT affect the status register		#
21699 #									#
21700 #########################################################################
21701 
#
# table of exponent threshold values for each precision.
# indexed by (d0 >> 2) in _denorm(): entry 0 is 0x0, entry 1 is
# sgl_thresh, entry 2 is dbl_thresh.
#
tbl_thresh:
	short		0x0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply set the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
# d1 keeps the threshold throughout; d0 is scratch until it is loaded
# with the g,r,s value that is passed on to dnrm_lp() or returned.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# clear g,r,s
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; set sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# set the sticky bit and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
21744 
21745 #									#
21746 # dnrm_lp(): normalize exponent/mantissa to specified threshold		#
21747 #									#
21748 # INPUT:								#
21749 #	%a0	   : points to the operand to be denormalized		#
21750 #	%d0{31:29} : initial guard,round,sticky				#
21751 #	%d1{15:0}  : denormalization threshold				#
21752 # OUTPUT:								#
21753 #	%a0	   : points to the denormalized operand			#
21754 #	%d0{31:29} : final guard,round,sticky				#
21755 #									#
21756 
# *** Local Equates *** #
# GRS is stored right after the FTEMP_LO copy (see dnrm_lp below) so that
# one bitfield extraction can span both longwords.
set	GRS,		L_SCR2			# g,r,s temp storage
set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is. after the subtract, d0 = threshold and d1 = shift amount;
# the case_N handlers rely on both.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0
	cmpi.w		%d1, &0x20		# is ( 0 < d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No denormalization necessary: the exponent is already at or above the
# threshold, so the operand is left untouched.
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0		# restore original g,r,s
	rts
21790 
21791 #
21792 # case (0<d1<32)
21793 #
21794 # %d0 = denorm threshold
21795 # %d1 = "n" = amt to shift
21796 #
21797 #	---------------------------------------------------------
21798 #	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
21799 #	---------------------------------------------------------
21800 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21801 #	\	   \		      \			 \
21802 #	 \	    \		       \		  \
21803 #	  \	     \			\		   \
21804 #	   \	      \			 \		    \
21805 #	    \	       \		  \		     \
21806 #	     \		\		   \		      \
21807 #	      \		 \		    \		       \
21808 #	       \	  \		     \			\
21809 #	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21810 #	---------------------------------------------------------
21811 #	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
21812 #	---------------------------------------------------------
21813 #
# Shift the 64-bit mantissa right by n = d1 (0 < n < 32) with three
# bitfield extractions, each starting (32 - n) bits into its source.
# d2 is used as a temporary and is saved/restored on the stack.
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# for shift amounts >= 29, fold the incoming g,r,s byte into the low byte
# of the FTEMP_LO copy so those bits land among the bits tested for sticky.
	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

# the 30 bits below G,R in d0 are everything that was shifted out past
# the round bit; any one of them set means sticky.
	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts
21842 
21843 #
21844 # case (32<=d1<64)
21845 #
21846 # %d0 = denorm threshold
21847 # %d1 = "n" = amt to shift
21848 #
21849 #	---------------------------------------------------------
21850 #	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
21851 #	---------------------------------------------------------
21852 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21853 #	\	   \		      \
21854 #	 \	    \		       \
21855 #	  \	     \			-------------------
21856 #	   \	      --------------------		   \
21857 #	    -------------------		  \		    \
21858 #			       \	   \		     \
21859 #				\	    \		      \
21860 #				 \	     \		       \
21861 #	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21862 #	---------------------------------------------------------
21863 #	|0...............0|0....0| NEW_LO     |grs		|
21864 #	---------------------------------------------------------
21865 #
# Shift the 64-bit mantissa right by d1 (32 <= d1 < 64). The new
# FTEMP_HI is zero; the new FTEMP_LO and G,R come from the old FTEMP_HI.
# d2 is used as a temporary and is saved/restored on the stack.
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 31
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

# sticky is set if anything below G,R in d1 is set, or if the tested
# part of the FTEMP_LO copy (with g,r,s folded in above) is non-zero.
	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts
21902 
21903 #
21904 # case (d1>=64)
21905 #
21906 # %d0 = denorm threshold
21907 # %d1 = amt to shift
21908 #
# On entry d0 = denorm threshold and d1 = shift amount (>= 64). The whole
# mantissa is shifted out; at most the top bits of FTEMP_HI survive as
# guard/round. Returns d0{31:29} = g,r,s with a zero mantissa.
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts

#
# case (d1 == 64)
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\		   \
#	 \		    \
#	  \		     \
#	   \		      ------------------------------
#	    -------------------------------		    \
#					   \		     \
#					    \		      \
#					     \		       \
#					      <-------(32)------>
#	---------------------------------------------------------
#	|0...............0|0................0|grs		|
#	---------------------------------------------------------
#
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete

#
# case (d1 == 65)
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\		   \
#	 \		    \
#	  \		     \
#	   \		      ------------------------------
#	    --------------------------------		    \
#					    \		     \
#					     \		      \
#					      \		       \
#					       <-------(31)----->
#	---------------------------------------------------------
#	|0...............0|0................0|0rs		|
#	---------------------------------------------------------
#
# d1 must hold a copy of hi(mantissa) here, as in case3_64. without the
# copy it still contains the shift amount (65), the "and" below can
# never yield zero, and the sticky bit would be set unconditionally.
#
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# d0 already holds the new guard/round bits and
# the entire mantissa is zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
22008 
22009 #########################################################################
22010 # XDEF ****************************************************************	#
22011 #	_round(): round result according to precision/mode		#
22012 #									#
22013 # XREF ****************************************************************	#
22014 #	None								#
22015 #									#
22016 # INPUT ***************************************************************	#
22017 #	a0	  = ptr to input operand in internal extended format	#
22018 #	d1(hi)    = contains rounding precision:			#
22019 #			ext = $0000xxxx					#
22020 #			sgl = $0004xxxx					#
22021 #			dbl = $0008xxxx					#
22022 #	d1(lo)	  = contains rounding mode:				#
22023 #			RN  = $xxxx0000					#
22024 #			RZ  = $xxxx0001					#
22025 #			RM  = $xxxx0002					#
22026 #			RP  = $xxxx0003					#
22027 #	d0{31:29} = contains the g,r,s bits (extended)			#
22028 #									#
22029 # OUTPUT **************************************************************	#
22030 #	a0 = pointer to rounded result					#
22031 #									#
22032 # ALGORITHM ***********************************************************	#
22033 #	On return the value pointed to by a0 is correctly rounded,	#
22034 #	a0 is preserved and the g-r-s bits in d0 are cleared.		#
22035 #	The result is not typed - the tag field is invalid.  The	#
22036 #	result is still in the internal extended format.		#
22037 #									#
22038 #	The INEX bit of USER_FPSR will be set if the rounded result was	#
22039 #	inexact (i.e. if any of the g-r-s bits were set).		#
22040 #									#
22041 #########################################################################
22042 
	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
# d1.w holds the rounding mode here; a1 is overwritten with the offset.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

# handler offsets relative to tbl_mode, in rounding-mode order RN,RZ,RM,RP
tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode
22070 
22071 #################################################################
22072 #	ROUND PLUS INFINITY					#
22073 #								#
22074 #	If sign of fp number = 0 (positive), then add 1 to l.	#
22075 #################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# if negative then truncate

# positive and inexact: add 1 to the l-bit. d0 is made non-zero so the
# add_* routines do not take their "r = s = 0, clear l-bit" path.
	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
22087 
22088 #################################################################
22089 #	ROUND MINUS INFINITY					#
22090 #								#
22091 #	If sign of fp number = 1 (negative), then add 1 to l.	#
22092 #################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# if positive then truncate

# negative and inexact: add 1 to the l-bit. d0 is made non-zero so the
# add_* routines do not take their "r = s = 0, clear l-bit" path.
	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
22104 
22105 #################################################################
22106 #	ROUND NEAREST						#
22107 #								#
22108 #	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
22109 #	Note that this will round to even in case of a tie.	#
22110 #################################################################
rnd_near:
# after the shift, d0 holds only the r and s bits; the add_* routines
# test it to detect a tie (r = s = 0) and clear the l-bit in that case.
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# g = 0; just truncate

	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
22121 
# *** LOCAL EQUATES ***
# each constant is a one at the l-bit position of the longword it is
# added to (FTEMP_HI for sgl, FTEMP_LO for dbl).
set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
# Round a single-precision mantissa up by one l-bit unit.
# d0 = zero only for a round-to-nearest tie (r = s = 0); then the l-bit
# is cleared afterwards. sgl_done is also entered from truncate.
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
22143 
22144 #########################
22145 #	ADD EXTENDED	#
22146 #########################
# Round an extended-precision mantissa up by one l-bit unit.
# d0 = zero only for a round-to-nearest tie (r = s = 0); then the l-bit
# is cleared afterwards.
add_ext:
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
# the carry ran out of the top of the mantissa: rotate it back in through
# all four mantissa words, then bump the exponent.
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done
	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
add_ext_done:
	rts
22163 
22164 #########################
22165 #	ADD DOUBLE	#
22166 #########################
# Round a double-precision mantissa up by one l-bit unit.
# d0 = zero only for a round-to-nearest tie (r = s = 0); then the l-bit
# is cleared afterwards. dbl_done is also entered from truncate.
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

# the carry ran out of the top of the mantissa: rotate it back in through
# all four mantissa words, then bump the exponent.
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done
	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit (and bits below)

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts
22186 
22187 ###########################
22188 # Truncate all other bits #
22189 ###########################
# No increment is needed: just drop the mantissa bits that lie beyond
# the selected precision. sgl_done and dbl_done do the masking and
# return; extended precision keeps the full mantissa.
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext
22197 
22198 
22199 #
22200 # ext_grs(): extract guard, round and sticky bits according to
22201 #	     rounding precision.
22202 #
22203 # INPUT
22204 #	d0	   = extended precision g,r,s (in d0{31:29})
22205 #	d1	   = {PREC,ROUND}
22206 # OUTPUT
22207 #	d0{31:29}  = guard, round, sticky
22208 #
# ext_grs() extracts the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only.  All registers except d0 are kept intact. On return, d0{31:29}
# holds the updated guard,round,sticky bits.
22213 #
22214 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22215 #	 prior to usage, and needs to restore d1 to original. this
22216 #	 routine is tightly tied to the round routine and not meant to
22217 #	 uphold standard subroutine calling practices.
22218 #
22219 
ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 already holds g,r,s since _round() had it before calling this
# function. So, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return to correct positions
	rts

#
# sgl or dbl: the guard and round bits are taken from the mantissa itself
# and everything to their right, plus the incoming g,r,s in d0, collapses
# into the new sticky bit. d2/d3 are used as scratch and restored on exit.
#
ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96		64	  40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  |xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->nn\			   /
#				   ee ---------------------
#				   ww		|
#						v
#				   gr	   new sticky
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are the 2 bits right
	mov.l		&30, %d2		# of the 24-bit sgl mantissa
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get hi(man) for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# nothing set to the right of g-r; exit

#
# dbl:
#	96		64		32	 11	0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|		|	 |xx	|grs|
#	-----------------------------------------------------
#						  nn\	    /
#						  ee -------
#						  ww	|
#							v
#						  gr	new sticky
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are the 2 bits right
	mov.l		&30, %d2		# of the dbl mantissa (21 bits into lo(man))
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

# the register mask is bit-reversed relative to the predecrement save above
	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts
22296 
22297 #########################################################################
22298 # norm(): normalize the mantissa of an extended precision input. the	#
22299 #	  input operand should not be normalized already.		#
22300 #									#
22301 # XDEF ****************************************************************	#
22302 #	norm()								#
22303 #									#
22304 # XREF **************************************************************** #
22305 #	none								#
22306 #									#
22307 # INPUT *************************************************************** #
#	a0 = pointer to fp extended precision operand to normalize	#
22309 #									#
22310 # OUTPUT ************************************************************** #
22311 #	d0 = number of bit positions the mantissa was shifted		#
22312 #	a0 = the input operand's mantissa is normalized; the exponent	#
22313 #	     is unchanged.						#
22314 #									#
22315 #########################################################################
	global		norm
#
# norm: shift the 64-bit mantissa at a0 left until its msb is set.
#	in:  a0 = pointer to the operand
#	out: d0 = number of positions shifted; mantissa updated in place
# d2/d3 are saved and restored; d1 is used as scratch and not restored.
# The exponent field is not touched.
#
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# d2 = number of leading zeroes in hi(man)
	beq.b		norm_lo			# hi(man) is all zeroes!

#
# hi(man) has a set bit: shift both halves left by d2 and move the top d2
# bits of lo(man) into the vacated low bits of hi(man).
# NOTE: for d2 = 0 (operand already normalized) the bfextu below would use
# a field width of 32, not 0, and OR all of lo(man) into hi(man). This is
# why the input must not be normalized already.
#
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract the top d2 bits of lo(man)

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#
# hi(man) is zero: the normalized lo(man) becomes the new hi(man) and the
# shift amount is 32 plus the leading zero count of lo(man).
#
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
22358 
22359 #########################################################################
22360 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
22361 #		- returns corresponding optype tag			#
22362 #									#
22363 # XDEF ****************************************************************	#
22364 #	unnorm_fix()							#
22365 #									#
22366 # XREF **************************************************************** #
22367 #	norm() - normalize the mantissa					#
22368 #									#
22369 # INPUT *************************************************************** #
22370 #	a0 = pointer to unnormalized extended precision number		#
22371 #									#
22372 # OUTPUT ************************************************************** #
22373 #	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
22374 #	a0 = input operand has been converted to a norm, denorm, or	#
22375 #	     zero; both the exponent and mantissa are changed.		#
22376 #									#
22377 #########################################################################
22378 
	global		unnorm_fix
#
# unnorm_fix: convert the UNNORM at a0 in place and return its new tag.
#	in:  a0 = pointer to the unnormalized operand
#	out: d0.b = NORM, DENORM or ZERO
# d1 is used as scratch. Three outcomes:
#	- mantissa is all zero              -> exponent cleared, ZERO
#	- shift count needed <= exponent    -> fully normalized, NORM
#	- shift count needed >  exponent    -> shifted left by "exponent"
#	                                       places only, exponent = 0, DENORM
#
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# would normalizing push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; shift only until exp = 0

#
# exponent would not go < 0. Therefore, the number can be fully normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only shift left until exp = 0.
# here d1 = exponent = number of places to shift (less than the full
# normalization distance in d0).
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

# the 32-bit field starting d1 bits into hi(man) is the new hi(man)
	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# exponent > 32 yet smaller than the shift distance, so hi(man) must be
# zero: the only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero, keep sign

	mov.b		&ZERO, %d0		# fix optype tag
	rts
22462 
22463 #########################################################################
22464 # XDEF ****************************************************************	#
22465 #	set_tag_x(): return the optype of the input ext fp number	#
22466 #									#
22467 # XREF ****************************************************************	#
22468 #	None								#
22469 #									#
22470 # INPUT ***************************************************************	#
22471 #	a0 = pointer to extended precision operand			#
22472 #									#
22473 # OUTPUT **************************************************************	#
22474 #	d0 = value of type tag						#
22475 #		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
22476 #									#
22477 # ALGORITHM ***********************************************************	#
22478 #	Simply test the exponent, j-bit, and mantissa values to		#
22479 # determine the type of operand.					#
22480 #	If it's an unnormalized zero, alter the operand and force it	#
22481 # to be a normal zero.							#
22482 #									#
22483 #########################################################################
22484 
22485 	global		set_tag_x
22486 set_tag_x:
22487 	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
22488 	andi.w		&0x7fff, %d0		# strip off sign
22489 	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
22490 	beq.b		inf_or_nan_x
22491 not_inf_or_nan_x:
22492 	btst		&0x7,FTEMP_HI(%a0)
22493 	beq.b		not_norm_x
22494 is_norm_x:
22495 	mov.b		&NORM, %d0
22496 	rts
22497 not_norm_x:
22498 	tst.w		%d0			# is exponent = 0?
22499 	bne.b		is_unnorm_x
22500 not_unnorm_x:
22501 	tst.l		FTEMP_HI(%a0)
22502 	bne.b		is_denorm_x
22503 	tst.l		FTEMP_LO(%a0)
22504 	bne.b		is_denorm_x
22505 is_zero_x:
22506 	mov.b		&ZERO, %d0
22507 	rts
22508 is_denorm_x:
22509 	mov.b		&DENORM, %d0
22510 	rts
22511 # must distinguish now "Unnormalized zeroes" which we
22512 # must convert to zero.
22513 is_unnorm_x:
22514 	tst.l		FTEMP_HI(%a0)
22515 	bne.b		is_unnorm_reg_x
22516 	tst.l		FTEMP_LO(%a0)
22517 	bne.b		is_unnorm_reg_x
22518 # it's an "unnormalized zero". let's convert it to an actual zero...
22519 	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
22520 	mov.b		&ZERO, %d0
22521 	rts
22522 is_unnorm_reg_x:
22523 	mov.b		&UNNORM, %d0
22524 	rts
22525 inf_or_nan_x:
22526 	tst.l		FTEMP_LO(%a0)
22527 	bne.b		is_nan_x
22528 	mov.l		FTEMP_HI(%a0), %d0
22529 	and.l		&0x7fffffff, %d0	# msb is a don't care!
22530 	bne.b		is_nan_x
22531 is_inf_x:
22532 	mov.b		&INF, %d0
22533 	rts
22534 is_nan_x:
22535 	btst		&0x6, FTEMP_HI(%a0)
22536 	beq.b		is_snan_x
22537 	mov.b		&QNAN, %d0
22538 	rts
22539 is_snan_x:
22540 	mov.b		&SNAN, %d0
22541 	rts
22542 
22543 #########################################################################
22544 # XDEF ****************************************************************	#
22545 #	set_tag_d(): return the optype of the input dbl fp number	#
22546 #									#
22547 # XREF ****************************************************************	#
22548 #	None								#
22549 #									#
22550 # INPUT ***************************************************************	#
22551 #	a0 = points to double precision operand				#
22552 #									#
22553 # OUTPUT **************************************************************	#
22554 #	d0 = value of type tag						#
22555 #		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
22556 #									#
22557 # ALGORITHM ***********************************************************	#
22558 #	Simply test the exponent, j-bit, and mantissa values to		#
22559 # determine the type of operand.					#
22560 #									#
22561 #########################################################################
22562 
22563 	global		set_tag_d
22564 set_tag_d:
22565 	mov.l		FTEMP(%a0), %d0
22566 	mov.l		%d0, %d1
22567 
22568 	andi.l		&0x7ff00000, %d0
22569 	beq.b		zero_or_denorm_d
22570 
22571 	cmpi.l		%d0, &0x7ff00000
22572 	beq.b		inf_or_nan_d
22573 
22574 is_norm_d:
22575 	mov.b		&NORM, %d0
22576 	rts
22577 zero_or_denorm_d:
22578 	and.l		&0x000fffff, %d1
22579 	bne		is_denorm_d
22580 	tst.l		4+FTEMP(%a0)
22581 	bne		is_denorm_d
22582 is_zero_d:
22583 	mov.b		&ZERO, %d0
22584 	rts
22585 is_denorm_d:
22586 	mov.b		&DENORM, %d0
22587 	rts
22588 inf_or_nan_d:
22589 	and.l		&0x000fffff, %d1
22590 	bne		is_nan_d
22591 	tst.l		4+FTEMP(%a0)
22592 	bne		is_nan_d
22593 is_inf_d:
22594 	mov.b		&INF, %d0
22595 	rts
22596 is_nan_d:
22597 	btst		&19, %d1
22598 	bne		is_qnan_d
22599 is_snan_d:
22600 	mov.b		&SNAN, %d0
22601 	rts
22602 is_qnan_d:
22603 	mov.b		&QNAN, %d0
22604 	rts
22605 
22606 #########################################################################
22607 # XDEF ****************************************************************	#
22608 #	set_tag_s(): return the optype of the input sgl fp number	#
22609 #									#
22610 # XREF ****************************************************************	#
22611 #	None								#
22612 #									#
22613 # INPUT ***************************************************************	#
22614 #	a0 = pointer to single precision operand			#
22615 #									#
22616 # OUTPUT **************************************************************	#
22617 #	d0 = value of type tag						#
22618 #		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
22619 #									#
22620 # ALGORITHM ***********************************************************	#
22621 #	Simply test the exponent, j-bit, and mantissa values to		#
22622 # determine the type of operand.					#
22623 #									#
22624 #########################################################################
22625 
22626 	global		set_tag_s
22627 set_tag_s:
22628 	mov.l		FTEMP(%a0), %d0
22629 	mov.l		%d0, %d1
22630 
22631 	andi.l		&0x7f800000, %d0
22632 	beq.b		zero_or_denorm_s
22633 
22634 	cmpi.l		%d0, &0x7f800000
22635 	beq.b		inf_or_nan_s
22636 
22637 is_norm_s:
22638 	mov.b		&NORM, %d0
22639 	rts
22640 zero_or_denorm_s:
22641 	and.l		&0x007fffff, %d1
22642 	bne		is_denorm_s
22643 is_zero_s:
22644 	mov.b		&ZERO, %d0
22645 	rts
22646 is_denorm_s:
22647 	mov.b		&DENORM, %d0
22648 	rts
22649 inf_or_nan_s:
22650 	and.l		&0x007fffff, %d1
22651 	bne		is_nan_s
22652 is_inf_s:
22653 	mov.b		&INF, %d0
22654 	rts
22655 is_nan_s:
22656 	btst		&22, %d1
22657 	bne		is_qnan_s
22658 is_snan_s:
22659 	mov.b		&SNAN, %d0
22660 	rts
22661 is_qnan_s:
22662 	mov.b		&QNAN, %d0
22663 	rts
22664 
22665 #########################################################################
22666 # XDEF ****************************************************************	#
22667 #	unf_res(): routine to produce default underflow result of a	#
22668 #		   scaled extended precision number; this is used by	#
22669 #		   fadd/fdiv/fmul/etc. emulation routines.		#
22670 #	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
22671 #		    single round prec and extended prec mode.		#
22672 #									#
22673 # XREF ****************************************************************	#
22674 #	_denorm() - denormalize according to scale factor		#
22675 #	_round() - round denormalized number according to rnd prec	#
22676 #									#
22677 # INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
22679 #	d0 = scale factor						#
22680 #	d1 = rounding precision/mode					#
22681 #									#
22682 # OUTPUT **************************************************************	#
22683 #	a0 = pointer to default underflow result in extended precision	#
22684 #	d0.b = result FPSR_cc which caller may or may not want to save	#
22685 #									#
22686 # ALGORITHM ***********************************************************	#
22687 #	Convert the input operand to "internal format" which means the	#
22688 # exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precision operand. Denormalize the number	#
22690 # according to the scale factor passed in d0. Then, round the		#
22691 # denormalized result.							#
22692 #	Set the FPSR_exc bits as appropriate but return the cc bits in	#
22693 # d0 in case the caller doesn't want to save them (as is the case for	#
22694 # fmove out).								#
22695 #	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
22696 # precision and the rounding mode to single.				#
22697 #									#
22698 #########################################################################
	global		unf_res
#
# unf_res: build the default underflow result in place at a0.
#	in:  a0 = operand, d0 = scale factor, d1 = rnd prec/mode
#	out: a0 = operand (now the result), d0 = ccode bits (z_bit or none)
# Stack while _denorm/_round run:
#	 (%sp)  = saved a0
#	4(%sp)  = saved d1 (rnd prec/mode); its low word is at 6(%sp)
#
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format:
	sne		FTEMP_SGN(%a0)		# sign moves to the FTEMP_SGN byte

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sign
	sub.w		%d0, %d1		# apply the scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0		# keep only the precision bits (7:6)
	lsr.w		&0x4,%d0		# ... moved down to bits 3:2
	bsr.l		_denorm			# denorm result

# build d1 = {rnd prec in the high word, rnd mode in the low word}
	mov.l		(%sp),%a0		# reload operand ptr
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1			# prec -> high word
	mov.w		0x6(%sp),%d1		# load prec:mode again
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0		# pop operand ptr

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
# the ccode is returned in d0 rather than written to FPSR_CC(%a6) so that
# the caller decides whether to keep it.
unf_res_chkifzero:
	clr.l		%d0			# default: no ccode bits
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# drop the saved rnd prec,mode
	rts
22761 
# unf_res() for fsglmul() and fsgldiv().
#	in:  a0 = operand, d0 = scale factor, d1 = rnd prec/mode
#	out: a0 = operand (now the result), d0 = ccode bits (z_bit or none)
# The operand is denormalized with extended precision and rounded with
# single precision; only the rounding mode is taken from the caller's d1.
# Stack while _denorm/_round run:
#	 (%sp)  = saved a0
#	4(%sp)  = saved d1 (rnd prec/mode); its low word is at 6(%sp)
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format:
	sne		FTEMP_SGN(%a0)		# sign moves to the FTEMP_SGN byte

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1		# strip off sign
	sub.w		%d0,%d1			# apply the scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

# build d1 = {s_mode in the high word, rnd mode in the low word}
	mov.l		(%sp),%a0		# reload operand ptr
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec -> high word
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0		# pop operand ptr

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
# the ccode is returned in d0 rather than written to FPSR_CC(%a6) so that
# the caller decides whether to keep it.
unf_res4_chkifzero:
	clr.l		%d0			# default: no ccode bits
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# drop the saved rnd prec,mode
	rts
22821 
22822 #########################################################################
22823 # XDEF ****************************************************************	#
22824 #	ovf_res(): routine to produce the default overflow result of	#
22825 #		   an overflowing number.				#
22826 #	ovf_res2(): same as above but the rnd mode/prec are passed	#
22827 #		    differently.					#
22828 #									#
22829 # XREF ****************************************************************	#
22830 #	none								#
22831 #									#
22832 # INPUT ***************************************************************	#
22833 #	d1.b	= '-1' => (-); '0' => (+)				#
22834 #   ovf_res():								#
22835 #	d0	= rnd mode/prec						#
22836 #   ovf_res2():								#
22837 #	hi(d0)	= rnd prec						#
22838 #	lo(d0)	= rnd mode						#
22839 #									#
22840 # OUTPUT **************************************************************	#
22841 #	a0	= points to extended precision result			#
22842 #	d0.b	= condition code bits					#
22843 #									#
22844 # ALGORITHM ***********************************************************	#
22845 #	The default overflow result can be determined by the sign of	#
22846 # the result and the rounding mode/prec in effect. These bits are	#
22847 # concatenated together to create an index into the default result	#
22848 # table. A pointer to the correct result is returned in a0. The		#
22849 # resulting condition codes are returned in d0 in case the caller	#
22850 # doesn't want FPSR_cc altered (as is the case for fmove out).		#
22851 #									#
22852 #########################################################################
22853 
	global		ovf_res
#
# Both entry points build the same 5-bit table index
#	bit  4    = result sign (taken from d1.b: -1 => set, 0 => clear)
#	bits 3:2  = rounding precision
#	bits 1:0  = rounding mode
# and leave index in d0.w and index*2 in d1.w for ovf_res_load.
#
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode down to bits 3:0
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0			# bring rnd prec into the low word
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
# tbl_ovfl_cc has one byte per index; tbl_ovfl_result has 16 bytes per
# index, addressed as (index*2)*8.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

#
# ccodes per index, in the same order as tbl_ovfl_result: each row is one
# precision (ext, sgl, dbl, filler) with columns RN, RZ, RM, RP; the first
# four rows are for positive results, the last three for negative ones.
# 0x2 accompanies the INF results and 0x8 the negative results.
# NOTE(review): both tables stop after the negative dbl row (28 entries), so
# indices 28-31 (negative sign with the fourth precision encoding) have no
# entry; presumably that combination never occurs -- confirm.
#
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

#
# default overflow results, four longwords (16 bytes) per entry.
# "+EXT/+SGL/+DBL" are the largest finite magnitudes of each precision.
#
tbl_ovfl_result:
# positive, extended precision
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

# positive, single precision
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

# positive, double precision
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

# filler for the fourth precision encoding; keeps the negative rows at index 16
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

# negative, extended precision
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

# negative, single precision
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

# negative, double precision
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22926 
22927 #########################################################################
22928 # XDEF ****************************************************************	#
22929 #	get_packed(): fetch a packed operand from memory and then	#
22930 #		      convert it to a floating-point binary number.	#
22931 #									#
22932 # XREF ****************************************************************	#
22933 #	_dcalc_ea() - calculate the correct <ea>			#
#	_dmem_read() - fetch the packed operand from memory		#
22935 #	facc_in_x() - the fetch failed so jump to special exit code	#
22936 #	decbin()    - convert packed to binary extended precision	#
22937 #									#
22938 # INPUT ***************************************************************	#
22939 #	None								#
22940 #									#
22941 # OUTPUT **************************************************************	#
#	If no failure on _dmem_read():					#
22943 #	FP_SRC(a6) = packed operand now as a binary FP number		#
22944 #									#
22945 # ALGORITHM ***********************************************************	#
22946 #	Get the correct <ea> which is the value on the exception stack	#
22947 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
22948 # Then, fetch the operand from memory. If the fetch fails, exit		#
22949 # through facc_in_x().							#
22950 #	If the packed operand is a ZERO,NAN, or INF, convert it to	#
22951 # its binary representation here. Else, call decbin() which will	#
22952 # convert the packed value to an extended precision binary value.	#
22953 #									#
22954 #########################################################################
22955 
22956 # the stacked <ea> for packed is correct except for -(An).
22957 # the base reg must be updated for both -(An) and (An)+.
	global		get_packed
#
# get_packed: read the 12-byte packed operand into FP_SRC(a6) and, unless
# it is an INF, NAN or ZERO, replace it there with decbin()'s binary result.
# A failed read does not return here: control leaves through facc_in_x.
#
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
# Such an operand is left in FP_SRC exactly as it was read.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp (the 15 bits below the sign)
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op. The mantissa is the last nybble of byte 4 plus the
# second and third longwords.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended (result in fp0)
	fmovm.x		&0x80,FP_SRC(%a6)	# store fp0 over the packed op; this is the srcop
	rts
22992 
22993 #########################################################################
22994 # decbin(): Converts normalized packed bcd value pointed to by register	#
22995 #	    a0 to extended-precision value in fp0.			#
22996 #									#
22997 # INPUT ***************************************************************	#
22998 #	a0 = pointer to normalized packed bcd value			#
22999 #									#
23000 # OUTPUT **************************************************************	#
23001 #	fp0 = exact fp representation of the packed bcd value.		#
23002 #									#
23003 # ALGORITHM ***********************************************************	#
23004 #	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
23005 #	and NaN operands are dispatched without entering this routine)	#
23006 #	value in 68881/882 format at location (a0).			#
23007 #									#
23008 #	A1. Convert the bcd exponent to binary by successive adds and	#
23009 #	muls. Set the sign according to SE. Subtract 16 to compensate	#
23010 #	for the mantissa which is to be interpreted as 17 integer	#
23011 #	digits, rather than 1 integer and 16 fraction digits.		#
23012 #	Note: this operation can never overflow.			#
23013 #									#
23014 #	A2. Convert the bcd mantissa to binary by successive		#
23015 #	adds and muls in FP0. Set the sign according to SM.		#
23016 #	The mantissa digits will be converted with the decimal point	#
23017 #	assumed following the least-significant digit.			#
23018 #	Note: this operation can never overflow.			#
23019 #									#
23020 #	A3. Count the number of leading/trailing zeros in the		#
23021 #	bcd string.  If SE is positive, count the leading zeros;	#
23022 #	if negative, count the trailing zeros.  Set the adjusted	#
23023 #	exponent equal to the exponent from A1 and the zero count	#
23024 #	added if SM = 1 and subtracted if SM = 0.  Scale the		#
23025 #	mantissa the equivalent of forcing in the bcd value:		#
23026 #									#
23027 #	SM = 0	a non-zero digit in the integer position		#
23028 #	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
23029 #									#
#	this will ensure that any value, regardless of its		#
23031 #	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
23032 #	consistently.							#
23033 #									#
23034 #	A4. Calculate the factor 10^exp in FP1 using a table of		#
23035 #	10^(2^n) values.  To reduce the error in forming factors	#
23036 #	greater than 10^27, a directed rounding scheme is used with	#
23037 #	tables rounded to RN, RM, and RP, according to the table	#
23038 #	in the comments of the pwrten section.				#
23039 #									#
23040 #	A5. Form the final binary number by scaling the mantissa by	#
23041 #	the exponent factor.  This is done by multiplying the		#
23042 #	mantissa in FP0 by the factor in FP1 if the adjusted		#
23043 #	exponent sign is positive, and dividing FP0 by FP1 if		#
23044 #	it is negative.							#
23045 #									#
23046 #	Clean up and return. Check if the final mul or div was inexact.	#
23047 #	If so, set INEX1 in USER_FPSR.					#
23048 #									#
23049 #########################################################################
23050 
23051 #
23052 #	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23053 #	to nearest, minus, and plus, respectively.  The tables include
23054 #	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
23055 #	is required until the power is greater than 27, however, all
23056 #	tables include the first 5 for ease of indexing.
23057 #
# RTABLE selects the rounding mode used while forming the power-of-ten
# factor in pwrten.  The index is {user FPCR rounding mode (2 bits), SM, SE}:
# each row below is one user rounding mode (00 = RN, 01 = RZ, 10 = RM,
# 11 = RP, in that order) and the four columns are (SM,SE) = 00, 01, 10, 11.
# Each entry is the 2-bit rounding mode written to the FPCR and decoded to
# pick the table: 0 -> PTENRN, 2 -> PTENRM, 3 -> PTENRP.
#
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

# Loop parameters for the mantissa lwords (see loadlw/md2b):
# FNIBS is the dbf count (7 => 8 digits per lword) and FSTRT is the
# bit-field offset of the first digit within the lword.
	set		FNIBS,7
	set		FSTRT,0

# Loop parameters for the exponent digits (see calc_e/e_gd):
# ESTRT is the bit-field offset of the first exponent digit in the first
# lword and EDIGITS is the dbf count (2 => 3 exponent digits).
	set		ESTRT,4
	set		EDIGITS,2
23069 
23070 	global		decbin
23071 decbin:
23072 	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23073 	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23074 	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
23075 
23076 	lea		FP_SCR0(%a6),%a0
23077 
23078 	movm.l		&0x3c00,-(%sp)		# save d2-d5
23079 	fmovm.x		&0x1,-(%sp)		# save fp1
23080 #
# Calculate exponent:
#  1. Work from the copy of the bcd value made at entry (a0 points to it).
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#  5. Push abs(adjusted exponent) on the stack; its sign is kept in the SE
#     bit (bit 30) of d4 and of the working copy.
#
# Register usage:
#
#  calc_e:
#	(*)  d0: temp digit storage
#	(*)  d1: accumulator for binary exponent
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	(*)  d4: first lword of bcd (SE may be set here)
#	( )  a0: pointer to working bcd value
#	(*)  working copy at (a0): SE may be set here
#
calc_e:
	mov.l		&EDIGITS,%d2		# dbf count for the exponent digits (EDIGITS+1 = 3)
	mov.l		&ESTRT,%d3		# bit-field offset of the first exponent digit
	mov.l		(%a0),%d4		# get first lword of bcd
	clr.l		%d1			# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1		# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l		%d0,%d1			# d1 = d1 + d0
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,e_gd		# loop until all 3 digits have been used
	btst		&30,%d4			# get SE
	beq.b		e_pos			# don't negate if pos
	neg.l		%d1			# negate before subtracting
e_pos:
	sub.l		&16,%d1			# sub to compensate for shift of mant
	bge.b		e_save			# if still pos, do not neg
	neg.l		%d1			# now negative, make pos and set SE
	or.l		&0x40000000,%d4		# set SE in d4,
	or.l		&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l		%d1,-(%sp)		# save abs(adjusted exp) on stack
23123 #
23124 #
# Calculate mantissa:
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
#     (i.e., all digits assumed left of the decimal point.)
#  2. Correct for mantissa sign.
#
# Register usage:
#
#  calc_m:
#	(*)  d0: temp digit storage
#	(*)  d1: lword counter
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	(*)  d4: lwords 2 and 3 of bcd, one at a time
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#
calc_m:
	mov.l		&1,%d1			# lword counter, init to 1 (lword 2 of the bcd)
	fmov.s		&0x00000000,%fp0	# accumulator = 0.0
#
#
#  The integer digit is the least-significant nibble of the first lword;
#  the remaining 16 digits are in the two lwords that follow.  Convert the
#  integer digit here, then loop over the other two lwords.
#
	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
	fadd.b		%d0,%fp0		# add digit to sum in fp0
#
#
#  Get the rest of the mantissa.
#
loadlw:
	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
	mov.l		&FSTRT,%d3		# bit-field offset of the first digit
	mov.l		&FNIBS,%d2		# reset dbf digit count for this lword
md2b:
	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
#
#
#  If all the digits (8) in that long word have been converted (dbf on d2
#  falls through), then inc d1 (=2) to point to the next long word; loadlw
#  resets d3 to 0 for the digit offset and d2 to 7 for the digit count.
#  Otherwise continue with this long word.
#
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,md2b		# check for last digit in this lw
nextlw:
	addq.l		&1,%d1			# inc lw pointer in mantissa
	cmp.l		%d1,&2			# test for last lw
	ble.b		loadlw			# if d1 <= 2, convert that lword too
#
#  Check the sign of the mant and make the value in fp0 the same sign.
#
m_sign:
# btst on a memory operand is a byte operation with the bit number taken
# modulo 8, so this tests bit 7 of the first byte of the bcd value (SM).
	btst		&31,(%a0)		# test sign of the mantissa
	beq.b		ap_st_z			# if clear, go to append/strip zeros
	fneg.x		%fp0			# if set, negate fp0
23187 #
# Append/strip zeros:
#
#  For adjusted exponents which have an absolute value greater than 27*,
#  this routine calculates the amount needed to normalize the mantissa
#  for the adjusted exponent.  That number is subtracted from the exp
#  if the exp was positive, and added if it was negative.  The purpose
#  of this is to reduce the value of the exponent and the possibility
#  of error in calculation of pwrten.
#
#  1. Branch on the sign of the adjusted exponent.
#  2p.(positive exp)
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Subtract the count from the exp.
#   5. Check if the exp has crossed zero in #4 above; make the exp abs
#      and set SE.
#   6. Multiply the mantissa by 10**count.
#  2n.(negative exp)
#   2. Check the digits in lwords 3 and 2, least-significant digit first.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Add the count to the exp (the code subtracts it from abs(exp)).
#   5. Check if the exp has crossed zero in #4 above; clear SE.
#   6. Divide the mantissa by 10**count.
#
#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
#   any adjustment due to append/strip zeros will drive the resultant
#   exponent towards zero.  Since all pwrten constants with a power
#   of 27 or less are exact, there is no need to use this routine to
#   attempt to lessen the resultant exponent.
#
# Register usage:
#
#  ap_st_z:
#	(*)  d0: temp digit storage; then the zero count
#	(*)  d1: zero count; then abs(adjusted exponent)
#	(*)  d2: digit count
#	(*)  d3: offset pointer; then power-of-ten table index
#	(*)  d4: bcd lword being examined
#	(*)  d5: lword counter
#	( )  a0: pointer to working bcd value
#	(*)  a1: pointer to PTENRN
#	(*) fp1: 10**(zero count)
#
#
# First check the absolute value of the exponent to see if this
# routine is necessary.  If so, then check the sign of the exponent
# and do append (+) or strip (-) zeros accordingly.
# This section handles a positive adjusted exponent.
#
ap_st_z:
	mov.l		(%sp),%d1		# load abs(expA) for range test
	cmp.l		%d1,&27			# test is with 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, go scan its digits
	addq.l		&8,%d1			# lw 2 is all zeros: inc count by 8
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init offset reg
	mov.l		&7,%d2			# init digit counter
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc zero count
	dbf.w		%d2,ap_p_gd		# get next digit
ap_p_fx:
	mov.l		%d1,%d0			# copy zero count to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still pos, go fix mantissa
	neg.l		%d1			# now its neg; get abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa: fp1 = 10**(zero count), built from the PTENRN
# entries selected by the set bits of the count in d0.
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# d2 is set here but is not read by the loop below
ap_p_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_p_en			# if bit is 0, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_p_en:
	add.l		&12,%d3			# inc d3 to next 12-byte table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_p_el			# if not, get next bit
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero count)
	bra.b		pwrten			# go calc pwrten
23291 #
# This section handles a negative adjusted exponent: count the trailing
# zero digits of the mantissa (lword 3 first, then lword 2), reduce
# abs(exp) by that count and divide the mantissa by 10**count.
#
ap_st_n:
	clr.l		%d1			# clr counter
	mov.l		&2,%d5			# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
	bne.b		ap_n_cl			# if not zero, check digits
	sub.l		&1,%d5			# dec d5 to point to lword 2
	addq.l		&8,%d1			# lw 3 is all zeros: inc counter by 8
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
ap_n_cl:
	mov.l		&28,%d3			# point to last digit
	mov.l		&7,%d2			# init digit counter
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_n_fx			# if non-zero, go to exp fix
	subq.l		&4,%d3			# point to previous digit
	addq.l		&1,%d1			# inc zero count
	dbf.w		%d2,ap_n_gd		# get next digit
ap_n_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get abs(adjusted exp) from memory
	sub.l		%d0,%d1			# subtract count from abs(exp)
	bgt.b		ap_n_fm			# if result still > 0, go fix mantissa
	neg.l		%d1			# take abs of exp and clr SE
	mov.l		(%a0),%d4		# load lword 1 to d4
	and.l		&0xbfffffff,%d4		# and clr SE in d4
	and.l		&0xbfffffff,(%a0)	# and in memory
#
# Calculate the mantissa divisor to compensate for the trailing zeros
# counted above: fp1 = 10**(zero count), built from the PTENRN entries
# selected by the set bits of the count in d0.
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# d2 is set here but is not read by the loop below
ap_n_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_n_en			# if bit is 0, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_n_en:
	add.l		&12,%d3			# inc d3 to next 12-byte table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_n_el			# if not, get next bit
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero count)
23338 #
23339 #
# Calculate power-of-ten factor from adjusted and shifted exponent.
#
# Register usage:
#
#  pwrten:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d2: {FPCR rounding mode,SM,SE} as index in RTABLE; temp
#	(*)  d3: FPCR work copy
#	(*)  d4: first lword of bcd (reloaded from the working copy)
#	(*)  a1: RTABLE pointer
#  calc_p:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d3: PTENxx table index
#	( )  a0: pointer to working copy of bcd
#	(*)  a1: PTENxx pointer
#	(*) fp1: power-of-ten accumulator
#
# Pwrten calculates the exponent factor in the selected rounding mode
# according to the following table:
#
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
#
#	ANY	  ANY	RN	RN
#
#	 +	   +	RP	RP
#	 -	   +	RP	RM
#	 +	   -	RP	RM
#	 -	   -	RP	RP
#
#	 +	   +	RM	RM
#	 -	   +	RM	RP
#	 +	   -	RM	RP
#	 -	   -	RM	RM
#
#	 +	   +	RZ	RM
#	 -	   +	RZ	RM
#	 +	   -	RZ	RP
#	 -	   -	RZ	RP
#
#
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {rounding mode (2 bits),SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# decode table value: bit 0 set => RP
	bcc.b		not_rp			# bit 0 clear: RM or RN
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# keep checking: bit 1 set => RM
	bcc.b		not_rm			# both bits clear: RN
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
#
# calc_p: fp1 = 10**abs(d1), formed by multiplying together the table
# entries selected by the set bits of the exponent.
#
calc_p:
	mov.l		%d1,%d0			# copy exp to d0;use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next 12-byte table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
23421 #
23422 #
#  Scale the mantissa by the power-of-ten factor according to the sign of
#  the adjusted exp (SE in the working copy).  If the exp was pos then
#  multiply fp0 by fp1; else divide fp0 by fp1.
#
# Register Usage:
#  pnorm:
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( ) fp1: scaling factor - 10**(abs(exp))
#
pnorm:
	btst		&30,(%a0)		# test the sign of the exponent
	beq.b		mul			# if clear, go to multiply
div:
	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10**abs(exp)
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0		# exp is positive, so multiply by 10**exp
23441 #
23442 #
# Clean up and return with result in fp0.
#
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
#
# The stack is unwound in the reverse order of the pushes: the saved
# exponent lword, then the 12-byte fp register image, then d2-d5.
# FPCR and FPSR are both zero on return.
#
end_dec:
	fmov.l		%fpsr,%d0		# get status register
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
	beq.b		no_exc			# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp		# discard the exponent lword pushed at e_save
# NOTE(review): this pops the saved image into fp1; confirm that the
# fmovm mask used at entry to decbin pushes fp1 as well.
	fmovm.x		(%sp)+,&0x40		# restore fp1
	movm.l		(%sp)+,&0x3c		# restore d2-d5
	fmov.l		&0x0,%fpcr		# clear the rounding mode set in pwrten
	fmov.l		&0x0,%fpsr		# clear status left by the conversion
	rts
23460 
23461 #########################################################################
23462 # bindec(): Converts an input in extended precision format to bcd format#
23463 #									#
23464 # INPUT ***************************************************************	#
23465 #	a0 = pointer to the input extended precision value in memory.	#
23466 #	     the input may be either normalized, unnormalized, or	#
23467 #	     denormalized.						#
23468 #	d0 = contains the k-factor sign-extended to 32-bits.		#
23469 #									#
23470 # OUTPUT **************************************************************	#
23471 #	FP_SCR0(a6) = bcd format result on the stack.			#
23472 #									#
23473 # ALGORITHM ***********************************************************	#
23474 #									#
23475 #	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
23476 #		The k-factor is saved for use in d7. Clear the		#
23477 #		BINDEC_FLG for separating normalized/denormalized	#
23478 #		input.  If input is unnormalized or denormalized,	#
23479 #		normalize it.						#
23480 #									#
23481 #	A2.	Set X = abs(input).					#
23482 #									#
23483 #	A3.	Compute ILOG.						#
23484 #		ILOG is the log base 10 of the input value.  It is	#
23485 #		approximated by adding e + 0.f when the original	#
23486 #		value is viewed as 2^^e * 1.f in extended precision.	#
23487 #		This value is stored in d6.				#
23488 #									#
23489 #	A4.	Clr INEX bit.						#
23490 #		The operation in A3 above may have set INEX2.		#
23491 #									#
23492 #	A5.	Set ICTR = 0;						#
23493 #		ICTR is a flag used in A13.  It must be set before the	#
23494 #		loop entry A6.						#
23495 #									#
23496 #	A6.	Calculate LEN.						#
23497 #		LEN is the number of digits to be displayed.  The	#
23498 #		k-factor can dictate either the total number of digits,	#
23499 #		if it is a positive number, or the number of digits	#
23500 #		after the decimal point which are to be included as	#
23501 #		significant.  See the 68882 manual for examples.	#
23502 #		If LEN is computed to be greater than 17, set OPERR in	#
23503 #		USER_FPSR.  LEN is stored in d4.			#
23504 #									#
23505 #	A7.	Calculate SCALE.					#
23506 #		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
23507 #		of decimal places needed to insure LEN integer digits	#
23508 #		in the output before conversion to bcd. LAMBDA is the	#
23509 #		sign of ISCALE, used in A9. Fp1 contains		#
23510 #		10^^(abs(ISCALE)) using a rounding mode which is a	#
23511 #		function of the original rounding mode and the signs	#
23512 #		of ISCALE and X.  A table is given in the code.		#
23513 #									#
#	A8.	Clr INEX; Force RZ.					#
#		The operation in A7 above may have set INEX2.		#
#		RZ mode is forced for the scaling operation to ensure	#
#		only one rounding error.  The grs bits are collected in #
#		the INEX flag for use in A10.				#
23519 #									#
23520 #	A9.	Scale X -> Y.						#
23521 #		The mantissa is scaled to the desired number of		#
23522 #		significant digits.  The excess digits are collected	#
23523 #		in INEX2.						#
23524 #									#
23525 #	A10.	Or in INEX.						#
23526 #		If INEX is set, round error occurred.  This is		#
23527 #		compensated for by 'or-ing' in the INEX2 flag to	#
23528 #		the lsb of Y.						#
23529 #									#
23530 #	A11.	Restore original FPCR; set size ext.			#
23531 #		Perform FINT operation in the user's rounding mode.	#
23532 #		Keep the size to extended.				#
23533 #									#
23534 #	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
23535 #		mode.  The FPSP routine sintd0 is used.  The output	#
23536 #		is in fp0.						#
23537 #									#
23538 #	A13.	Check for LEN digits.					#
23539 #		If the int operation results in more than LEN digits,	#
23540 #		or less than LEN -1 digits, adjust ILOG and repeat from	#
23541 #		A6.  This test occurs only on the first pass.  If the	#
23542 #		result is exactly 10^LEN, decrement ILOG and divide	#
23543 #		the mantissa by 10.					#
23544 #									#
23545 #	A14.	Convert the mantissa to bcd.				#
23546 #		The binstr routine is used to convert the LEN digit	#
23547 #		mantissa to bcd in memory.  The input to binstr is	#
23548 #		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
23549 #		such that the decimal point is to the left of bit 63.	#
23550 #		The bcd digits are stored in the correct position in	#
23551 #		the final string area in memory.			#
23552 #									#
23553 #	A15.	Convert the exponent to bcd.				#
23554 #		As in A14 above, the exp is converted to bcd and the	#
23555 #		digits are stored in the final string.			#
23556 #		Test the length of the final exponent string.  If the	#
23557 #		length is 4, set operr.					#
23558 #									#
23559 #	A16.	Write sign bits to final string.			#
23560 #									#
23561 #########################################################################
23562 
set	BINDEC_FLG,	EXC_TEMP	# DENORM flag

# Constants in extended precision (three lwords of data plus one zero
# lword of padding).  PLOG2 is approximately log10(2) = 0.30103 and is the
# multiplier used for a non-negative ILOG estimate; PLOG2UP1 is the same
# value with the last mantissa bit incremented and is used when the
# estimate is negative.
PLOG2:
	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
PLOG2UP1:
	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

# Constants in single precision (the first lword is the value; the
# remaining three lwords are zero padding).
# FONE = 1.0
FONE:
	long		0x3F800000,0x00000000,0x00000000,0x00000000
# FTWO = 2.0
FTWO:
	long		0x40000000,0x00000000,0x00000000,0x00000000
# FTEN = 10.0
FTEN:
	long		0x41200000,0x00000000,0x00000000,0x00000000
# F4933 = 4933.0
F4933:
	long		0x459A2800,0x00000000,0x00000000,0x00000000

# RBDTBL selects the rounding mode used in A7 to form 10^ISCALE.  The
# index is {user FPCR rounding mode (2 bits), LAMBDA, sign of X}: each row
# is one user rounding mode (00 = RN, 01 = RZ, 10 = RM, 11 = RP, in that
# order) and the four columns are (LAMBDA,sign) = 00, 01, 10, 11.  Each
# entry is the 2-bit rounding mode to use: 0 = RN, 2 = RM, 3 = RP.
RBDTBL:
	byte		0,0,0,0
	byte		3,3,2,2
	byte		3,2,2,3
	byte		2,3,3,2
23586 
23587 #	Implementation Notes:
23588 #
23589 #	The registers are used as follows:
23590 #
23591 #		d0: scratch; LEN input to binstr
23592 #		d1: scratch
23593 #		d2: upper 32-bits of mantissa for binstr
23594 #		d3: scratch;lower 32-bits of mantissa for binstr
23595 #		d4: LEN
23596 #		d5: LAMBDA/ICTR
23597 #		d6: ILOG
23598 #		d7: k-factor
23599 #		a0: ptr for original operand/final result
23600 #		a1: scratch pointer
23601 #		a2: pointer to FP_X; abs(original value) in ext
23602 #		fp0: scratch
23603 #		fp1: scratch
23604 #		fp2: scratch
23605 #		F_SCR1:
23606 #		F_SCR2:
23607 #		L_SCR1:
23608 #		L_SCR2:
23609 
23610 	global		bindec
23611 bindec:
23612 	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
23613 	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
23614 
23615 # A1. Set RM and size ext. Set SIGMA = sign input;
23616 #     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
23617 #     separating  normalized/denormalized input.  If the input
23618 #     is a denormalized number, set the BINDEC_FLG memory word
23619 #     to signal denorm.  If the input is unnormalized, normalize
23620 #     the input and test for denormalized result.
23621 #
23622 	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
23623 	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
23624 	mov.l		%d0,%d7		# move k-factor to d7
23625 
23626 	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
23627 	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
23628 	bne.w		A2_str		# no; input is a NORM
23629 
23630 #
23631 # Normalize the denorm
23632 #
23633 un_de_norm:
23634 	mov.w		(%a0),%d0
23635 	and.w		&0x7fff,%d0	# strip sign of normalized exp
23636 	mov.l		4(%a0),%d1
23637 	mov.l		8(%a0),%d2
23638 norm_loop:
23639 	sub.w		&1,%d0
23640 	lsl.l		&1,%d2
23641 	roxl.l		&1,%d1
23642 	tst.l		%d1
23643 	bge.b		norm_loop
23644 #
23645 # Test if the normalized input is denormalized
23646 #
23647 	tst.w		%d0
23648 	bgt.b		pos_exp		# if greater than zero, it is a norm
23649 	st		BINDEC_FLG(%a6)	# set flag for denorm
23650 pos_exp:
23651 	and.w		&0x7fff,%d0	# strip sign of normalized exp
23652 	mov.w		%d0,(%a0)
23653 	mov.l		%d1,4(%a0)
23654 	mov.l		%d2,8(%a0)
23655 
23656 # A2. Set X = abs(input).
23657 #
23658 A2_str:
23659 	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
23660 	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
23661 	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
23662 	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
23663 
23664 # A3. Compute ILOG.
23665 #     ILOG is the log base 10 of the input value.  It is approx-
23666 #     imated by adding e + 0.f when the original value is viewed
23667 #     as 2^^e * 1.f in extended precision.  This value is stored
23668 #     in d6.
23669 #
23670 # Register usage:
23671 #	Input/Output
23672 #	d0: k-factor/exponent
23673 #	d2: x/x
23674 #	d3: x/x
23675 #	d4: x/x
23676 #	d5: x/x
23677 #	d6: x/ILOG
23678 #	d7: k-factor/Unchanged
23679 #	a0: ptr for original operand/final result
23680 #	a1: x/x
23681 #	a2: x/x
23682 #	fp0: x/float(ILOG)
23683 #	fp1: x/x
23684 #	fp2: x/x
23685 #	F_SCR1:x/x
23686 #	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23687 #	L_SCR1:x/x
23688 #	L_SCR2:first word of X packed/Unchanged
23689 
23690 	tst.b		BINDEC_FLG(%a6)	# check for denorm
23691 	beq.b		A3_cont		# if clr, continue with norm
23692 	mov.l		&-4933,%d6	# force ILOG = -4933
23693 	bra.b		A4_str
23694 A3_cont:
23695 	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
23696 	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
23697 	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
23698 	sub.w		&0x3fff,%d0	# strip off bias
23699 	fadd.w		%d0,%fp0	# add in exp
23700 	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
23701 	fbge.w		pos_res		# if pos, branch
23702 	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
23703 	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
23704 	bra.b		A4_str		# go move out ILOG
23705 pos_res:
23706 	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
23707 	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
23708 
23709 
23710 # A4. Clr INEX bit.
23711 #     The operation in A3 above may have set INEX2.
23712 
23713 A4_str:
23714 	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
23715 
23716 
23717 # A5. Set ICTR = 0;
23718 #     ICTR is a flag used in A13.  It must be set before the
23719 #     loop entry A6. The lower word of d5 is used for ICTR.
23720 
23721 	clr.w		%d5		# clear ICTR
23722 
23723 # A6. Calculate LEN.
23724 #     LEN is the number of digits to be displayed.  The k-factor
23725 #     can dictate either the total number of digits, if it is
23726 #     a positive number, or the number of digits after the
23727 #     original decimal point which are to be included as
23728 #     significant.  See the 68882 manual for examples.
23729 #     If LEN is computed to be greater than 17, set OPERR in
23730 #     USER_FPSR.  LEN is stored in d4.
23731 #
23732 # Register usage:
23733 #	Input/Output
23734 #	d0: exponent/Unchanged
23735 #	d2: x/x/scratch
23736 #	d3: x/x
23737 #	d4: exc picture/LEN
23738 #	d5: ICTR/Unchanged
23739 #	d6: ILOG/Unchanged
23740 #	d7: k-factor/Unchanged
23741 #	a0: ptr for original operand/final result
23742 #	a1: x/x
23743 #	a2: x/x
23744 #	fp0: float(ILOG)/Unchanged
23745 #	fp1: x/x
23746 #	fp2: x/x
23747 #	F_SCR1:x/x
23748 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23749 #	L_SCR1:x/x
23750 #	L_SCR2:first word of X packed/Unchanged
23751 
23752 A6_str:
23753 	tst.l		%d7		# branch on sign of k
23754 	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
23755 	mov.l		%d7,%d4		# if k > 0, LEN = k
23756 	bra.b		len_ck		# skip to LEN check
23757 k_neg:
23758 	mov.l		%d6,%d4		# first load ILOG to d4
23759 	sub.l		%d7,%d4		# subtract off k
23760 	addq.l		&1,%d4		# add in the 1
23761 len_ck:
23762 	tst.l		%d4		# LEN check: branch on sign of LEN
23763 	ble.b		LEN_ng		# if neg, set LEN = 1
23764 	cmp.l		%d4,&17		# test if LEN > 17
23765 	ble.b		A7_str		# if not, forget it
23766 	mov.l		&17,%d4		# set max LEN = 17
23767 	tst.l		%d7		# if negative, never set OPERR
23768 	ble.b		A7_str		# if positive, continue
23769 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
23770 	bra.b		A7_str		# finished here
23771 LEN_ng:
23772 	mov.l		&1,%d4		# min LEN is 1
23773 
23774 
23775 # A7. Calculate SCALE.
23776 #     SCALE is equal to 10^ISCALE, where ISCALE is the number
23777 #     of decimal places needed to insure LEN integer digits
23778 #     in the output before conversion to bcd. LAMBDA is the sign
23779 #     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
23780 #     the rounding mode as given in the following table (see
23781 #     Coonen, p. 7.23 as ref.; however, the SCALE variable is
23782 #     of opposite sign in bindec.sa from Coonen).
23783 #
23784 #	Initial					USE
23785 #	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
23786 #	----------------------------------------------
23787 #	 RN	00	   0	   0		00/0	RN
23788 #	 RN	00	   0	   1		00/0	RN
23789 #	 RN	00	   1	   0		00/0	RN
23790 #	 RN	00	   1	   1		00/0	RN
23791 #	 RZ	01	   0	   0		11/3	RP
23792 #	 RZ	01	   0	   1		11/3	RP
23793 #	 RZ	01	   1	   0		10/2	RM
23794 #	 RZ	01	   1	   1		10/2	RM
23795 #	 RM	10	   0	   0		11/3	RP
23796 #	 RM	10	   0	   1		10/2	RM
23797 #	 RM	10	   1	   0		10/2	RM
23798 #	 RM	10	   1	   1		11/3	RP
23799 #	 RP	11	   0	   0		10/2	RM
23800 #	 RP	11	   0	   1		11/3	RP
23801 #	 RP	11	   1	   0		11/3	RP
23802 #	 RP	11	   1	   1		10/2	RM
23803 #
23804 # Register usage:
23805 #	Input/Output
23806 #	d0: exponent/scratch - final is 0
23807 #	d2: x/0 or 24 for A9
23808 #	d3: x/scratch - offset ptr into PTENRM array
23809 #	d4: LEN/Unchanged
23810 #	d5: 0/ICTR:LAMBDA
23811 #	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23812 #	d7: k-factor/Unchanged
23813 #	a0: ptr for original operand/final result
23814 #	a1: x/ptr to PTENRM array
23815 #	a2: x/x
23816 #	fp0: float(ILOG)/Unchanged
23817 #	fp1: x/10^ISCALE
23818 #	fp2: x/x
23819 #	F_SCR1:x/x
23820 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23821 #	L_SCR1:x/x
23822 #	L_SCR2:first word of X packed/Unchanged
23823 
23824 A7_str:
23825 	tst.l		%d7		# test sign of k
23826 	bgt.b		k_pos		# if pos and > 0, skip this
23827 	cmp.l		%d7,%d6		# test k - ILOG
23828 	blt.b		k_pos		# if ILOG >= k, skip this
23829 	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
23830 k_pos:
23831 	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
23832 	addq.l		&1,%d0		# add the 1
23833 	sub.l		%d4,%d0		# sub off LEN
23834 	swap		%d5		# use upper word of d5 for LAMBDA
23835 	clr.w		%d5		# set it zero initially
23836 	clr.w		%d2		# set up d2 for very small case
23837 	tst.l		%d0		# test sign of ISCALE
23838 	bge.b		iscale		# if pos, skip next inst
23839 	addq.w		&1,%d5		# if neg, set LAMBDA true
23840 	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
23841 	bgt.b		no_inf		# if false, skip rest
23842 	add.l		&24,%d0		# add in 24 to iscale
23843 	mov.l		&24,%d2		# put 24 in d2 for A9
23844 no_inf:
23845 	neg.l		%d0		# and take abs of ISCALE
23846 iscale:
23847 	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
23848 	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
23849 	lsl.w		&1,%d1		# put them in bits 2:1
23850 	add.w		%d5,%d1		# add in LAMBDA
23851 	lsl.w		&1,%d1		# put them in bits 3:1
23852 	tst.l		L_SCR2(%a6)	# test sign of original x
23853 	bge.b		x_pos		# if pos, don't set bit 0
23854 	addq.l		&1,%d1		# if neg, set bit 0
23855 x_pos:
23856 	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
23857 	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
23858 	lsl.l		&4,%d3		# put bits in proper position
23859 	fmov.l		%d3,%fpcr	# load bits into fpu
23860 	lsr.l		&4,%d3		# put bits in proper position
23861 	tst.b		%d3		# decode new rmode for pten table
23862 	bne.b		not_rn		# if zero, it is RN
23863 	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
23864 	bra.b		rmode		# exit decode
23865 not_rn:
23866 	lsr.b		&1,%d3		# get lsb in carry
23867 	bcc.b		not_rp2		# if carry clear, it is RM
23868 	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
23869 	bra.b		rmode		# exit decode
23870 not_rp2:
23871 	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
23872 rmode:
23873 	clr.l		%d3		# clr table index
23874 e_loop2:
23875 	lsr.l		&1,%d0		# shift next bit into carry
23876 	bcc.b		e_next2		# if zero, skip the mul
23877 	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
23878 e_next2:
23879 	add.l		&12,%d3		# inc d3 to next pwrten table entry
23880 	tst.l		%d0		# test if ISCALE is zero
23881 	bne.b		e_loop2		# if not, loop
23882 
# A8. Clr INEX; Force RZ.
#     The operation in A7 above may have set INEX2.
#     RZ mode is forced for the scaling operation to ensure
#     only one rounding error.  The grs bits are collected in
#     the INEX flag for use in A10.
23888 #
23889 # Register usage:
23890 #	Input/Output
23891 
23892 	fmov.l		&0,%fpsr	# clr INEX
23893 	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
23894 
23895 # A9. Scale X -> Y.
23896 #     The mantissa is scaled to the desired number of significant
23897 #     digits.  The excess digits are collected in INEX2. If mul,
23898 #     Check d2 for excess 10 exponential value.  If not zero,
23899 #     the iscale value would have caused the pwrten calculation
23900 #     to overflow.  Only a negative iscale can cause this, so
23901 #     multiply by 10^(d2), which is now only allowed to be 24,
23902 #     with a multiply by 10^8 and 10^16, which is exact since
23903 #     10^24 is exact.  If the input was denormalized, we must
23904 #     create a busy stack frame with the mul command and the
23905 #     two operands, and allow the fpu to complete the multiply.
23906 #
23907 # Register usage:
23908 #	Input/Output
23909 #	d0: FPCR with RZ mode/Unchanged
23910 #	d2: 0 or 24/unchanged
23911 #	d3: x/x
23912 #	d4: LEN/Unchanged
23913 #	d5: ICTR:LAMBDA
23914 #	d6: ILOG/Unchanged
23915 #	d7: k-factor/Unchanged
23916 #	a0: ptr for original operand/final result
23917 #	a1: ptr to PTENRM array/Unchanged
23918 #	a2: x/x
23919 #	fp0: float(ILOG)/X adjusted for SCALE (Y)
23920 #	fp1: 10^ISCALE/Unchanged
23921 #	fp2: x/x
23922 #	F_SCR1:x/x
23923 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23924 #	L_SCR1:x/x
23925 #	L_SCR2:first word of X packed/Unchanged
23926 
23927 A9_str:
23928 	fmov.x		(%a0),%fp0	# load X from memory
23929 	fabs.x		%fp0		# use abs(X)
23930 	tst.w		%d5		# LAMBDA is in lower word of d5
23931 	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
23932 	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
23933 	bra.w		A10_st		# branch to A10
23934 
23935 sc_mul:
23936 	tst.b		BINDEC_FLG(%a6)	# check for denorm
23937 	beq.w		A9_norm		# if norm, continue with mul
23938 
23939 # for DENORM, we must calculate:
23940 #	fp0 = input_op * 10^ISCALE * 10^24
23941 # since the input operand is a DENORM, we can't multiply it directly.
23942 # so, we do the multiplication of the exponents and mantissas separately.
23943 # in this way, we avoid underflow on intermediate stages of the
23944 # multiplication and guarantee a result without exception.
23945 	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
23946 
23947 	mov.w		(%sp),%d3	# grab exponent
23948 	andi.w		&0x7fff,%d3	# clear sign
23949 	ori.w		&0x8000,(%a0)	# make DENORM exp negative
23950 	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
23951 	subi.w		&0x3fff,%d3	# subtract BIAS
23952 	add.w		36(%a1),%d3
23953 	subi.w		&0x3fff,%d3	# subtract BIAS
23954 	add.w		48(%a1),%d3
23955 	subi.w		&0x3fff,%d3	# subtract BIAS
23956 
23957 	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
23958 
23959 	andi.w		&0x8000,(%sp)	# keep sign
23960 	or.w		%d3,(%sp)	# insert new exponent
23961 	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
23962 	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
23963 	mov.l		0x4(%a0),-(%sp)
23964 	mov.l		&0x3fff0000,-(%sp) # force exp to zero
23965 	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
23966 	fmul.x		(%sp)+,%fp0
23967 
23968 #	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
23969 #	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
23970 	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
23971 	mov.l		36+4(%a1),-(%sp)
23972 	mov.l		&0x3fff0000,-(%sp) # force exp to zero
23973 	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
23974 	mov.l		48+4(%a1),-(%sp)
23975 	mov.l		&0x3fff0000,-(%sp)# force exp to zero
23976 	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
23977 	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
23978 	bra.b		A10_st
23979 
23980 sc_mul_err:
23981 	bra.b		sc_mul_err
23982 
23983 A9_norm:
23984 	tst.w		%d2		# test for small exp case
23985 	beq.b		A9_con		# if zero, continue as normal
23986 	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
23987 	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
23988 A9_con:
23989 	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
23990 
23991 # A10. Or in INEX.
23992 #      If INEX is set, round error occurred.  This is compensated
23993 #      for by 'or-ing' in the INEX2 flag to the lsb of Y.
23994 #
23995 # Register usage:
23996 #	Input/Output
23997 #	d0: FPCR with RZ mode/FPSR with INEX2 isolated
23998 #	d2: x/x
23999 #	d3: x/x
24000 #	d4: LEN/Unchanged
24001 #	d5: ICTR:LAMBDA
24002 #	d6: ILOG/Unchanged
24003 #	d7: k-factor/Unchanged
24004 #	a0: ptr for original operand/final result
24005 #	a1: ptr to PTENxx array/Unchanged
24006 #	a2: x/ptr to FP_SCR1(a6)
24007 #	fp0: Y/Y with lsb adjusted
24008 #	fp1: 10^ISCALE/Unchanged
24009 #	fp2: x/x
24010 
24011 A10_st:
24012 	fmov.l		%fpsr,%d0	# get FPSR
24013 	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
24014 	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
24015 	btst		&9,%d0		# check if INEX2 set
24016 	beq.b		A11_st		# if clear, skip rest
24017 	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
24018 	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
24019 
24020 
24021 # A11. Restore original FPCR; set size ext.
24022 #      Perform FINT operation in the user's rounding mode.  Keep
24023 #      the size to extended.  The sintdo entry point in the sint
24024 #      routine expects the FPCR value to be in USER_FPCR for
24025 #      mode and precision.  The original FPCR is saved in L_SCR1.
24026 
24027 A11_st:
24028 	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
24029 	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
24030 #					;block exceptions
24031 
24032 
24033 # A12. Calculate YINT = FINT(Y) according to user's rounding mode,
24034 #      using an inline fint.x (the sintdo call is disabled).  The output is in fp0.
24035 #
24036 # Register usage:
24037 #	Input/Output
24038 #	d0: FPSR with AINEX cleared/FPCR with size set to ext
24039 #	d2: x/x/scratch
24040 #	d3: x/x
24041 #	d4: LEN/Unchanged
24042 #	d5: ICTR:LAMBDA/Unchanged
24043 #	d6: ILOG/Unchanged
24044 #	d7: k-factor/Unchanged
24045 #	a0: ptr for original operand/src ptr for sintdo
24046 #	a1: ptr to PTENxx array/Unchanged
24047 #	a2: ptr to FP_SCR1(a6)/Unchanged
24048 #	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24049 #	fp0: Y/YINT
24050 #	fp1: 10^ISCALE/Unchanged
24051 #	fp2: x/x
24052 #	F_SCR1:x/x
24053 #	F_SCR2:Y adjusted for inex/Y with original exponent
24054 #	L_SCR1:x/original USER_FPCR
24055 #	L_SCR2:first word of X packed/Unchanged
24056 
24057 A12_st:
24058 	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
24059 	mov.l	L_SCR1(%a6),-(%sp)
24060 	mov.l	L_SCR2(%a6),-(%sp)
24061 
24062 	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
24063 	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
24064 	tst.l		L_SCR2(%a6)	# test sign of original operand
24065 	bge.b		do_fint12		# if pos, use Y
24066 	or.l		&0x80000000,(%a0)	# if neg, use -Y
24067 do_fint12:
24068 	mov.l	USER_FPSR(%a6),-(%sp)
24069 #	bsr	sintdo		# sint routine returns int in fp0
24070 
24071 	fmov.l	USER_FPCR(%a6),%fpcr
24072 	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
24073 ##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
24074 ##	andi.l		&0x00000030,%d0
24075 ##	fmov.l		%d0,%fpcr
24076 	fint.x		FP_SCR1(%a6),%fp0	# do fint()
24077 	fmov.l	%fpsr,%d0
24078 	or.w	%d0,FPSR_EXCEPT(%a6)
24079 ##	fmov.l		&0x0,%fpcr
24080 ##	fmov.l		%fpsr,%d0		# don't keep ccodes
24081 ##	or.w		%d0,FPSR_EXCEPT(%a6)
24082 
24083 	mov.b	(%sp),USER_FPSR(%a6)
24084 	add.l	&4,%sp
24085 
24086 	mov.l	(%sp)+,L_SCR2(%a6)
24087 	mov.l	(%sp)+,L_SCR1(%a6)
24088 	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
24089 
24090 	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
24091 	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
24092 
24093 # A13. Check for LEN digits.
24094 #      If the int operation results in more than LEN digits,
24095 #      or less than LEN -1 digits, adjust ILOG and repeat from
24096 #      A6.  This test occurs only on the first pass.  If the
24097 #      result is exactly 10^LEN, increment ILOG and divide
24098 #      the mantissa by 10.  The calculation of 10^LEN cannot
24099 #      be inexact, since all powers of ten up to 10^27 are exact
24100 #      in extended precision, so the use of a previous power-of-ten
24101 #      table will introduce no error.
24102 #
24103 #
24104 # Register usage:
24105 #	Input/Output
24106 #	d0: FPCR with size set to ext/scratch final = 0
24107 #	d2: x/x
24108 #	d3: x/scratch final = x
24109 #	d4: LEN/LEN adjusted
24110 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
24111 #	d6: ILOG/ILOG adjusted
24112 #	d7: k-factor/Unchanged
24113 #	a0: pointer into memory for packed bcd string formation
24114 #	a1: ptr to PTENxx array/Unchanged
24115 #	a2: ptr to FP_SCR1(a6)/Unchanged
24116 #	fp0: int portion of Y/abs(YINT) adjusted
24117 #	fp1: 10^ISCALE/Unchanged
24118 #	fp2: x/10^LEN
24119 #	F_SCR1:x/x
24120 #	F_SCR2:Y with original exponent/Unchanged
24121 #	L_SCR1:original USER_FPCR/Unchanged
24122 #	L_SCR2:first word of X packed/Unchanged
24123 
24124 A13_st:
24125 	swap		%d5		# put ICTR in lower word of d5
24126 	tst.w		%d5		# check if ICTR = 0
24127 	bne		not_zr		# if non-zero, go to second test
24128 #
24129 # Compute 10^(LEN-1)
24130 #
24131 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
24132 	mov.l		%d4,%d0		# put LEN in d0
24133 	subq.l		&1,%d0		# d0 = LEN -1
24134 	clr.l		%d3		# clr table index
24135 l_loop:
24136 	lsr.l		&1,%d0		# shift next bit into carry
24137 	bcc.b		l_next		# if zero, skip the mul
24138 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
24139 l_next:
24140 	add.l		&12,%d3		# inc d3 to next pwrten table entry
24141 	tst.l		%d0		# test if LEN is zero
24142 	bne.b		l_loop		# if not, loop
24143 #
24144 # 10^(LEN-1) is computed for this test and A14.  If the input was
24145 # denormalized, check only the case in which YINT > 10^LEN.
24146 #
24147 	tst.b		BINDEC_FLG(%a6)	# check if input was norm
24148 	beq.b		A13_con		# if norm, continue with checking
24149 	fabs.x		%fp0		# take abs of YINT
24150 	bra		test_2
24151 #
24152 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24153 #
24154 A13_con:
24155 	fabs.x		%fp0		# take abs of YINT
24156 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
24157 	fbge.w		test_2		# if greater, do next test
24158 	subq.l		&1,%d6		# subtract 1 from ILOG
24159 	mov.w		&1,%d5		# set ICTR
24160 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
24161 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
24162 	bra.w		A6_str		# return to A6 and recompute YINT
24163 test_2:
24164 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
24165 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
24166 	fblt.w		A14_st		# if less, all is ok, go to A14
24167 	fbgt.w		fix_ex		# if greater, fix and redo
24168 	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
24169 	addq.l		&1,%d6		# and inc ILOG
24170 	bra.b		A14_st		# and continue elsewhere
24171 fix_ex:
24172 	addq.l		&1,%d6		# increment ILOG by 1
24173 	mov.w		&1,%d5		# set ICTR
24174 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
24175 	bra.w		A6_str		# return to A6 and recompute YINT
24176 #
24177 # Since ICTR <> 0, we have already been through one adjustment,
24178 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24179 # 10^LEN is again computed using whatever table is in a1 since the
24180 # value calculated cannot be inexact.
24181 #
24182 not_zr:
24183 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
24184 	mov.l		%d4,%d0		# put LEN in d0
24185 	clr.l		%d3		# clr table index
24186 z_loop:
24187 	lsr.l		&1,%d0		# shift next bit into carry
24188 	bcc.b		z_next		# if zero, skip the mul
24189 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
24190 z_next:
24191 	add.l		&12,%d3		# inc d3 to next pwrten table entry
24192 	tst.l		%d0		# test if LEN is zero
24193 	bne.b		z_loop		# if not, loop
24194 	fabs.x		%fp0		# get abs(YINT)
24195 	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
24196 	fbneq.w		A14_st		# if not, skip this
24197 	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
24198 	addq.l		&1,%d6		# and inc ILOG by 1
24199 	addq.l		&1,%d4		# and inc LEN
24200 	fmul.s		FTEN(%pc),%fp2	# LEN was bumped, so get the new 10^LEN
24201 
24202 # A14. Convert the mantissa to bcd.
24203 #      The binstr routine is used to convert the LEN digit
24204 #      mantissa to bcd in memory.  The input to binstr is
24205 #      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24206 #      such that the decimal point is to the left of bit 63.
24207 #      The bcd digits are stored in the correct position in
24208 #      the final string area in memory.
24209 #
24210 #
24211 # Register usage:
24212 #	Input/Output
24213 #	d0: x/LEN call to binstr - final is 0
24214 #	d1: x/0
24215 #	d2: x/ms 32-bits of mant of abs(YINT)
24216 #	d3: x/ls 32-bits of mant of abs(YINT)
24217 #	d4: LEN/Unchanged
24218 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
24219 #	d6: ILOG
24220 #	d7: k-factor/Unchanged
24221 #	a0: pointer into memory for packed bcd string formation
24222 #	    /ptr to first mantissa byte in result string
24223 #	a1: ptr to PTENxx array/Unchanged
24224 #	a2: ptr to FP_SCR1(a6)/Unchanged
24225 #	fp0: int portion of Y/abs(YINT) adjusted
24226 #	fp1: 10^ISCALE/Unchanged
24227 #	fp2: 10^LEN/Unchanged
24228 #	F_SCR1:x/Work area for final result
24229 #	F_SCR2:Y with original exponent/Unchanged
24230 #	L_SCR1:original USER_FPCR/Unchanged
24231 #	L_SCR2:first word of X packed/Unchanged
24232 
24233 A14_st:
24234 	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
24235 	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
24236 	lea.l		FP_SCR0(%a6),%a0
24237 	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
24238 	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
24239 	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
24240 	clr.l		4(%a0)		# zero word 2 of FP_RES
24241 	clr.l		8(%a0)		# zero word 3 of FP_RES
24242 	mov.l		(%a0),%d0	# move exponent to d0
24243 	swap		%d0		# put exponent in lower word
24244 	beq.b		no_sft		# if zero, don't shift
24245 	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
24246 	tst.l		%d0		# check if > 1
24247 	bgt.b		no_sft		# if so, don't shift
24248 	neg.l		%d0		# make exp positive
24249 m_loop:
24250 	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
24251 	roxr.l		&1,%d3		# the number of places
24252 	dbf.w		%d0,m_loop	# given in d0
24253 no_sft:
24254 	tst.l		%d2		# check for mantissa of zero
24255 	bne.b		no_zr		# if not, go on
24256 	tst.l		%d3		# continue zero check
24257 	beq.b		zer_m		# if zero, go directly to binstr
24258 no_zr:
24259 	clr.l		%d1		# put zero in d1 for addx
24260 	add.l		&0x00000080,%d3	# inc at bit 7
24261 	addx.l		%d1,%d2		# continue inc
24262 	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
24263 zer_m:
24264 	mov.l		%d4,%d0		# put LEN in d0 for binstr call
24265 	addq.l		&3,%a0		# a0 points to M16 byte in result
24266 	bsr		binstr		# call binstr to convert mant
24267 
24268 
24269 # A15. Convert the exponent to bcd.
24270 #      As in A14 above, the exp is converted to bcd and the
24271 #      digits are stored in the final string.
24272 #
24273 #      Digits are stored in L_SCR1(a6) on return from binstr as:
24274 #
24275 #	 31               16 15                0
24276 #	-----------------------------------------
24277 #	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
24278 #	-----------------------------------------
24279 #
24280 # And are moved into their proper places in FP_SCR0.  If digit e4
24281 # is non-zero, OPERR is signaled.  In all cases, all 4 digits are
24282 # written as specified in the 881/882 manual for packed decimal.
24283 #
24284 # Register usage:
24285 #	Input/Output
24286 #	d0: x/LEN call to binstr - final is 0
24287 #	d1: x/scratch (0);shift count for final exponent packing
24288 #	d2: x/ms 32-bits of exp fraction/scratch
24289 #	d3: x/ls 32-bits of exp fraction
24290 #	d4: LEN/Unchanged
24291 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
24292 #	d6: ILOG
24293 #	d7: k-factor/Unchanged
24294 #	a0: ptr to result string/ptr to L_SCR1(a6)
24295 #	a1: ptr to PTENxx array/Unchanged
24296 #	a2: ptr to FP_SCR1(a6)/Unchanged
24297 #	fp0: abs(YINT) adjusted/float(ILOG)
24298 #	fp1: 10^ISCALE/Unchanged
24299 #	fp2: 10^LEN/Unchanged
24300 #	F_SCR1:Work area for final result/BCD result
24301 #	F_SCR2:Y with original exponent/ILOG/10^4
24302 #	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24303 #	L_SCR2:first word of X packed/Unchanged
24304 
24305 A15_st:
24306 	tst.b		BINDEC_FLG(%a6)	# check for denorm
24307 	beq.b		not_denorm
24308 	ftest.x		%fp0		# test for zero
24309 	fbeq.w		den_zero	# if zero, use k-factor or 4933
24310 	fmov.l		%d6,%fp0	# float ILOG
24311 	fabs.x		%fp0		# get abs of ILOG
24312 	bra.b		convrt
24313 den_zero:
24314 	tst.l		%d7		# check sign of the k-factor
24315 	blt.b		use_ilog	# if negative, use ILOG
24316 	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
24317 	bra.b		convrt		# do it
24318 use_ilog:
24319 	fmov.l		%d6,%fp0	# float ILOG
24320 	fabs.x		%fp0		# get abs of ILOG
24321 	bra.b		convrt
24322 not_denorm:
24323 	ftest.x		%fp0		# test for zero
24324 	fbneq.w		not_zero	# if zero, force exponent
24325 	fmov.s		FONE(%pc),%fp0	# force exponent to 1
24326 	bra.b		convrt		# do it
24327 not_zero:
24328 	fmov.l		%d6,%fp0	# float ILOG
24329 	fabs.x		%fp0		# get abs of ILOG
24330 convrt:
24331 	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
24332 	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
24333 	mov.l		4(%a2),%d2	# move word 2 to d2
24334 	mov.l		8(%a2),%d3	# move word 3 to d3
24335 	mov.w		(%a2),%d0	# move exp to d0
24336 	beq.b		x_loop_fin	# if zero, skip the shift
24337 	sub.w		&0x3ffd,%d0	# subtract off bias
24338 	neg.w		%d0		# make exp positive
24339 x_loop:
24340 	lsr.l		&1,%d2		# shift d2:d3 right
24341 	roxr.l		&1,%d3		# the number of places
24342 	dbf.w		%d0,x_loop	# given in d0
24343 x_loop_fin:
24344 	clr.l		%d1		# put zero in d1 for addx
24345 	add.l		&0x00000080,%d3	# inc at bit 7
24346 	addx.l		%d1,%d2		# continue inc
24347 	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
24348 	mov.l		&4,%d0		# put 4 in d0 for binstr call
24349 	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
24350 	bsr		binstr		# call binstr to convert exp
24351 	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
24352 	mov.l		&12,%d1		# use d1 for shift count
24353 	lsr.l		%d1,%d0		# shift d0 right by 12
24354 	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
24355 	lsr.l		%d1,%d0		# shift d0 right by 12
24356 	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
24357 	tst.b		%d0		# check if e4 is zero
24358 	beq.b		A16_st		# if zero, skip rest
24359 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
24360 
24361 
24362 # A16. Write sign bits to final string.
24363 #	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24364 #
24365 # Register usage:
24366 #	Input/Output
24367 #	d0: x/scratch - final is x
24368 #	d2: x/x
24369 #	d3: x/x
24370 #	d4: LEN/Unchanged
24371 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
24372 #	d6: ILOG/ILOG adjusted
24373 #	d7: k-factor/Unchanged
24374 #	a0: ptr to L_SCR1(a6)/Unchanged
24375 #	a1: ptr to PTENxx array/Unchanged
24376 #	a2: ptr to FP_SCR1(a6)/Unchanged
24377 #	fp0: float(ILOG)/Unchanged
24378 #	fp1: 10^ISCALE/Unchanged
24379 #	fp2: 10^LEN/Unchanged
24380 #	F_SCR1:BCD result with correct signs
24381 #	F_SCR2:ILOG/10^4
24382 #	L_SCR1:Exponent digits on return from binstr
24383 #	L_SCR2:first word of X packed/Unchanged
24384 
24385 A16_st:
24386 	clr.l		%d0		# clr d0 for collection of signs
24387 	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
24388 	tst.l		L_SCR2(%a6)	# check sign of original mantissa
24389 	bge.b		mant_p		# if pos, don't set SM
24390 	mov.l		&2,%d0		# move 2 in to d0 for SM
24391 mant_p:
24392 	tst.l		%d6		# check sign of ILOG
24393 	bge.b		wr_sgn		# if pos, don't set SE
24394 	addq.l		&1,%d0		# set bit 0 in d0 for SE
24395 wr_sgn:
24396 	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
24397 
24398 # Clean up and restore all registers used.
24399 
24400 	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
24401 	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
24402 	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
24403 	rts
24404 
24405 	global		PTENRN
24406 PTENRN:					# 10^(2^i), i = 0..12, 12 bytes per entry; the table bindec picks when the decoded rmode is RN
24407 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
24408 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
24409 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
24410 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
24411 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
24412 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
24413 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
24414 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
24415 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
24416 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
24417 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
24418 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
24419 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
24420 
24421 	global		PTENRP
24422 PTENRP:					# 10^(2^i), i = 0..12, 12 bytes per entry; the table bindec picks when the decoded rmode is RP
24423 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
24424 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
24425 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
24426 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
24427 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
24428 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
24429 	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
24430 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
24431 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
24432 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
24433 	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
24434 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
24435 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
24436 
24437 	global		PTENRM
24438 PTENRM:					# 10^(2^i), i = 0..12, 12 bytes per entry; the table bindec picks when the decoded rmode is RM
24439 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
24440 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
24441 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
24442 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
24443 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
24444 	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
24445 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
24446 	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
24447 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
24448 	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
24449 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
24450 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
24451 	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
24452 
24453 #########################################################################
24454 # binstr(): Converts a 64-bit binary integer to bcd.			#
24455 #									#
24456 # INPUT *************************************************************** #
24457 #	d2:d3 = 64-bit binary integer					#
24458 #	d0    = desired length (LEN)					#
24459 #	a0    = pointer to start in memory for bcd characters		#
24460 #		(This pointer must point to byte 4 of the first		#
24461 #		 lword of the packed decimal memory string.)		#
24462 #									#
24463 # OUTPUT ************************************************************** #
24464 #	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
24465 #									#
24466 # ALGORITHM ***********************************************************	#
24467 #	The 64-bit binary is assumed to have a decimal point before	#
24468 #	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
24469 #	shift and a mul by 8 shift.  The bits shifted out of the	#
24470 #	msb form a decimal digit.  This process is iterated until	#
24471 #	LEN digits are formed.						#
24472 #									#
24473 # A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
24474 #     digit formed will be assumed the least significant.  This is	#
24475 #     to force the first byte formed to have a 0 in the upper 4 bits.	#
24476 #									#
24477 # A2. Beginning of the loop:						#
24478 #     Copy the fraction in d2:d3 to d4:d5.				#
24479 #									#
24480 # A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
24481 #     extracts and shifts.  The three msbs from d2 will go into d1.	#
24482 #									#
24483 # A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
24484 #     will be collected by the carry.					#
24485 #									#
24486 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
24487 #     into d2:d3.  D1 will contain the bcd digit formed.		#
24488 #									#
24489 # A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
24490 #     zero, it is the ls digit.  Put the digit in its place in the	#
24491 #     upper word of d7.  If it is the ls digit, write the byte		#
24492 #     from d7 to memory.						#
24493 #									#
24494 # A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
24495 #									#
24496 #########################################################################
24497 
24498 #	Implementation Notes:
24499 #
24500 #	The registers are used as follows:
24501 #
24502 #		d0: LEN counter
24503 #		d1: temp used to form the digit
24504 #		d2: upper 32-bits of fraction for mul by 8
24505 #		d3: lower 32-bits of fraction for mul by 8
24506 #		d4: upper 32-bits of fraction for mul by 2
24507 #		d5: lower 32-bits of fraction for mul by 2
24508 #		d6: temp for bit-field extracts
24509 #		d7: byte digit formation word;digit count {0,1}
24510 #		a0: pointer into memory for packed bcd string formation
24511 #
24512 
24513 	global		binstr
24514 binstr:					# emit d0 bcd digits of the fraction d2:d3, two per byte, at (a0)+
24515 	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}
24516 
24517 #
24518 # A1: Init d7
24519 #
24520 	mov.l		&1,%d7		# d7 low word = 1, high word = 0: first digit lands in a low nibble
24521 	subq.l		&1,%d0		# dbf makes count+1 passes, so count from LEN-1
24522 #
24523 # A2. Copy d2:d3 to d4:d5.  Start loop.
24524 #
24525 loop:
24526 	mov.l		%d2,%d4		# copy the fraction before muls
24527 	mov.l		%d3,%d5		# to d4:d5
24528 #
24529 # A3. Multiply d2:d3 by 8; extract msbs into d1.
24530 #
24531 	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
24532 	asl.l		&3,%d2		# shift d2 left by 3 places
24533 	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
24534 	asl.l		&3,%d3		# shift d3 left by 3 places
24535 	or.l		%d6,%d2		# or in msbs from d3 into d2
24536 #
24537 # A4. Multiply d4:d5 by 2; add carry out to d1.
24538 #
24539 	asl.l		&1,%d5		# mul d5 by 2
24540 	roxl.l		&1,%d4		# mul d4 by 2
24541 	swap		%d6		# put 0 in d6 lower word (swap leaves X intact)
24542 	addx.w		%d6,%d1		# add in extend from mul by 2
24543 #
24544 # A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
24545 #
24546 	add.l		%d5,%d3		# add lower 32 bits
24547 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
24548 	addx.l		%d4,%d2		# add with extend upper 32 bits
24549 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
24550 	addx.w		%d6,%d1		# add in extend from add to d1
24551 	swap		%d6		# with d6 = 0; put 0 in upper word
24552 #
24553 # A6. Test d7 and branch.
24554 #
24555 	tst.w		%d7		# d7 low word: 0 = ms digit of a byte, else ls digit
24556 	beq.b		first_d		# if non-zero, form byte & write
24557 sec_d:
24558 	swap		%d7		# bring first digit to word d7b
24559 	asl.w		&4,%d7		# first digit in upper 4 bits d7b
24560 	add.w		%d1,%d7		# add in ls digit to d7b
24561 	mov.b		%d7,(%a0)+	# store d7b byte in memory
24562 	swap		%d7		# bring digit-count flag back to word d7a
24563 	clr.w		%d7		# set d7a to signal no digits done
24564 	dbf.w		%d0,loop	# do loop some more!
24565 	bra.b		end_bstr	# finished, so exit
24566 first_d:
24567 	swap		%d7		# put digit word in d7b
24568 	mov.w		%d1,%d7		# put new digit in d7b
24569 	swap		%d7		# bring digit-count flag back to word d7a
24570 	addq.w		&1,%d7		# set d7a to signal first digit done
24571 	dbf.w		%d0,loop	# do loop some more!
24572 	swap		%d7		# count ran out on an ms digit: flush it
24573 	lsl.w		&4,%d7		# move it to upper 4 bits
24574 	mov.b		%d7,(%a0)+	# store it in memory string
24575 #
24576 # Clean up and return; the digits are in memory, not in a register.
24577 #
24578 end_bstr:
24579 	movm.l		(%sp)+,&0xff	#  {%d0-%d7}; a0 is not restored and stays past the last byte stored
24580 	rts
24581 
24582 #########################################################################
24583 # XDEF ****************************************************************	#
24584 #	facc_in_b(): dmem_read_byte failed				#
24585 #	facc_in_w(): dmem_read_word failed				#
24586 #	facc_in_l(): dmem_read_long failed				#
24587 #	facc_in_d(): dmem_read of dbl prec failed			#
24588 #	facc_in_x(): dmem_read of ext prec failed			#
24589 #									#
24590 #	facc_out_b(): dmem_write_byte failed				#
24591 #	facc_out_w(): dmem_write_word failed				#
24592 #	facc_out_l(): dmem_write_long failed				#
24593 #	facc_out_d(): dmem_write of dbl prec failed			#
24594 #	facc_out_x(): dmem_write of ext prec failed			#
24595 #									#
24596 # XREF ****************************************************************	#
24597 #	_real_access() - exit through access error handler		#
24598 #									#
24599 # INPUT ***************************************************************	#
24600 #	None								#
24601 #									#
24602 # OUTPUT **************************************************************	#
24603 #	None								#
24604 #									#
24605 # ALGORITHM ***********************************************************	#
24606 #	Flow jumps here when an FP data fetch call gets an error	#
24607 # result. This means the operating system wants an access error frame	#
24608 # made out of the current exception stack frame.			#
24609 #	So, we first call restore() which makes sure that any updated	#
24610 # -(an)+ register gets returned to its pre-exception value and then	#
24611 # we change the stack to an access error stack frame.			#
24612 #									#
24613 #########################################################################
24614 
24615 facc_in_b:					# dmem_read_byte failed
24616 	movq.l		&0x1,%d0			# d0 = size passed to restore: one byte
24617 	bsr.w		restore				# fix An: undo any (An)+/-(An) update; clobbers d1
24618 
24619 	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24620 	bra.w		facc_finish			# go build the access error frame
24621 
24622 facc_in_w:					# dmem_read_word failed
24623 	movq.l		&0x2,%d0			# d0 = size passed to restore: two bytes
24624 	bsr.w		restore				# fix An: undo any (An)+/-(An) update; clobbers d1
24625 
24626 	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24627 	bra.b		facc_finish			# go build the access error frame
24628 
24629 facc_in_l:					# dmem_read_long failed
24630 	movq.l		&0x4,%d0			# d0 = size passed to restore: four bytes
24631 	bsr.w		restore				# fix An: undo any (An)+/-(An) update; clobbers d1
24632 
24633 	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24634 	bra.b		facc_finish			# go build the access error frame
24635 
24636 facc_in_d:					# dmem_read of dbl prec failed
24637 	movq.l		&0x8,%d0			# d0 = size passed to restore: eight bytes
24638 	bsr.w		restore				# fix An: undo any (An)+/-(An) update; clobbers d1
24639 
24640 	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24641 	bra.b		facc_finish			# go build the access error frame
24642 
24643 facc_in_x:					# dmem_read of ext prec failed
24644 	movq.l		&0xc,%d0			# d0 = size passed to restore: twelve bytes
24645 	bsr.w		restore				# fix An: undo any (An)+/-(An) update; clobbers d1
24646 
24647 	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW: same staged bits as facc_in_d
24648 	bra.b		facc_finish			# go build the access error frame
24649 
24650 ################################################################
24651 
24652 facc_out_b:					# dmem_write_byte failed
24653 	movq.l		&0x1,%d0			# d0 = size passed to restore: one byte
24654 	bsr.w		restore				# restore An: undo any (An)+/-(An) update; clobbers d1
24655 
24656 	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24657 	bra.b		facc_finish			# go build the access error frame
24658 
24659 facc_out_w:					# dmem_write_word failed
24660 	movq.l		&0x2,%d0			# d0 = size passed to restore: two bytes
24661 	bsr.w		restore				# restore An: undo any (An)+/-(An) update; clobbers d1
24662 
24663 	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24664 	bra.b		facc_finish			# go build the access error frame
24665 
24666 facc_out_l:					# dmem_write_long failed
24667 	movq.l		&0x4,%d0			# d0 = size passed to restore: four bytes
24668 	bsr.w		restore				# restore An: undo any (An)+/-(An) update; clobbers d1
24669 
24670 	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24671 	bra.b		facc_finish			# go build the access error frame
24672 
24673 facc_out_d:					# dmem_write of dbl prec failed
24674 	movq.l		&0x8,%d0			# d0 = size passed to restore: eight bytes
24675 	bsr.w		restore				# restore An: undo any (An)+/-(An) update; clobbers d1
24676 
24677 	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW: bits staged in EXC_VOFF
24678 	bra.b		facc_finish			# go build the access error frame
24679 
24680 facc_out_x:					# dmem_write of ext prec failed
24681 	movq.l		&0xc,%d0			# d0 = size passed to restore: twelve bytes (movq, as in facc_in_x)
24682 	bsr.w		restore				# restore An: undo any (An)+/-(An) update; clobbers d1
24683 
24684 	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW: same staged bits as facc_out_d; falls through to facc_finish
24685 
24686 # facc_finish: turn the current exception stack frame into an access error
24687 # frame (voff 0x4008, EA and FSLW appended) and exit through _real_access.
24688 facc_finish:
24689 	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC: stacked PC = saved FPIAR
24690 
24691 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
24692 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24693 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
24694 
24695 	unlk		%a6
24696 
24697 	mov.l		(%sp),-(%sp)		# grow frame by 4 bytes: copy SR, hi(PC) down
24698 	mov.l		0x8(%sp),0x4(%sp)	# slide lo(PC) and the old voff word down
24699 	mov.l		0xc(%sp),0x8(%sp)	# slide EA down to offset 0x8
24700 	mov.l		&0x00000001,0xc(%sp)	# FSLW slot = 0x00000001
24701 	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size): upper word = old voff word (presumably what the stubs stored via EXC_VOFF)
24702 	mov.w		&0x4008,0x6(%sp)	# store the new voff word, 0x4008
24703 
24704 	btst		&0x5,(%sp)		# bit 5 of the SR high byte: supervisor or user mode?
24705 	beq.b		facc_out2		# user
24706 	bset		&0x2,0xd(%sp)		# set supervisor TM bit (bit 2 of FSLW byte 1)
24707 
24708 facc_out2:
24709 	bra.l		_real_access		# exit through the access error handler
24710 
24711 ##################################################################
24712 
# restore: undo an address-register update made by the emulation.
#
# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value. but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here. for any other addressing mode nothing is changed.
#
# In:	d0 = byte count to undo (the stubs above pass the operand size)
#	a6 = the package's frame pointer; EXC_OPWORD+0x1(%a6) is the
#	     opword byte holding the <ea> mode (bits 5-3) and the <ea>
#	     register (bits 2-0)
# Out:	returns with rts, either directly or from one of the ri_a*
#	routines, which are entered by branch rather than by call
# Clobbers: d1 always; d0 is negated on the predecrement path (rest_dec);
#	a0 on the user-mode a7 path (ri_a7)
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# isolate <ea> mode field
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# neither: An was never changed
24726 
# rest_inc: dispatch to the fix-up routine for the <ea> register.
#
# In:	d0 = amount to subtract from An (positive for postincrement;
#	     already negated by rest_dec for predecrement)
#	a6 = the package's frame pointer
# Does:	takes the register number from bits 2-0 of the opword byte, loads
#	the matching 16-bit entry of tbl_rest_inc, and jumps to
#	tbl_rest_inc + entry. both accesses are pc-relative.
# Clobbers: d1
rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An (also clears the upper byte of d1.w)

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1 # d1 = table entry for An
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)	# go to tbl_rest_inc + entry

# one 16-bit entry per address register, indexed by register number;
# each entry is the distance from tbl_rest_inc to that register's routine.
tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc
24743 
# ri_a0 .. ri_a6: per-register fix-up routines reached through tbl_rest_inc.
# each one subtracts d0 from a single address register and returns (rts)
# to whoever called restore. d0 is the byte count for a postincrement <ea>
# and its negation for a predecrement <ea> (see rest_dec), so the same
# subtract undoes either kind of update.
#
# a0, a1:  changed in their save slots at EXC_DREGS+0x8 / EXC_DREGS+0xc;
#          facc_finish reloads d0-d1/a0-a1 from EXC_DREGS.
# a2 - a5: changed directly in the live register.
# a6:      changed in the longword at (a6), which the unlk in facc_finish
#          loads back into a6.
ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# ri_a7: a7 case of the postincrement/predecrement fix-up.
#
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
#
# In:	d0 = amount to subtract (see rest_inc)
#	EXC_VOFF(%a6) = still the original byte; 0x30 selects the
#	     "moved in" case (where that value is set is outside this excerpt)
# Clobbers: a0 on the user-mode path. facc_finish reloads a0 from
#	EXC_DREGS afterwards on the paths visible here.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0			# undo the update
	movc		%a0,%usp		# write corrected USP back
ri_a7_done:
	rts
24781 
# rest_dec: predecrement entry of the fix-up.
# need to invert adjustment value if the <ea> was predec: the ri_a*
# routines always subtract d0, so negating it first makes them add the
# byte count back onto An.
#
# In:	d0 = byte count
# Out:	d0 negated; continues in rest_inc, which returns to restore's caller
rest_dec:
	neg.l		%d0			# subtracting -size adds size back
	bra.b		rest_inc
24786