1/*
2 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#include "pico/asm_helper.S"
8#if HAS_DOUBLE_COPROCESSOR
9
10pico_default_asm_setup
11
@ double_section: switch to the section that will hold the code that follows.
@ The single argument is the name handed to the section-name helper macro.
@ When PICO_DOUBLE_IN_RAM is non-zero the section name is built with
@ RAM_SECTION_NAME, otherwise with SECTION_NAME; in both cases the section is
@ given the "ax" flags (allocatable, executable).
.macro double_section name
#if !PICO_DOUBLE_IN_RAM
.section SECTION_NAME(\name), "ax"
#else
.section RAM_SECTION_NAME(\name), "ax"
#endif
.endm
19
@ double_wrapper_section: select the section for a wrapped function.
@ The argument is passed through WRAPPER_FUNC_NAME and the result is handed
@ to double_section, which picks the RAM or default section variant.
.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm
23
double_wrapper_section conv_tod

@ int642double / __aeabi_l2d: convert a signed 64-bit integer to double,
@ rounding to nearest with ties to even.
@   in:  r0 = low word, r1 = high word
@   out: r0 = low word, r1 = high word of the double
@ Sets r2 = 0 (no bits after the point) and falls through into fix642double.
wrapper_func __aeabi_l2d
regular_func int642double
    movs    r2, #0                      @ integer input: no fractional bits; fall through

@ fix642double: convert a signed 64-bit fixed-point value to double, rounding
@ to nearest with ties to even.
@   in:  r0:r1 = value (r0 is the low word), r2 = number of bits after the point
@   out: r0:r1 = double (r1 holds sign, exponent and top of the mantissa)
@   clobbers r2, r3, r12 and the flags
@ Non-negative inputs branch to local label 10, the entry of the unsigned
@ routine that follows this one. Negative inputs are negated and converted
@ here, and every exit of this code returns a result with the sign bit set.
@ If the exponent falls outside the representable range the result is -0
@ (too small) or -Inf (too large).
regular_func fix642double
    cmp     r1, #0
    bge     10f                         @ non-negative: the unsigned code handles it
    rsbs    r0, r0, #0
    sbc     r1, r1, r1, lsl #1          @ r1 = -r1 - borrow, so r0:r1 is now the magnitude
    cbz     r1, 7f                      @ magnitude fits in the low word?
    clz     r3, r1
    subs    r3, r3, #11                 @ left shift that puts the leading 1 at bit 20 of r1
    bmi     2f                          @ negative: a right shift with rounding is needed
    rsbs    r12, r3, #32
    lsrs    r12, r0, r12                @ bits of r0 that move up into r1
    lsls    r0, r0, r3
    lsls    r1, r1, r3
    orrs    r1, r1, r12
    add     r2, r2, r3
    rsbs    r2, r2, #0
    add     r2, r2, #0x3ff+19+32        @ biased exponent minus 1; the leading 1 adds the last 1
    add     r1, r1, r2, lsl #20         @ insert exponent
    orr     r1, r1, #0x80000000         @ set sign
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo                          @ exponent in range?
    bxlo    lr
    b       3f                          @ over/underflow
7:
    mov     r1, r2                      @ fix2double_neg expects the fraction bit count in r1
    b       fix2double_neg              @ 32-bit magnitude in r0: use the 32-bit negative path
2:
    add     r3, r3, #33
    lsls    r12, r0, r3                 @ rounding bit in carry, sticky bits in Z
    sub     r3, r3, #1                  @ from here to the beq nothing may touch the flags
    lsl     r12, r1, r3                 @ bits of r1 that move down into r0
    rsb     r3, r3, #32                 @ r3 = right shift count
    lsr     r0, r0, r3
    lsr     r1, r1, r3
    orr     r0, r0, r12
    sub     r2, r3, r2
    add     r2, r2, #0x3ff+19+32        @ biased exponent minus 1
    beq     4f                          @ sticky bits all zero: potential rounding tie
    adcs    r0, r0, #0                  @ add the rounding bit
5:
    adc     r1, r1, r2, lsl #20         @ insert exponent, add rounding carry
    orr     r1, r1, #0x80000000         @ set sign
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo
    bxlo    lr
@ over/underflow: only negative inputs reach this point, so the sign must be
@ kept, as the 32-bit negative path in fix2double does.
3:
    mov     r1, #0x80000000             @ underflow: -0 (mov leaves the cmp flags intact)
    it      ge
    movtge  r1, #0xfff0                 @ overflow: -Inf
    mov     r0, #0
    bx      lr
@ NOTE(review): no branch to this local label is visible in this file.
1:
    movs    r1, #0
    bx      lr
4:
    bcc     5b                          @ rounding bit clear: not a tie after all
    adcs    r0, r0, #0                  @ tie: round up ...
    bic     r0, r0, #1                  @ ... then force the result to even
    b       5b
96
@ uint642double / __aeabi_ul2d: convert an unsigned 64-bit integer to double,
@ rounding to nearest with ties to even.
@   in:  r0 = low word, r1 = high word
@   out: r0 = low word, r1 = high word of the double
@ Sets r2 = 0 (no bits after the point) and falls through into ufix642double.
wrapper_func __aeabi_ul2d
regular_func uint642double
    movs    r2, #0                      @ integer input: no fractional bits; fall through

@ ufix642double: convert an unsigned 64-bit fixed-point value to double,
@ rounding to nearest with ties to even.
@   in:  r0:r1 = value (r0 is the low word), r2 = number of bits after the point
@   out: r0:r1 = double
@   clobbers r2, r3, r12 and the flags
@ Local label 10 is also the target used by the signed 64-bit routine for
@ non-negative inputs. If the exponent falls outside the representable range
@ the result is +0 (too small) or +Inf (too large).
regular_func ufix642double
10:
    cbz     r1, 7f                      @ high word zero: 32-bit path is enough
    clz     r3, r1
    subs    r3, r3, #11                 @ left shift that puts the leading 1 at bit 20 of r1
    bmi     2f                          @ negative: a right shift with rounding is needed
    rsbs    r12, r3, #32
    lsrs    r12, r0, r12                @ bits of r0 that move up into r1
    lsls    r0, r0, r3
    lsls    r1, r1, r3
    orrs    r1, r1, r12
    add     r2, r2, r3
    rsbs    r2, r2, #0
    add     r2, r2, #0x3ff+19+32        @ biased exponent minus 1; the leading 1 adds the last 1
    add     r1, r1, r2, lsl #20         @ insert exponent
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo                          @ exponent in range?
    bxlo    lr
    b       3f                          @ over/underflow
7:
    mov     r1, r2                      @ ufix2double expects the fraction bit count in r1
    b       ufix2double
2:
    add     r3, r3, #33
    lsls    r12, r0, r3                 @ rounding bit in carry, sticky bits in Z
    sub     r3, r3, #1                  @ from here to the beq nothing may touch the flags
    lsl     r12, r1, r3                 @ bits of r1 that move down into r0
    rsb     r3, r3, #32                 @ r3 = right shift count
    lsr     r0, r0, r3
    lsr     r1, r1, r3
    orr     r0, r0, r12
    sub     r2, r3, r2
    add     r2, r2, #0x3ff+19+32        @ biased exponent minus 1
    beq     4f                          @ sticky bits all zero: potential rounding tie
    adcs    r0, r0, #0                  @ add the rounding bit
5:
    adc     r1, r1, r2, lsl #20         @ insert exponent, add rounding carry
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo
    bxlo    lr
@ over/underflow
3:
    mov     r1, #0                      @ underflow: +0 (mov leaves the cmp flags intact)
    it      ge
    movtge  r1, #0x7ff0                 @ overflow: +Inf
    mov     r0, #0
    bx      lr
@ NOTE(review): no branch to this local label is visible in this file.
1:
    movs    r1, #0
    bx      lr
4:
    bcc     5b                          @ rounding bit clear: not a tie after all
    adcs    r0, r0, #0                  @ tie: round up ...
    bic     r0, r0, #1                  @ ... then force the result to even
    b       5b
162
@ fix2double: convert a signed 32-bit fixed-point value to double.
@   in:  r0 = value, r1 = number of bits after the point
@   out: r0 = low word, r1 = high word of the double
@   clobbers r2, r3, r12 and the flags
@ Non-negative inputs (including zero) are passed to ufix2double. Negative
@ inputs are negated and converted with the sign bit set. No rounding step is
@ present: the 32-bit magnitude is only shifted into place.
@ fix2double_neg is a second entry point taking a non-zero magnitude in r0
@ and the fraction bit count in r1; it is used by the 64-bit signed routine.
regular_func fix2double
    cmp     r0, #0
    bge     ufix2double                 @ non-negative: the unsigned code handles it
    rsbs    r0, r0, #0                  @ magnitude
fix2double_neg:
    clz     r3, r0
    subs    r3, r3, #11                 @ left shift that puts the leading 1 at bit 20
    bmi     2f                          @ negative: value is wider than 21 bits
    lsls    r0, r0, r3
    add     r2, r1, r3
    rsbs    r2, r2, #0
    add     r2, r2, #0x3ff+19           @ biased exponent minus 1; the leading 1 adds the last 1
    add     r1, r0, r2, lsl #20         @ insert exponent
    orr     r1, r1, #0x80000000         @ set sign
    mov     r0, #0                      @ low mantissa word is empty
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo                          @ exponent in range?
    bxlo    lr
    b       3f                          @ over/underflow
2:
    rsb     r3, r3, #0                  @ r3 = right shift count
    lsrs    r12, r0, r3                 @ high mantissa word
    rsb     r2, r3, #32
    lsls    r0, r0, r2                  @ low mantissa word: the bits shifted out of the top
    sub     r2, r3, r1
    add     r2, r2, #0x3ff+19           @ biased exponent minus 1
    add     r1, r12, r2, lsl #20        @ insert exponent
    orr     r1, r1, #0x80000000         @ set sign
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo
    bxlo    lr
@ over/underflow
3:
    mov     r1, #0x80000000             @ underflow: -0 (mov leaves the cmp flags intact)
    it      ge
    movtge  r1, #0xfff0                 @ overflow: -Inf
    mov     r0, #0
    bx      lr
@ NOTE(review): no branch to this local label is visible in this file.
1:
    movs    r1, #0
    bx      lr
209
@ ufix2double: convert an unsigned 32-bit fixed-point value to double.
@   in:  r0 = value, r1 = number of bits after the point
@   out: r0 = low word, r1 = high word of the double
@   clobbers r2, r3, r12 and the flags
@ A zero input returns +0. No rounding step is present: the 32-bit value is
@ only shifted into place. If the exponent falls outside the representable
@ range the result is +0 (too small) or +Inf (too large).
regular_func ufix2double
    cbz     r0, 1f                      @ zero input: return +0
    clz     r3, r0
    subs    r3, r3, #11                 @ left shift that puts the leading 1 at bit 20
    bmi     2f                          @ negative: value is wider than 21 bits
    lsls    r0, r0, r3
    add     r2, r1, r3
    rsbs    r2, r2, #0
    add     r2, r2, #0x3ff+19           @ biased exponent minus 1; the leading 1 adds the last 1
    add     r1, r0, r2, lsl #20         @ insert exponent
    mov     r0, #0                      @ low mantissa word is empty
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo                          @ exponent in range?
    bxlo    lr
    b       3f                          @ over/underflow
2:
    rsbs    r3, r3, #0                  @ r3 = right shift count
    lsrs    r12, r0, r3                 @ high mantissa word
    rsb     r2, r3, #32
    lsls    r0, r0, r2                  @ low mantissa word: the bits shifted out of the top
    sub     r2, r3, r1
    add     r2, r2, #0x3ff+19           @ biased exponent minus 1
    add     r1, r12, r2, lsl #20        @ insert exponent
    mov     r3, #0x7fe
    cmp     r2, r3
    it      lo
    bxlo    lr
@ over/underflow
3:
    mov     r1, #0                      @ underflow: +0 (mov leaves the cmp flags intact)
    it      ge
    movtge  r1, #0x7ff0                 @ overflow: +Inf
    mov     r0, #0
    bx      lr
1:
    movs    r1, #0                      @ r0 is already zero here
    bx      lr
251
double_wrapper_section conv_dtoi64

@ double2int64_z / __aeabi_d2lz: convert a double to a signed 64-bit integer,
@ rounding towards zero and clamping on overflow.
@   in:  r0 = low word, r1 = high word of the double
@   out: r0 = low word, r1 = high word of the integer
@ Sets r2 = 0 (no bits after the point) and falls through into double2fix64_z.
wrapper_func __aeabi_d2lz
regular_func double2int64_z
    movs    r2, #0                      @ integer result: no fractional bits; fall through

@ double2fix64_z: convert a double to signed 64-bit fixed point, rounding
@ towards zero and clamping on overflow.
@   in:  r0:r1 = double (r0 is the low word), r2 = number of bits after the point
@   out: r0:r1 = fixed-point result
@   clobbers r2, r3, r12 and the flags
@ Special cases: inputs with a zero exponent field return 0; results that do
@ not fit, and +Inf, return 0x7fffffffffffffff; -Inf returns
@ 0x8000000000000000; NaNs are treated as +Inf.
regular_func double2fix64_z
    sub     r2, r2, #0x3ff+52-1         @ remove exponent bias, compensate for mantissa length
    asrs    r12, r1, #20                @ r12 = sign and exponent (sign-extended)
    sub     r3, r12, #1
    sub     r1, r1, r3, lsl #20         @ install implied 1, clear sign and exponent
    lsls    r3, r3, #21                 @ exponent minus 1 in the top 11 bits, sign shifted out
    cmp     r3, #0xffc00000
    bhs     1f                          @ exponent field all zeros or all ones?
    adds    r2, r2, r3, lsr #21         @ offset exponent by fix precision; r2 is now the required left shift
    bmi     4f                          @ actually a right shift?
    cmp     r2, #11                     @ overflow?
    bge     5f
    lsls    r1, r1, r2
    rsbs    r3, r2, #32
    lsrs    r3, r0, r3                  @ bits of r0 that move up into r1
    orrs    r1, r1, r3
    lsls    r0, r0, r2
    cmp     r12, #0
    it      ge
    bxge    lr                          @ positive input: done
    rsbs    r0, r0, #0                  @ negative input: negate the 64-bit result
    sbc     r1, r1, r1, lsl #1
    bx      lr
4:
    adds    r2, r2, #32
    ble     6f                          @ right shift of 32 or more: result fits in low word
    lsl     r3, r1, r2                  @ bits of r1 that move down into r0
    rsbs    r2, r2, #32                 @ r2 = right shift count
    lsrs    r1, r1, r2
    lsrs    r0, r0, r2
    orrs    r0, r0, r3
    cmp     r12, #0
    it      ge
    bxge    lr                          @ positive input: done
    rsbs    r0, r0, #0                  @ negative input: negate the 64-bit result
    sbc     r1, r1, r1, lsl #1
    bx      lr
6:
    rsbs    r2, r2, #0                  @ r2 = right shift count minus 32
    usat    r2, #5, r2                  @ limit shift to 31: underflow to 0
    lsrs    r0, r1, r2
    movs    r1, #0
    cmp     r12, #0
    it      ge
    bxge    lr                          @ positive input: done
    rsbs    r0, r0, #0                  @ negative input: negate the 64-bit result
    sbc     r1, r1, r1, lsl #1
    bx      lr
1:
    beq     3f                          @ exponent all ones: +/-Inf or NaN
2:
    movs    r0, #0                      @ exponent all zeros: return 0
    movs    r1, #0
    bx      lr
3:
    orrs    r1, r0, r1, lsl #12         @ mantissa field
    it      ne                          @ NaN?
    movne   r12, #0                     @ treat NaNs as +Inf
@ here the original argument was +/-Inf or NaN, or the result overflowed
5:
    mvn     r1, #0x80000000
    add     r1, r1, r12, lsr #31        @ high word: negative gives 0x80000000, positive 0x7fffffff
    mvn     r0, r12, asr #31            @ low word: negative gives 0, positive 0xffffffff
    bx      lr
326
double_wrapper_section conv_dtoui64

@ double2uint64 / double2uint64_z / __aeabi_d2ulz: convert a double to an
@ unsigned 64-bit integer, discarding the fraction and clamping.
@   in:  r0 = low word, r1 = high word of the double
@   out: r0 = low word, r1 = high word of the integer
@ Sets r2 = 0 (no bits after the point) and falls through into double2ufix64.
wrapper_func __aeabi_d2ulz
regular_func double2uint64
regular_func double2uint64_z
    movs    r2, #0                      @ integer result: no fractional bits; fall through

@ double2ufix64 / double2ufix64_z: convert a double to unsigned 64-bit fixed
@ point, discarding bits below the result precision and clamping.
@   in:  r0:r1 = double (r0 is the low word), r2 = number of bits after the point
@   out: r0:r1 = fixed-point result
@   clobbers r2, r3, r12 and the flags
@ Special cases: inputs with a zero exponent field, negative finite inputs and
@ -Inf return 0; results that do not fit, +Inf and NaNs of either sign return
@ 0xffffffffffffffff.
regular_func double2ufix64
regular_func double2ufix64_z
    subw    r2, r2, #0x3ff+52-1         @ remove exponent bias, compensate for mantissa length
    asrs    r3, r1, #20                 @ sign and exponent; N flag = sign of the input
    sub     r3, r3, #1                  @ must not touch the flags
    sub     r1, r1, r3, lsl #20         @ install implied 1, clear exponent and sign
    bmi     7f                          @ argument negative?
    movw    r12, #0x7fe
    cmp     r3, r12
    bhs     1f                          @ exponent field all zeros or all ones?
    adds    r2, r2, r3                  @ offset exponent by fix precision; r2 is now the required left shift
    bmi     2f                          @ actually a right shift?
    cmp     r2, #12                     @ overflow?
    bge     4f
    lsls    r1, r1, r2
    rsbs    r3, r2, #32
    lsrs    r3, r0, r3                  @ bits of r0 that move up into r1
    lsls    r0, r0, r2
    orrs    r1, r1, r3
    bx      lr
2:
    adds    r2, r2, #32
    ble     5f                          @ right shift of 32 or more: result fits in low word
    lsl     r3, r1, r2                  @ bits of r1 that move down into r0
    rsbs    r2, r2, #32                 @ r2 = right shift count
    lsrs    r1, r1, r2
    lsrs    r0, r0, r2
    orrs    r0, r0, r3
    bx      lr
5:
    rsbs    r2, r2, #0                  @ r2 = right shift count minus 32
    usat    r2, #5, r2                  @ limit shift to 31: underflow to 0
    lsrs    r0, r1, r2
    movs    r1, #0
    bx      lr
1:
    bhi     3f                          @ exponent all zeros: return 0
4:
@ here overflow has occurred, or the argument was +Inf or a NaN
    mvn     r0, #0
    mvn     r1, #0
    bx      lr
7:
    cmp     r3, #0xfffffffe             @ negative with exponent all ones?
    bne     3f                          @ no: negative finite value (including -0), return 0
    orrs    r2, r0, r1, lsl #12         @ mantissa field
    bne     4b                          @ negative NaN: clamp like overflow
3:
    movs    r0, #0                      @ -Inf also ends up here
    movs    r1, #0
    bx      lr
386
387#endif
388