1/*
2 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#if !PICO_RP2040
8#include "pico/asm_helper.S"
9
10pico_default_asm_setup
11
@ float_section: emit a .section directive, flagged "ax", for the section
@ derived from \name.
@ When PICO_FLOAT_IN_RAM is non-zero the section name is built with the
@ RAM_SECTION_NAME helper, otherwise with the SECTION_NAME helper. Both
@ helpers are defined outside this file.
12.macro float_section name
13#if PICO_FLOAT_IN_RAM
14.section RAM_SECTION_NAME(\name), "ax"
15#else
16.section SECTION_NAME(\name), "ax"
17#endif
18.endm
19
@ float_wrapper_section: invoke float_section with the name produced by the
@ WRAPPER_FUNC_NAME helper for \func (helper defined outside this file), so
@ that the code which follows lands in the section belonging to that wrapper.
20.macro float_wrapper_section func
21float_section WRAPPER_FUNC_NAME(\func)
22.endm
23
24float_wrapper_section conv_tof
25
@ ---------------------------------------------------------------------------
@ __aeabi_l2f / int642float : signed 64-bit integer     -> float, rounding
@ fix642float               : signed 64-bit fixed point -> float, rounding
@   In:   r0 = low word, r1 = high word of the value
@         r2 = number of bits after the binary point (fix642float only;
@              the integer entry points clear r2 and fall through)
@   Out:  r0 = float bit pattern
@   Scratch: r1, r2, r3, r12 and the flags. Returns with bx r14.
@ Non-negative values branch to local label 10 inside ufix642float.
@ Negative values are negated and converted here, with the sign bit ORed
@ into the result; if the negated high word is zero the conversion is
@ finished by fix2float_neg.
@ Rounding adds the first discarded bit; an exact tie is then forced to even
@ at label 4. If the exponent is out of range, label 3 returns 0x80000000
@ (underflow) or 0xff800000 (overflow).
@ ---------------------------------------------------------------------------
26@ convert int64 to float, rounding
27wrapper_func __aeabi_l2f
28regular_func int642float
29 movs r2,#0       @ integer input: no bits after the point; fall through
30@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
31regular_func fix642float
32 cmp r1,#0 @ the sign lives in the high word
33 bge 10f @ positive? use unsigned code
34 rsbs r0,#0 @ negate low word; the carry it leaves feeds the sbc below
35 sbc r1,r1,r1,lsl#1 @ r1 = r1 - 2*r1 - borrow: make positive
36 cbz r1,7f @ high word is zero?
37 clz r3,r1
38 subs r3,#8 @ r3 = left shift that brings the top set bit of r1 to bit 23
39 bmi 2f @ fewer than 8 leading zeros: a right shift is needed instead
40 lsls r1,r3
41 lsls r12,r0,r3 @ bits that will be lost
42 rsb r3,#32 @ r3 = 32 - left shift
43 lsr r0,r3 @ top bits of the low word that move up into the mantissa
44 orr r0,r0,r1 @ r0 = mantissa with its leading 1 at bit 23
45 sub r2,r2,r3
46 rsb r2,#149 @ r2 = exponent field minus one; the leading 1 at bit 23 adds the last 1
47 adds r12,r12,r12 @ rounding bit into carry; Z set when nothing lies below it
48 adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
49 orr r0,r0,#0x80000000 @ set the sign bit: the input was negative
50 beq 4f @ potential rounding tie?
51 cmp r2,#0xfe
52 bhs 3f @ over/underflow?
53 bx r14
@ Right-shift path: the top set bit of r1 is above bit 23.
542:
55 add r3,#33 @ r3 = clz + 25
56 lsls r12,r1,r3 @ rounding bit in carry, sticky bits in r12
57 orrs r12,r12,r0 @ all of low word into sticky bits: affects Z but not C
58 rsb r3,#33 @ r3 = 8 - clz: the right shift to apply
59 lsr r0,r1,r3 @ r0 = mantissa with its leading 1 at bit 23
@ disabled debug hook
60@ push {r14}
61@ bl dumpreg
62@ pop {r14}
63 sub r2,r3,r2
64 add r2,#22+127+32 @ r2 = exponent field minus one (see the left-shift path)
65 adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
66 orr r0,r0,#0x80000000 @ set the sign bit: the input was negative
67 beq 4f @ potential rounding tie?
68 cmp r2,#0xfe
69 it lo
70 bxlo r14
71@ over/underflow? (flags still hold the result of cmp r2,#0xfe)
723:
73 mov r0,#0x80000000 @ underflow: return the sign bit only
74 it ge @ signed test: r2 >= 0xfe means the exponent is too large
75 movtge r0,#0xff80 @ overflow: top halfword replaced, giving 0xff800000
761:
77 bx r14
@ High word is zero after negation: continue in the 32-bit negative path.
787:
79 mov r1,r2 @ fix2float_neg reads the fraction bit count from r1
80 b fix2float_neg
@ Z was set by the rounding step: no sticky bits below the rounding bit.
814:
82 it cs @ rounding tie?
83 biccs r0,r0,#1 @ force to even if we rounded up
84 cmp r2,#0xfe
85 it lo
86 bxlo r14
87 b 3b
88
@ ---------------------------------------------------------------------------
@ fix2float : signed 32-bit fixed point -> float, rounding
@   In:   r0 = value, r1 = number of bits after the binary point
@   Out:  r0 = float bit pattern
@   Scratch: r2, r3, r12 and the flags. Returns with bx r14.
@ Non-negative values are handed to ufix2float. Negative values are negated
@ and fall into fix2float_neg, which expects the magnitude in r0 and the
@ fraction bit count in r1, and ORs the sign bit into the result.
@ fix2float_neg is also branched to from fix642float.
@ If the exponent is out of range, label 3 returns 0x80000000 (underflow)
@ or 0xff800000 (overflow).
@ ---------------------------------------------------------------------------
89@ convert signed 32-bit fix to float, rounding; number of r0 bits after point in r1
90.thumb_func
91regular_func fix2float
92 cmp r0,#0
93 bge ufix2float @ positive? can use unsigned code
94 rsbs r0,#0 @ make positive
95fix2float_neg:
96 clz r3,r0
97 subs r3,#8 @ r3 = left shift that brings the top set bit of r0 to bit 23
98 bmi 2f @ fewer than 8 leading zeros: a right shift with rounding is needed
@ Left-shift path: no bits are discarded, so a plain add is used (no rounding).
99 lsls r0,r3
100 add r2,r1,r3
101 rsb r2,#149 @ r2 = exponent field minus one; the leading 1 at bit 23 adds the last 1
102 add r0,r0,r2,lsl#23 @ insert exponent
103 orr r0,#0x80000000 @ set the sign bit
104 cmp r2,#0xfe
105 it lo @ over/underflow?
106 bxlo r14
107 b 3f
@ Right-shift path: the top set bit of r0 is above bit 23.
1082:
109 add r3,#33 @ r3 = clz + 25
110 lsls r12,r0,r3 @ rounding bit in carry, sticky bits in r12
111 rsb r3,#33 @ r3 = 8 - clz: the right shift to apply
112 lsr r0,r3
@ disabled debug hook
113@ push {r14}
114@ bl dumpreg
115@ pop {r14}
116 sub r2,r3,r1
117 add r2,#22+127 @ r2 = exponent field minus one
118 adc r0,r0,r2,lsl#23 @ insert exponent, add the rounding bit from carry
119 orr r0,#0x80000000 @ set the sign bit
120 beq 4f @ potential rounding tie?
121 cmp r2,#0xfe
122 it lo
123 bxlo r14
124@ over/underflow? (flags still hold the result of cmp r2,#0xfe)
1253:
126 mov r0,#0x80000000 @ underflow: return the sign bit only
127 it ge @ signed test: r2 >= 0xfe means the exponent is too large
128 orrge r0,#0x7f800000 @ overflow: result becomes 0xff800000
1291:
130 bx r14
@ Z was set by the lsls above: no sticky bits below the rounding bit.
1314:
132 it cs @ rounding tie?
133 biccs r0,r0,#1 @ force to even if we rounded up
134 cmp r2,#0xfe
135 it lo
136 bxlo r14
137 b 3b
138
@ ---------------------------------------------------------------------------
@ ufix2float : unsigned 32-bit fixed point -> float, rounding
@   In:   r0 = value, r1 = number of bits after the binary point
@   Out:  r0 = float bit pattern
@   Scratch: r2, r3, r12 and the flags. Returns with bx r14.
@ A zero input is returned unchanged. Also reached by branch from fix2float
@ (non-negative input) and from ufix642float (high word zero).
@ If the exponent is out of range, label 3 returns 0x7f800000 (overflow)
@ or 0 (underflow).
@ ---------------------------------------------------------------------------
139@ convert unsigned 32-bit fix to float, rounding; number of r0 bits after point in r1
140regular_func ufix2float
141 cbz r0,1f @ zero? return it
142 clz r3,r0
143 subs r3,#8 @ r3 = left shift that brings the top set bit of r0 to bit 23
144 bmi 2f @ fewer than 8 leading zeros: a right shift with rounding is needed
@ Left-shift path: no bits are discarded, so a plain add is used (no rounding).
145 lsls r0,r3
146 add r2,r1,r3
147 rsb r2,#149 @ r2 = exponent field minus one; the leading 1 at bit 23 adds the last 1
148 add r0,r0,r2,lsl#23 @ insert exponent
@ disabled debug hook
149@ push {r14}
150@ bl dumpreg
151@ pop {r14}
152 cmp r2,#0xfe
153 it lo @ over/underflow?
154 bxlo r14
155 b 3f
@ Right-shift path: the top set bit of r0 is above bit 23.
1562:
157 add r3,#33 @ r3 = clz + 25
158 lsls r12,r0,r3 @ rounding bit in carry, sticky bits in r12
159 rsb r3,#33 @ r3 = 8 - clz: the right shift to apply
160 lsr r0,r3
@ disabled debug hook
161@ push {r14}
162@ bl dumpreg
163@ pop {r14}
164 sub r2,r3,r1
165 add r2,#22+127 @ r2 = exponent field minus one
166 adc r0,r0,r2,lsl#23 @ insert exponent, add the rounding bit from carry
167 beq 4f @ potential rounding tie?
168 cmp r2,#0xfe
169 it lo
170 bxlo r14
171@ over/underflow? (flags still hold the result of cmp r2,#0xfe)
1723:
173 ite ge @ signed test: r2 >= 0xfe means the exponent is too large
174 movge r0,#0x7f800000 @ overflow
175 movlt r0,#0x00000000 @ underflow
1761:
177 bx r14
@ Z was set by the lsls above: no sticky bits below the rounding bit.
1784:
179 it cs @ rounding tie?
180 biccs r0,r0,#1 @ force to even if we rounded up
181 cmp r2,#0xfe
182 it lo
183 bxlo r14
184 b 3b
185
@ ---------------------------------------------------------------------------
@ __aeabi_ul2f / uint642float : unsigned 64-bit integer     -> float, rounding
@ ufix642float                : unsigned 64-bit fixed point -> float, rounding
@   In:   r0 = low word, r1 = high word of the value
@         r2 = number of bits after the binary point (ufix642float only;
@              the integer entry points clear r2 and fall through)
@   Out:  r0 = float bit pattern
@   Scratch: r1, r2, r3, r12 and the flags. Returns with bx r14.
@ Local label 10 is also the target of the bge 10f in fix642float, which
@ sends non-negative signed values here.
@ If the high word is zero the conversion is finished by ufix2float.
@ If the exponent is out of range, label 3 returns 0x7f800000 (overflow)
@ or 0 (underflow).
@ ---------------------------------------------------------------------------
186@ convert uint64 to float, rounding
187wrapper_func __aeabi_ul2f
188regular_func uint642float
189 movs r2,#0       @ integer input: no bits after the point; fall through
190@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
191regular_func ufix642float
19210:
193 cbz r1,7f @ high word is zero?
194 clz r3,r1
195 subs r3,#8 @ r3 = left shift that brings the top set bit of r1 to bit 23
196 bmi 2f @ fewer than 8 leading zeros: a right shift is needed instead
197 lsls r1,r3
198 lsls r12,r0,r3 @ bits that will be lost
199 rsb r3,#32 @ r3 = 32 - left shift
200 lsr r0,r3 @ top bits of the low word that move up into the mantissa
201 orr r0,r0,r1 @ r0 = mantissa with its leading 1 at bit 23
202 sub r2,r2,r3
203 rsb r2,#149 @ r2 = exponent field minus one; the leading 1 at bit 23 adds the last 1
204 adds r12,r12,r12 @ rounding bit into carry; Z set when nothing lies below it
205 adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
206 beq 4f @ potential rounding tie?
207 cmp r2,#0xfe
208 bhs 3f @ over/underflow?
209 bx r14
@ Right-shift path: the top set bit of r1 is above bit 23.
2102:
211 add r3,#33 @ r3 = clz + 25
212 lsls r12,r1,r3 @ rounding bit in carry, sticky bits in r12
213 orrs r12,r12,r0 @ all of low word into sticky bits: affects Z but not C
214 rsb r3,#33 @ r3 = 8 - clz: the right shift to apply
215 lsr r0,r1,r3 @ r0 = mantissa with its leading 1 at bit 23
@ disabled debug hook
216@ push {r14}
217@ bl dumpreg
218@ pop {r14}
219 sub r2,r3,r2
220 add r2,#22+127+32 @ r2 = exponent field minus one (see the left-shift path)
221 adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
222 beq 4f @ potential rounding tie?
223 cmp r2,#0xfe
224 it lo
225 bxlo r14
226@ over/underflow? (flags still hold the result of cmp r2,#0xfe)
2273:
228 ite ge @ signed test: r2 >= 0xfe means the exponent is too large
229 movge r0,#0x7f800000 @ overflow
230 movlt r0,#0x00000000 @ underflow
2311:
232 bx r14
@ High word is zero: continue in the 32-bit unsigned routine.
2337:
234 mov r1,r2 @ ufix2float reads the fraction bit count from r1
235 b ufix2float
@ Z was set by the rounding step: no sticky bits below the rounding bit.
2364:
237 it cs @ rounding tie?
238 biccs r0,r0,#1 @ force to even if we rounded up
239 cmp r2,#0xfe
240 it lo
241 bxlo r14
242 b 3b
243
244float_wrapper_section conv_ftoi64
245
@ ---------------------------------------------------------------------------
@ __aeabi_f2lz / float2int64_z : float -> signed 64-bit integer
@ float2fix64_z                : float -> signed 64-bit fixed point
@   In:   r0 = float bit pattern
@         r1 = fixed-point precision, added to the exponent to obtain the
@              shift (presumably the number of bits after the binary point;
@              the integer entry points clear r1 and fall through)
@   Out:  r0 = low word, r1 = high word of the result
@   Scratch: r2, r3 and the flags. Returns with bx r14.
@ The magnitude is shifted (bits shifted out on the right are dropped, so
@ rounding is towards zero) and then negated if the input was negative.
@ Special cases handled below:
@   exponent field 0 (zero and denormals)   -> 0
@   magnitude too large, or +/-Inf          -> 0x7fffffff:ffffffff or
@                                              0x80000000:00000000 by sign
@   NaN                                     -> treated as +Inf
@ ---------------------------------------------------------------------------
246@ convert float to signed int64, rounding towards 0, clamping
247wrapper_func __aeabi_f2lz
248regular_func float2int64_z
249 movs r1,#0      @ integer result: precision 0; fall through
250@ convert float in r0 to signed fixed point in r0:r1, clamping
251regular_func float2fix64_z
252 subs r1,#0x95 @ remove exponent bias, compensate for mantissa length
253 asrs r2,r0,#23 @ sign and exponent
254 sub r3,r2,#1 @ exponent minus one, sign bits still above it
255 sub r0,r0,r3,lsl#23 @ install implied 1, clear exponent
256 uxtb r3,r3 @ r3 = (exponent - 1) & 0xff: 0xff for exponent 0, 0xfe for exponent 0xff
257 cmp r3,#0xfe
258 bhs 1f @ 0 or Inf/NaN?
259 adds r1,r3 @ offset exponent by fix precision; r1 is now required left shift
260 bmi 4f @ actually a right shift?
261 subs r3,r1,#32 @ result fits in high 32 bits only?
262 bge 8f
263 subs r3,r1,#8 @ result fits in low 32 bits only?
264 ble 7f
@ Shift of 9..31: the result straddles both words.
265 lsls r0,#8 @ mantissa moved to the top of r0
266 rsbs r1,r3,#32
267 lsrs r1,r0,r1 @ high word: the bits that leave the low word
268 lsls r0,r3 @ low word
269 cmp r2,#0 @ r2 is negative exactly when the input sign bit was set
270 it ge
271 bxge r14
272 rsbs r0,#0 @ negate if necessary
273 sbcs r1,r1,r1,lsl#1 @ r1 = r1 - 2*r1 - borrow: completes the 64-bit negate
274 bx r14
@ Shift of 0..8: the magnitude fits in the low word.
2757:
276 lsls r0,r0,r1
277 movs r1,r2,asr#31 @ sign extend
278 eors r0,r0,r1 @ negate if necessary
279 subs r0,r0,r1
280 bx r14
@ Shift of 32 or more: r3 = shift - 32, low word of the result is zero.
2818:
282 cmp r3,#8 @ overflow?
283 bge 5f
284 lsls r0,r0,r3
285 eor r1,r0,r2,asr#31 @ negate if necessary
286 add r1,r1,r2,lsr#31
287 movs r0,#0
288 bx r14
@ Exponent field was 0 or 0xff (flags still hold the result of cmp r3,#0xfe).
2891:
290 bhi 3f @ 0?
291 lsls r1,r0,#9 @ mantissa field
292 it ne @ NaN?
293 movne r2,#0 @ treat NaNs as +Inf
@ Clamp to the largest magnitude for the sign held in r2.
2945:
295 mvn r1,#0x80000000 @ = 0x7fffffff
296 add r1,r1,r2,lsr#31 @ so -Inf -> 0x80000000, +Inf -> 0x7fffffff (high word)
297 mvn r0,r2,asr#31 @ low word: 0xffffffff when positive, 0 when negative
298 bx r14
@ Exponent field 0: return zero.
2993:
300 movs r0,#0
301 movs r1,#0
302 bx r14
@ Negative shift count: shift the mantissa right instead.
3034:
304 rsbs r1,#0 @ r1 = right shift amount
305 usat r1,#5,r1 @ saturate the shift to 0..31
306 lsrs r0,r0,r1
307 eors r0,r0,r2,asr#31 @ negate if necessary
308 adds r0,r0,r2,lsr#31
309 movs r1,r0,asr#31 @ sign extend
310 bx r14
311
312float_wrapper_section conv_ftoui64
313
@ ---------------------------------------------------------------------------
@ __aeabi_f2ulz / float2uint64 / float2uint64_z : float -> unsigned 64-bit int
@ float2ufix64                                  : float -> unsigned 64-bit
@                                                 fixed point
@   In:   r0 = float bit pattern
@         r1 = fixed-point precision, added to the exponent to obtain the
@              shift (presumably the number of bits after the binary point;
@              the integer entry points clear r1 and fall through)
@   Out:  r0 = low word, r1 = high word of the result
@   Scratch: r2, r3 and the flags. Returns with bx r14.
@ Bits shifted out on the right are dropped. Special cases handled below:
@   negative finite values, -Inf, exponent field 0   -> 0
@   magnitude too large, +Inf, NaN of either sign    -> 0xffffffff:ffffffff
@ ---------------------------------------------------------------------------
314@ convert float to unsigned int64, rounding towards -Inf, clamping
315wrapper_func __aeabi_f2ulz
316regular_func float2uint64
317regular_func float2uint64_z
318 movs r1,#0      @ integer result: precision 0; fall through
319@ convert float in r0 to unsigned fixed point in r0:r1, clamping
320regular_func float2ufix64
321//regular_func float2ufix64_z
322 subs r1,#0x96 @ remove exponent bias, compensate for mantissa length
323 asrs r2,r0,#23 @ sign and exponent
324 sub r3,r2,#1
325 cmp r3,#0xfe @ unsigned test: also catches negative r2 and exponent field 0
326 bhs 1f @ -ve, 0 or Inf/NaN?
327 sub r0,r0,r3,lsl#23 @ install implied 1, clear exponent
328 adds r1,r2 @ offset exponent by fix precision; r1 is now required left shift
329 bmi 4f @ actually a right shift?
330 subs r2,r1,#7
331 ble 7f @ result (easily) fits in lo 32 bits?
332 subs r3,r1,#32
333 bge 8f @ results might fit in hi 32 bits?
@ Shift of 8..31: the result straddles both words.
334 lsls r0,r0,#7
335 rsbs r3,r2,#32
336 lsrs r1,r0,r3 @ high word: the bits that leave the low word
337 lsls r0,r0,r2 @ low word
338 bx r14
@ Shift of 0..7: the result fits in the low word.
3397:
340 lsls r0,r1
341 movs r1,#0
342 bx r14
@ Shift of 32 or more: r3 = shift - 32, low word of the result is zero.
3438:
344 cmp r1,#32+9 @ overflow?
345 bge 5f
346 lsls r1,r0,r3
347 movs r0,#0
348 bx r14
@ Clamp to the largest unsigned value.
3495:
350 mvn r0,#0 @ = 0xffffffff
351 mvn r1,#0 @ = 0xffffffff
352 bx r14
@ Negative shift count: shift the mantissa right instead.
3534:
354 rsbs r1,#0 @ r1 = right shift amount
355 usat r1,#5,r1 @ if shift is long return 0
356 lsrs r0,r0,r1
357 movs r1,#0
358 bx r14
@ Negative, exponent field 0, or exponent field 0xff; r0 is still the
@ unmodified input here.
3591:
360 cmp r0,#0xff800000
361 bhi 5b @ -NaN, return 0xffffffff
362 cmp r0,#0x00800000
363 bgt 5b @ +Inf or +NaN, return 0xffffffff
@ NOTE(review): label 2 is not referenced anywhere in the visible source.
3642:
365 movs r0,#0 @ return 0
366 movs r1,#0
367 bx r14
368
369#endif
370