/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#if !PICO_RP2040
#include "pico/asm_helper.S"

pico_default_asm_setup

@ Conversions between 32/64-bit integers or fixed-point values and
@ single-precision floats, written in Thumb-2.
@ All routines are leaf functions that use only r0-r3, ip and the flags.

.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm

float_wrapper_section conv_tof

@ ---------------------------------------------------------------------------
@ int64 to float, round to nearest with ties to even
@   in:   r0:r1 = signed 64-bit integer (r0 = low word, r1 = high word)
@   out:  r0    = float
@ Sets the fraction-bit count to zero and falls into the fixed-point routine.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_l2f
regular_func int642float
    movs    r2, #0                      @ integer input: no bits after the point

@ ---------------------------------------------------------------------------
@ signed 64-bit fixed point to float, round to nearest with ties to even
@   in:   r0:r1 = value, r2 = number of bits after the binary point
@   out:  r0    = float; -Inf on exponent overflow, -0 on exponent underflow
@ Non-negative inputs are handed to the unsigned 64-bit code further down.
@ ---------------------------------------------------------------------------
regular_func fix642float
    cmp     r1, #0
    bge     .Lufix64_entry              @ not negative: share the unsigned path
    rsbs    r0, #0                      @ negate r0:r1; C is clear if r0 was non-zero
    sbc     r1, r1, r1, lsl #1          @ r1 = -r1 - borrow
    cbz     r1, .Lfix64_lo_only         @ magnitude fits in the low word
    clz     r3, r1
    subs    r3, #8                      @ left shift that puts the top set bit of r1 at bit 23
    bmi     .Lfix64_hi_rshift           @ negative: r1 alone holds more than 24 bits
    lsls    r1, r3
    lsls    ip, r0, r3                  @ low-word bits that will not fit in the mantissa
    rsb     r3, #32
    lsr     r0, r3                      @ top bits of the low word slot in below r1
    orr     r0, r0, r1                  @ r0 = 24-bit mantissa, leading 1 at bit 23
    sub     r2, r2, r3
    rsb     r2, #149                    @ r2 = exponent field less one (the leading 1 adds the rest)
    adds    ip, ip, ip                  @ C = rounding bit, Z = all sticky bits zero
    adc     r0, r0, r2, lsl #23         @ merge exponent, round up when C is set
    orr     r0, r0, #0x80000000         @ result is negative
    beq     .Lfix64_tie                 @ no sticky bits: could be an exact tie
    cmp     r2, #0xfe
    bhs     .Lfix64_range               @ exponent outside the finite range
    bx      lr

.Lfix64_hi_rshift:
    add     r3, #33
    lsls    ip, r1, r3                  @ C = rounding bit, ip = sticky bits from the high word
    orrs    ip, ip, r0                  @ whole low word is sticky too; updates Z, leaves C
    rsb     r3, #33                     @ r3 = right shift for the high word
    lsr     r0, r1, r3                  @ r0 = 24-bit mantissa
    sub     r2, r3, r2
    add     r2, #22+127+32              @ r2 = exponent field less one
    adc     r0, r0, r2, lsl #23         @ merge exponent, round up when C is set
    orr     r0, r0, #0x80000000         @ result is negative
    beq     .Lfix64_tie                 @ no sticky bits: could be an exact tie
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    @ otherwise fall through with the flags of the compare against 0xfe

.Lfix64_range:
    mov     r0, #0x80000000             @ exponent went negative: return -0
    it      ge
    movtge  r0, #0xff80                 @ exponent too large: return -Inf
    bx      lr

.Lfix64_lo_only:
    mov     r1, r2                      @ the 32-bit routine wants the point position in r1
    b       fix2float_neg

.Lfix64_tie:
    it      cs                          @ C set: the adc rounded up on an exact half
    biccs   r0, r0, #1                  @ so clear bit 0 to land on the even neighbour
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    b       .Lfix64_range

@ ---------------------------------------------------------------------------
@ signed 32-bit fixed point to float, round to nearest with ties to even
@   in:   r0 = value, r1 = number of bits after the binary point
@   out:  r0 = float; -Inf / -0 on exponent overflow / underflow
@ Non-negative inputs are handed to the unsigned routine.
@ fix2float_neg expects a magnitude in r0 and always produces a negative float.
@ ---------------------------------------------------------------------------
.thumb_func
regular_func fix2float
    cmp     r0, #0
    bge     ufix2float                  @ not negative: use the unsigned routine
    rsbs    r0, #0                      @ r0 = magnitude
fix2float_neg:
    clz     r3, r0
    subs    r3, #8                      @ left shift that puts the top set bit at bit 23
    bmi     .Lfix32_rshift              @ negative: more than 24 significant bits
    lsls    r0, r3                      @ exact: nothing is shifted out, no rounding
    add     r2, r1, r3
    rsb     r2, #149                    @ r2 = exponent field less one
    add     r0, r0, r2, lsl #23         @ merge exponent
    orr     r0, #0x80000000             @ result is negative
    cmp     r2, #0xfe
    it      lo
    bxlo    lr                          @ exponent in range: done
    b       .Lfix32_range

.Lfix32_rshift:
    add     r3, #33
    lsls    ip, r0, r3                  @ C = rounding bit, Z = all sticky bits zero
    rsb     r3, #33                     @ r3 = right shift
    lsr     r0, r3                      @ r0 = 24-bit mantissa
    sub     r2, r3, r1
    add     r2, #22+127                 @ r2 = exponent field less one
    adc     r0, r0, r2, lsl #23         @ merge exponent, round up when C is set
    orr     r0, #0x80000000             @ result is negative
    beq     .Lfix32_tie                 @ no sticky bits: could be an exact tie
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    @ otherwise fall through with the flags of the compare against 0xfe

.Lfix32_range:
    mov     r0, #0x80000000             @ exponent went negative: return -0
    it      ge
    orrge   r0, #0x7f800000             @ exponent too large: return -Inf
    bx      lr

.Lfix32_tie:
    it      cs                          @ C set: the adc rounded up on an exact half
    biccs   r0, r0, #1                  @ so clear bit 0 to land on the even neighbour
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    b       .Lfix32_range

@ ---------------------------------------------------------------------------
@ unsigned 32-bit fixed point to float, round to nearest with ties to even
@   in:   r0 = value, r1 = number of bits after the binary point
@   out:  r0 = float; +Inf / +0 on exponent overflow / underflow
@ ---------------------------------------------------------------------------
regular_func ufix2float
    cbz     r0, .Lufix32_ret            @ zero converts to +0 whatever the point position
    clz     r3, r0
    subs    r3, #8                      @ left shift that puts the top set bit at bit 23
    bmi     .Lufix32_rshift             @ negative: more than 24 significant bits
    lsls    r0, r3                      @ exact: nothing is shifted out, no rounding
    add     r2, r1, r3
    rsb     r2, #149                    @ r2 = exponent field less one
    add     r0, r0, r2, lsl #23         @ merge exponent
    cmp     r2, #0xfe
    it      lo
    bxlo    lr                          @ exponent in range: done
    b       .Lufix32_range

.Lufix32_rshift:
    add     r3, #33
    lsls    ip, r0, r3                  @ C = rounding bit, Z = all sticky bits zero
    rsb     r3, #33                     @ r3 = right shift
    lsr     r0, r3                      @ r0 = 24-bit mantissa
    sub     r2, r3, r1
    add     r2, #22+127                 @ r2 = exponent field less one
    adc     r0, r0, r2, lsl #23         @ merge exponent, round up when C is set
    beq     .Lufix32_tie                @ no sticky bits: could be an exact tie
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    @ otherwise fall through with the flags of the compare against 0xfe

.Lufix32_range:
    ite     ge
    movge   r0, #0x7f800000             @ exponent too large: return +Inf
    movlt   r0, #0x00000000             @ exponent went negative: return +0
.Lufix32_ret:
    bx      lr

.Lufix32_tie:
    it      cs                          @ C set: the adc rounded up on an exact half
    biccs   r0, r0, #1                  @ so clear bit 0 to land on the even neighbour
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    b       .Lufix32_range

@ ---------------------------------------------------------------------------
@ uint64 to float, round to nearest with ties to even
@   in:   r0:r1 = unsigned 64-bit integer (r0 = low word, r1 = high word)
@   out:  r0    = float
@ Sets the fraction-bit count to zero and falls into the fixed-point routine.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_ul2f
regular_func uint642float
    movs    r2, #0                      @ integer input: no bits after the point

@ ---------------------------------------------------------------------------
@ unsigned 64-bit fixed point to float, round to nearest with ties to even
@   in:   r0:r1 = value, r2 = number of bits after the binary point
@   out:  r0    = float; +Inf / +0 on exponent overflow / underflow
@ The signed 64-bit routine also enters here for non-negative inputs.
@ ---------------------------------------------------------------------------
regular_func ufix642float
.Lufix64_entry:
    cbz     r1, .Lufix64_lo_only        @ value fits in the low word
    clz     r3, r1
    subs    r3, #8                      @ left shift that puts the top set bit of r1 at bit 23
    bmi     .Lufix64_hi_rshift          @ negative: r1 alone holds more than 24 bits
    lsls    r1, r3
    lsls    ip, r0, r3                  @ low-word bits that will not fit in the mantissa
    rsb     r3, #32
    lsr     r0, r3                      @ top bits of the low word slot in below r1
    orr     r0, r0, r1                  @ r0 = 24-bit mantissa, leading 1 at bit 23
    sub     r2, r2, r3
    rsb     r2, #149                    @ r2 = exponent field less one
    adds    ip, ip, ip                  @ C = rounding bit, Z = all sticky bits zero
    adc     r0, r0, r2, lsl #23         @ merge exponent, round up when C is set
    beq     .Lufix64_tie                @ no sticky bits: could be an exact tie
    cmp     r2, #0xfe
    bhs     .Lufix64_range              @ exponent outside the finite range
    bx      lr

.Lufix64_hi_rshift:
    add     r3, #33
    lsls    ip, r1, r3                  @ C = rounding bit, ip = sticky bits from the high word
    orrs    ip, ip, r0                  @ whole low word is sticky too; updates Z, leaves C
    rsb     r3, #33                     @ r3 = right shift for the high word
    lsr     r0, r1, r3                  @ r0 = 24-bit mantissa
    sub     r2, r3, r2
    add     r2, #22+127+32              @ r2 = exponent field less one
    adc     r0, r0, r2, lsl #23         @ merge exponent, round up when C is set
    beq     .Lufix64_tie                @ no sticky bits: could be an exact tie
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    @ otherwise fall through with the flags of the compare against 0xfe

.Lufix64_range:
    ite     ge
    movge   r0, #0x7f800000             @ exponent too large: return +Inf
    movlt   r0, #0x00000000             @ exponent went negative: return +0
    bx      lr

.Lufix64_lo_only:
    mov     r1, r2                      @ the 32-bit routine wants the point position in r1
    b       ufix2float

.Lufix64_tie:
    it      cs                          @ C set: the adc rounded up on an exact half
    biccs   r0, r0, #1                  @ so clear bit 0 to land on the even neighbour
    cmp     r2, #0xfe
    it      lo
    bxlo    lr
    b       .Lufix64_range

float_wrapper_section conv_ftoi64

@ ---------------------------------------------------------------------------
@ float to int64, truncating toward zero, clamping on overflow
@   in:   r0    = float
@   out:  r0:r1 = signed 64-bit integer (r0 = low word, r1 = high word)
@ Sets the fraction-bit count to zero and falls into the fixed-point routine.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    movs    r1, #0                      @ integer result: no bits after the point

@ ---------------------------------------------------------------------------
@ float to signed 64-bit fixed point, truncating toward zero, clamping
@   in:   r0    = float, r1 = number of bits after the binary point
@   out:  r0:r1 = result
@ An exponent field of zero gives 0. Out-of-range values and infinities clamp
@ to 0x7fffffffffffffff or 0x8000000000000000; NaNs give the positive clamp.
@ ---------------------------------------------------------------------------
regular_func float2fix64_z
    subs    r1, #0x95                   @ remove exponent bias, allow for mantissa length
    asrs    r2, r0, #23                 @ r2 = sign and exponent, sign-extended
    sub     r3, r2, #1
    sub     r0, r0, r3, lsl #23         @ r0 = mantissa with the implied 1 at bit 23
    uxtb    r3, r3                      @ r3 = exponent field less one, modulo 256
    cmp     r3, #0xfe
    bhs     .Lf2fix64_special           @ exponent field is 0 or 0xff
    adds    r1, r3                      @ r1 = left shift to apply to the mantissa
    bmi     .Lf2fix64_rshift            @ negative: it is really a right shift
    subs    r3, r1, #32
    bge     .Lf2fix64_hi                @ shift of 32 or more: low word will be zero
    subs    r3, r1, #8
    ble     .Lf2fix64_lo                @ shift of 8 or less: magnitude fits in the low word
    lsls    r0, #8                      @ shift straddles both words: top-align the mantissa
    rsbs    r1, r3, #32
    lsrs    r1, r0, r1                  @ high word = bits pushed past bit 31
    lsls    r0, r3                      @ low word = the remainder
    cmp     r2, #0
    it      ge
    bxge    lr                          @ positive input: done
    rsbs    r0, #0                      @ negative input: negate r0:r1
    sbcs    r1, r1, r1, lsl #1
    bx      lr

.Lf2fix64_lo:
    lsls    r0, r0, r1
    movs    r1, r2, asr #31             @ r1 = 0 or -1 from the sign; doubles as the high word
    eors    r0, r0, r1                  @ conditional negate: invert ...
    subs    r0, r0, r1                  @ ... then add one when negative
    bx      lr

.Lf2fix64_hi:
    cmp     r3, #8
    bge     .Lf2fix64_clamp             @ magnitude does not fit in 63 bits
    lsls    r0, r0, r3
    eor     r1, r0, r2, asr #31         @ high word, conditionally negated: invert ...
    add     r1, r1, r2, lsr #31         @ ... then add one when negative
    movs    r0, #0                      @ low word is zero
    bx      lr

.Lf2fix64_special:
    bhi     .Lf2fix64_zero              @ exponent field was zero
    lsls    r1, r0, #9                  @ exponent field 0xff: examine the mantissa bits
    it      ne
    movne   r2, #0                      @ NaN: treat as +Inf
    @ fall through to clamp by sign

.Lf2fix64_clamp:
    mvn     r1, #0x80000000             @ r1 = 0x7fffffff
    add     r1, r1, r2, lsr #31         @ becomes 0x80000000 for a negative input
    mvn     r0, r2, asr #31             @ low word: 0xffffffff if positive, 0 if negative
    bx      lr

.Lf2fix64_zero:
    movs    r0, #0
    movs    r1, #0
    bx      lr

.Lf2fix64_rshift:
    rsbs    r1, #0                      @ r1 = right shift amount
    usat    r1, #5, r1                  @ cap at 31, which already clears a 24-bit mantissa
    lsrs    r0, r0, r1
    eors    r0, r0, r2, asr #31         @ conditional negate: invert ...
    adds    r0, r0, r2, lsr #31         @ ... then add one when negative
    movs    r1, r0, asr #31             @ sign-extend into the high word
    bx      lr

float_wrapper_section conv_ftoui64

@ ---------------------------------------------------------------------------
@ float to uint64, truncating, clamping
@   in:   r0    = float
@   out:  r0:r1 = unsigned 64-bit integer (r0 = low word, r1 = high word)
@ Both names share one body; it sets the fraction-bit count to zero and falls
@ into the fixed-point routine.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_f2ulz
regular_func float2uint64
regular_func float2uint64_z
    movs    r1, #0                      @ integer result: no bits after the point

@ ---------------------------------------------------------------------------
@ float to unsigned 64-bit fixed point, truncating, clamping
@   in:   r0    = float, r1 = number of bits after the binary point
@   out:  r0:r1 = result
@ Negative finite values, -Inf and inputs with a zero exponent field give 0.
@ Values too large, +Inf and NaNs of either sign give 0xffffffffffffffff.
@ (A float2ufix64_z entry point is commented out in the original source.)
@ ---------------------------------------------------------------------------
regular_func float2ufix64
    subs    r1, #0x96                   @ remove exponent bias, allow for mantissa length
    asrs    r2, r0, #23                 @ r2 = sign and exponent, sign-extended
    sub     r3, r2, #1
    cmp     r3, #0xfe
    bhs     .Lf2ufix64_special          @ negative, exponent field 0, or exponent field 0xff
    sub     r0, r0, r3, lsl #23         @ r0 = mantissa with the implied 1 at bit 23
    adds    r1, r2                      @ r1 = left shift to apply to the mantissa
    bmi     .Lf2ufix64_rshift           @ negative: it is really a right shift
    subs    r2, r1, #7
    ble     .Lf2ufix64_lo               @ shift of 7 or less: fits in the low word
    subs    r3, r1, #32
    bge     .Lf2ufix64_hi               @ shift of 32 or more: low word will be zero
    lsls    r0, r0, #7                  @ shift straddles both words: pre-shift by 7
    rsbs    r3, r2, #32
    lsrs    r1, r0, r3                  @ high word = bits pushed past bit 31
    lsls    r0, r0, r2                  @ low word = the remainder
    bx      lr

.Lf2ufix64_lo:
    lsls    r0, r1
    movs    r1, #0
    bx      lr

.Lf2ufix64_hi:
    cmp     r1, #32+9
    bge     .Lf2ufix64_max              @ value does not fit in 64 bits
    lsls    r1, r0, r3
    movs    r0, #0
    bx      lr

.Lf2ufix64_max:
    mvn     r0, #0                      @ r0:r1 = 0xffffffffffffffff
    mvn     r1, #0
    bx      lr

.Lf2ufix64_rshift:
    rsbs    r1, #0                      @ r1 = right shift amount
    usat    r1, #5, r1                  @ cap at 31, which already clears a 24-bit mantissa
    lsrs    r0, r0, r1
    movs    r1, #0
    bx      lr

.Lf2ufix64_special:
    cmp     r0, #0xff800000
    bhi     .Lf2ufix64_max              @ unsigned above -Inf: negative NaN
    cmp     r0, #0x00800000
    bgt     .Lf2ufix64_max              @ positive with exponent field 0xff: +Inf or +NaN
    movs    r0, #0                      @ everything else here converts to 0
    movs    r1, #0
    bx      lr

#endif