/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

/*
 * Conversions between 32/64-bit integers or fixed-point values and IEEE 754
 * double precision, in Thumb-2 assembly.
 *
 * Conventions visible in the code below:
 *   - 64-bit integers and doubles are held in r0:r1 with r1 as the high word
 *     (for a double, r1 carries sign, exponent and the top 20 mantissa bits).
 *   - r2, r3, r12 and the flags are overwritten; r14 is the return address.
 *   - No routine uses the stack.
 *   - Numeric local labels are resolved with the f (forward) and b (backward)
 *     suffixes, so the same digit is reused from routine to routine.
 */

#include "pico/asm_helper.S"
#if HAS_DOUBLE_COPROCESSOR

pico_default_asm_setup

/*
 * double_section name
 * Switch to an allocatable, executable section whose name is derived from the
 * argument: via RAM_SECTION_NAME when PICO_DOUBLE_IN_RAM is non-zero, via
 * SECTION_NAME otherwise.
 */
.macro double_section name
#if PICO_DOUBLE_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

/*
 * double_wrapper_section func
 * As double_section, with the name first passed through WRAPPER_FUNC_NAME.
 */
.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm

double_wrapper_section conv_tod

@ ---------------------------------------------------------------------------
@ __aeabi_l2d / int642double
@ Convert a signed 64-bit integer to double, rounding.
@   In:  r0:r1 = value
@   Out: r0:r1 = double
@ Sets the fraction-bit count to zero and falls through into fix642double.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_l2d
regular_func int642double
    movs    r2,#0                   @ no fraction bits; fall through

@ ---------------------------------------------------------------------------
@ fix642double
@ Convert a signed 64-bit fixed-point value to double, rounding (ties go to
@ even).
@   In:  r0:r1 = value, r2 = number of bits of r0:r1 after the binary point
@   Out: r0:r1 = double. If the exponent field would fall outside 1..0x7fe the
@        result is a zero (too small) or an infinity (too large) carrying the
@        sign of the input.
@ Non-negative inputs are handled by the unsigned code at label 10 in
@ ufix642double. Negative inputs are negated and converted here; if the
@ magnitude fits in 32 bits the work is passed to fix2double_neg.
@ ---------------------------------------------------------------------------
regular_func fix642double
    cmp     r1,#0
    bge     10f                     @ positive? can use unsigned code
    rsbs    r0,#0
    sbc     r1,r1,r1,lsl#1          @ make positive: r0:r1 = -r0:r1
    cbz     r1,7f                   @ high word is zero?
    clz     r3,r1
    subs    r3,#11                  @ left shift that puts the leading 1 at bit 20 of r1
    bmi     2f                      @ negative: need a right shift with rounding
    rsbs    r12,r3,#32
    lsrs    r12,r0,r12              @ bits of r0 that move up into r1
    lsls    r0,r3
    lsls    r1,r3
    orrs    r1,r1,r12
    add     r2,r2,r3
    rsbs    r2,#0
    add     r2,#0x3ff+19+32         @ r2 = exponent field - 1 (the leading 1 at bit 20 adds the 1)
    add     r1,r1,r2,lsl#20         @ insert exponent
    orr     r1,#0x80000000          @ set sign bit
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo                      @ exponent in range? (unsigned compare also catches r2 < 0)
    bxlo    r14
    b       3f                      @ over/underflow
7:
    mov     r1,r2                   @ fix2double_neg expects the fraction-bit count in r1
    b       fix2double_neg
2:
    @ More than 53 significant bits: shift right and round to nearest.
    @ Only the lsls below sets flags; everything up to the beq/adcs must leave
    @ them alone, which is why the non-flag-setting forms are used.
    add     r3,#33
    lsls    r12,r0,r3               @ rounding bit in carry, sticky bits in Z
    sub     r3,#1
    lsl     r12,r1,r3               @ bits of r1 that move down into r0
    rsb     r3,#32                  @ r3 = right shift count
    lsr     r0,r3
    lsr     r1,r3
    orr     r0,r0,r12
    sub     r2,r3,r2
    add     r2,#0x3ff+19+32         @ r2 = exponent field - 1
    beq     4f                      @ potential rounding tie?
    adcs    r0,r0,#0                @ add rounding bit to low word
5:
    adc     r1,r1,r2,lsl#20         @ insert exponent, add rounding
    orr     r1,#0x80000000          @ set sign bit
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo
    bxlo    r14
@ over/underflow? flags still hold the result of cmp r2,r3
3:
    mov     r1,#0x80000000          @ underflow: return -0 (input was negative)
    it      ge
    movtge  r1,#0xfff0              @ overflow: return -Inf
    mov     r0,#0
    bx      r14
4:
    bcc     5b                      @ not a rounding tie after all
    adcs    r0,r0,#0
    bic     r0,r0,#1                @ force to even
    b       5b

@ ---------------------------------------------------------------------------
@ __aeabi_ul2d / uint642double
@ Convert an unsigned 64-bit integer to double, rounding.
@   In:  r0:r1 = value
@   Out: r0:r1 = double
@ Sets the fraction-bit count to zero and falls through into ufix642double.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_ul2d
regular_func uint642double
    movs    r2,#0                   @ no fraction bits; fall through

@ ---------------------------------------------------------------------------
@ ufix642double
@ Convert an unsigned 64-bit fixed-point value to double, rounding (ties go
@ to even).
@   In:  r0:r1 = value, r2 = number of bits of r0:r1 after the binary point
@   Out: r0:r1 = double. If the exponent field would fall outside 1..0x7fe the
@        result is +0 (too small) or +Inf (too large).
@ Label 10 is also the entry used by fix642double for non-negative inputs.
@ If the high word is zero the work is passed to ufix2double.
@ ---------------------------------------------------------------------------
regular_func ufix642double
10:
    cbz     r1,7f                   @ high word zero?
    clz     r3,r1
    subs    r3,#11                  @ left shift that puts the leading 1 at bit 20 of r1
    bmi     2f                      @ negative: need a right shift with rounding
    rsbs    r12,r3,#32
    lsrs    r12,r0,r12              @ bits of r0 that move up into r1
    lsls    r0,r3
    lsls    r1,r3
    orrs    r1,r1,r12
    add     r2,r2,r3
    rsbs    r2,#0
    add     r2,#0x3ff+19+32         @ r2 = exponent field - 1
    add     r1,r1,r2,lsl#20         @ insert exponent
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo                      @ exponent in range?
    bxlo    r14
    b       3f                      @ over/underflow
7:
    mov     r1,r2                   @ ufix2double expects the fraction-bit count in r1
    b       ufix2double
2:
    @ More than 53 significant bits: shift right and round to nearest.
    @ Flags set by the lsls must survive until the beq/adcs below.
    add     r3,#33
    lsls    r12,r0,r3               @ rounding bit in carry, sticky bits in Z
    sub     r3,#1
    lsl     r12,r1,r3               @ bits of r1 that move down into r0
    rsb     r3,#32                  @ r3 = right shift count
    lsr     r0,r3
    lsr     r1,r3
    orr     r0,r0,r12
    sub     r2,r3,r2
    add     r2,#0x3ff+19+32         @ r2 = exponent field - 1
    beq     4f                      @ potential rounding tie?
    adcs    r0,r0,#0                @ add rounding bit to low word
5:
    adc     r1,r1,r2,lsl#20         @ insert exponent, add rounding
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo
    bxlo    r14
@ over/underflow? flags still hold the result of cmp r2,r3
3:
    mov     r1,#0                   @ underflow: return +0
    it      ge
    movtge  r1,#0x7ff0              @ overflow: return +Inf
    mov     r0,#0
    bx      r14
4:
    bcc     5b                      @ not a rounding tie after all
    adcs    r0,r0,#0
    bic     r0,r0,#1                @ force to even
    b       5b

@ ---------------------------------------------------------------------------
@ fix2double
@ Convert a signed 32-bit fixed-point value to double. No rounding code is
@ needed: a 32-bit value always fits in the mantissa.
@   In:  r0 = value, r1 = number of bits of r0 after the binary point
@   Out: r0:r1 = double. If the exponent field would fall outside 1..0x7fe the
@        result is a signed zero or a signed infinity.
@ Non-negative inputs branch to ufix2double.
@
@ fix2double_neg is a secondary entry (also used by fix642double):
@   In:  r0 = non-zero magnitude, r1 = fraction-bit count; result is negative.
@ ---------------------------------------------------------------------------
regular_func fix2double
    cmp     r0,#0
    bge     ufix2double             @ positive? can use unsigned code
    rsbs    r0,#0                   @ make positive
fix2double_neg:
    clz     r3,r0
    subs    r3,#11                  @ left shift that puts the leading 1 at bit 20
    bmi     2f                      @ negative: value spans both result words
    lsls    r0,r3
    add     r2,r1,r3
    rsbs    r2,#0
    add     r2,#0x3ff+19            @ r2 = exponent field - 1
    add     r1,r0,r2,lsl#20         @ insert exponent
    orr     r1,#0x80000000          @ set sign bit
    mov     r0,#0                   @ low mantissa word is empty
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo                      @ exponent in range?
    bxlo    r14
    b       3f                      @ over/underflow
2:
    rsb     r3,#0                   @ r3 = right shift count
    lsrs    r12,r0,r3               @ high mantissa bits
    rsb     r2,r3,#32
    lsls    r0,r0,r2                @ remaining bits go to the top of the low word
    sub     r2,r3,r1
    add     r2,#0x3ff+19            @ r2 = exponent field - 1
    add     r1,r12,r2,lsl#20        @ insert exponent
    orr     r1,#0x80000000          @ set sign bit
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo
    bxlo    r14
@ over/underflow? flags still hold the result of cmp r2,r3
3:
    mov     r1,#0x80000000          @ underflow: return -0
    it      ge
    movtge  r1,#0xfff0              @ overflow: return -Inf
    mov     r0,#0
    bx      r14

@ ---------------------------------------------------------------------------
@ ufix2double
@ Convert an unsigned 32-bit fixed-point value to double (exact).
@   In:  r0 = value, r1 = number of bits of r0 after the binary point
@   Out: r0:r1 = double. Zero input returns +0. If the exponent field would
@        fall outside 1..0x7fe the result is +0 or +Inf.
@ ---------------------------------------------------------------------------
regular_func ufix2double
    cbz     r0,1f                   @ zero? return it
    clz     r3,r0
    subs    r3,#11                  @ left shift that puts the leading 1 at bit 20
    bmi     2f                      @ negative: value spans both result words
    lsls    r0,r3
    add     r2,r1,r3
    rsbs    r2,#0
    add     r2,#0x3ff+19            @ r2 = exponent field - 1
    add     r1,r0,r2,lsl#20         @ insert exponent
    mov     r0,#0                   @ low mantissa word is empty
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo                      @ exponent in range?
    bxlo    r14
    b       3f                      @ over/underflow
2:
    rsbs    r3,#0                   @ r3 = right shift count
    lsrs    r12,r0,r3               @ high mantissa bits
    rsb     r2,r3,#32
    lsls    r0,r0,r2                @ remaining bits go to the top of the low word
    sub     r2,r3,r1
    add     r2,#0x3ff+19            @ r2 = exponent field - 1
    add     r1,r12,r2,lsl#20        @ insert exponent
    movw    r3,#0x7fe
    cmp     r2,r3
    it      lo
    bxlo    r14
@ over/underflow? flags still hold the result of cmp r2,r3
3:
    mov     r1,#0                   @ underflow: return +0
    it      ge
    movtge  r1,#0x7ff0              @ overflow: return +Inf
    mov     r0,#0
    bx      r14
1:
    movs    r1,#0                   @ r0 is already 0: return +0
    bx      r14

double_wrapper_section conv_dtoi64

@ ---------------------------------------------------------------------------
@ __aeabi_d2lz / double2int64_z
@ Convert a double to a signed 64-bit integer, rounding towards 0, clamping.
@   In:  r0:r1 = double
@   Out: r0:r1 = integer
@ Sets the fraction-bit count to zero and falls through into double2fix64_z.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_d2lz
regular_func double2int64_z
    movs    r2,#0                   @ no fraction bits; fall through

@ ---------------------------------------------------------------------------
@ double2fix64_z
@ Convert a double to signed 64-bit fixed point, rounding towards 0, clamping.
@   In:  r0:r1 = double, r2 = fixed-point precision (added to the exponent)
@   Out: r0:r1 = fixed-point value
@ Special cases as coded below:
@   exponent field 0 (zeros, denormals)  -> 0
@   NaN                                  -> treated as +Inf
@   +Inf or positive overflow            -> 0x7fffffff:0xffffffff
@   -Inf or negative overflow            -> 0x80000000:0x00000000
@ r12 keeps the sign-extended sign/exponent so the sign can be tested at the
@ end of each path.
@ ---------------------------------------------------------------------------
regular_func double2fix64_z
    sub     r2,#0x3ff+52-1          @ remove exponent bias, compensate for mantissa length
    asrs    r12,r1,#20              @ sign and exponent
    sub     r3,r12,#1
    sub     r1,r1,r3,lsl#20         @ install implied 1, clear exponent
    lsls    r3,#21                  @ (exponent - 1) in the top 11 bits, sign shifted out
    cmp     r3,#0xffc00000
    bhs     1f                      @ 0, Inf/NaN?
    adds    r2,r2,r3,lsr#21         @ offset exponent by fix precision; r2 is now required left shift
    bmi     4f                      @ actually a right shift?
    cmp     r2,#11                  @ overflow?
    bge     5f
    lsls    r1,r2
    rsbs    r3,r2,#32
    lsrs    r3,r0,r3                @ bits of r0 that move up into r1
    orrs    r1,r1,r3
    lsls    r0,r2
    cmp     r12,#0
    it      ge
    bxge    r14                     @ positive: done
    rsbs    r0,#0
    sbc     r1,r1,r1,lsl#1          @ negative: negate the truncated magnitude
    bx      r14
4:
    adds    r2,#32
    ble     6f                      @ result fits in low word?
    lsl     r3,r1,r2                @ bits of r1 that move down into r0
    rsbs    r2,#32                  @ r2 = right shift count
    lsrs    r1,r2
    lsrs    r0,r2
    orrs    r0,r0,r3
    cmp     r12,#0
    it      ge
    bxge    r14                     @ positive: done
    rsbs    r0,#0
    sbc     r1,r1,r1,lsl#1          @ negative: negate the truncated magnitude
    bx      r14
6:
    rsbs    r2,#0
    usat    r2,#5,r2                @ underflow to 0: cap the shift at 31
    lsrs    r0,r1,r2
    movs    r1,#0
    cmp     r12,#0
    it      ge
    bxge    r14                     @ positive: done
    rsbs    r0,#0
    sbc     r1,r1,r1,lsl#1          @ negative: negate the truncated magnitude
    bx      r14
1:
    beq     3f                      @ +/-Inf or +/-NaN?
2:
    movs    r0,#0                   @ +/-0: return 0
    movs    r1,#0
    bx      r14
3:
    orrs    r1,r0,r1,lsl#12         @ mantissa field
    it      ne                      @ NaN?
    movne   r12,#0                  @ treat NaNs as +Inf
@ here the original argument was +/-Inf (or NaN), or the result overflowed
5:
    mvn     r1,#0x80000000
    add     r1,r1,r12,lsr#31        @ so -Inf -> 0x80000000, +Inf -> 0x7fffffff
    mvn     r0,r12,asr#31           @ low word: 0 when negative, 0xffffffff when positive
    bx      r14

double_wrapper_section conv_dtoui64

@ ---------------------------------------------------------------------------
@ __aeabi_d2ulz / double2uint64 / double2uint64_z
@ Convert a double to an unsigned 64-bit integer, clamping. Fraction bits are
@ discarded and negative inputs return 0.
@   In:  r0:r1 = double
@   Out: r0:r1 = integer
@ Sets the fraction-bit count to zero and falls through into double2ufix64.
@ ---------------------------------------------------------------------------
wrapper_func __aeabi_d2ulz
regular_func double2uint64
regular_func double2uint64_z
    movs    r2,#0                   @ no fraction bits; fall through

@ ---------------------------------------------------------------------------
@ double2ufix64 / double2ufix64_z
@ Convert a double to unsigned 64-bit fixed point, clamping.
@   In:  r0:r1 = double, r2 = fixed-point precision (added to the exponent)
@   Out: r0:r1 = fixed-point value
@ Special cases as coded below:
@   exponent field 0 (zeros, denormals)      -> 0
@   any negative number, including -Inf      -> 0
@   NaN of either sign, +Inf, overflow       -> 0xffffffff:0xffffffff
@ ---------------------------------------------------------------------------
regular_func double2ufix64
regular_func double2ufix64_z
    subw    r2,r2,#0x3ff+52-1       @ remove exponent bias, compensate for mantissa length
    asrs    r3,r1,#20               @ sign and exponent; N flag = sign of argument
    sub     r3,#1                   @ non-flag-setting so N survives until the bmi
    sub     r1,r1,r3,lsl#20         @ install implied 1, clear exponent and sign
    bmi     7f                      @ argument negative?
    movw    r12,#0x7fe
    cmp     r3,r12
    bhs     1f                      @ 0, Inf/NaN?
    adds    r2,r3                   @ offset exponent by fix precision; r2 is now required left shift
    bmi     2f                      @ actually a right shift?
    cmp     r2,#12                  @ overflow?
    bge     4f
    lsls    r1,r2
    rsbs    r3,r2,#32
    lsrs    r3,r0,r3                @ bits of r0 that move up into r1
    lsls    r0,r2
    orrs    r1,r1,r3
    bx      r14
2:
    adds    r2,#32
    ble     5f                      @ result fits in low word?
    lsl     r3,r1,r2                @ bits of r1 that move down into r0
    rsbs    r2,#32                  @ r2 = right shift count
    lsrs    r1,r2
    lsrs    r0,r2
    orrs    r0,r0,r3
    bx      r14
5:
    rsbs    r2,#0
    usat    r2,#5,r2                @ underflow to 0: cap the shift at 31
    lsrs    r0,r1,r2
    movs    r1,#0
    bx      r14
1:
    bhi     3f                      @ exponent field was 0? return 0
4:
@ here overflow has occurred (or the argument was +Inf or a NaN)
    mvn     r0,#0
    mvn     r1,#0
    bx      r14
7:
    cmp     r3,#0xfffffffe          @ negative with exponent field 0x7ff?
    bne     3f                      @ any other negative value (including -0): return 0
    orrs    r2,r0,r1,lsl#12         @ mantissa field
    bne     4b                      @ negative NaN: return all ones; -Inf falls through to 0
3:
    movs    r0,#0
    movs    r1,#0
    bx      r14

#endif