/*
 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include "pico/asm_helper.S"

#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED

#ifndef PICO_FLOAT_IN_RAM
#define PICO_FLOAT_IN_RAM 0
#endif

/*
 * Fixed addresses used by the shims below. The odd values are branch
 * targets (Thumb bit set); FLOAT_V1_ROM_PI_Q29 is read as data.
 * The first four names follow the labels given in this file; the last two
 * addresses are not named here, so their names are descriptive guesses.
 * NOTE(review): confirm all six against the V1 bootrom listing.
 */
#define FLOAT_V1_ROM_UNPACKX        0x29c1
#define FLOAT_V1_ROM_PACKX          0x29ef
#define FLOAT_V1_ROM_CORDIC_VEC     0x2b97
#define FLOAT_V1_ROM_PI_Q29         0x2cfc
#define FLOAT_V1_ROM_FLOAT2FIX_ORIG 0x2acd
#define FLOAT_V1_ROM_ATAN2_PACK     0x2b19

pico_default_asm_setup

// Open the executable section for a group of shims: a RAM section when
// PICO_FLOAT_IN_RAM is non-zero, the default section otherwise.
.macro float_section name
// todo separate flag for shims?
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// ---------------------------------------------------------------------------
// float_table_shim_on_use_helper
//
// In:   ip = address to start scanning from (must be non-zero)
// Out:  r0-r2 restored to their entry values; control transfers to the word
//       found after the marker halfword
// Side: stores that word into sf_table at the byte offset held in the low
//       byte of the marker halfword
//
// Scans forward a halfword at a time until it finds one whose top byte is
// 0xdf, then reads the 32-bit word that follows it.
// NOTE(review): the 0xdf halfword is presumably emitted by the
// (shimmable_) table_tail_call macro mentioned below - confirm.
// ---------------------------------------------------------------------------
float_section float_table_shim_on_use_helper
regular_func float_table_shim_on_use_helper
    push    {r0-r2, lr}
    mov     r0, ip
#ifndef NDEBUG
    // sanity check to make sure we were not called by a non (shimmable_) table_tail_call macro
    cmp     r0, #0
    bne     1f
    bkpt    #0
#endif
1:
    ldrh    r1, [r0]                // candidate marker halfword
    lsrs    r2, r1, #8              // r2 = its top byte
    adds    r0, #2                  // step past it
    cmp     r2, #0xdf
    bne     1b                      // not the marker: keep scanning
    uxtb    r1, r1                  // low byte = byte offset into sf_table
    lsrs    r2, r0, #2              // carry = bit 1 of the address
    bcc     1f                      // clear: word aligned, single load
    // not word aligned: build the word from two halfword loads
    ldrh    r2, [r0, #0]
    ldrh    r0, [r0, #2]
    lsls    r0, #16
    orrs    r0, r2
    b       2f
1:
    ldr     r0, [r0]
2:
    ldr     r2, =sf_table
    str     r0, [r2, r1]            // patch the table entry
    str     r0, [sp, #12]           // overwrite the saved lr slot ...
    pop     {r0-r2, pc}             // ... so this pop jumps to the loaded word

float_section 642float_shims

// uint64 -> float with rounding: r0:r1 = value.
regular_func uint642float_shim
    movs    r2, #0                  // zero fractional bits, then fall through

// Unsigned 64-bit fixed point -> float with rounding.
// In: r0:r1 = value, r2 = number of bits after the binary point.
regular_func ufix642float_shim
    push    {r4, r5, lr}
    cmp     r1, #0
    bpl     3f                      // top bit clear: the signed code handles it
    lsls    r5, r1, #31             // bit shifted out of r1 goes to the sticky word
    orrs    r5, r0
    lsrs    r0, r1, #1              // r0 = high word halved
    subs    r2, #1                  // account for the halving
    b       4f

// int64 -> float with rounding: r0:r1 = value.
regular_func int642float_shim
    movs    r2, #0                  // zero fractional bits, then fall through

// Signed 64-bit fixed point -> float with rounding.
// In: r0:r1 = value, r2 = number of bits after the binary point.
regular_func fix642float_shim
    push    {r4, r5, lr}
3:
    movs    r5, r0
    orrs    r5, r1
    beq     ret_pop45               // value is zero: return +0 (r0 already 0)
    asrs    r5, r1, #31             // r5 = sign extension of the value
2:
    asrs    r4, r1, #24             // normalise 7 bits at a time
    cmp     r4, r5
    bne     1f                      // another shift would overflow: stop
    lsls    r1, #7
    lsrs    r4, r0, #25
    orrs    r1, r4
    lsls    r0, #7
    adds    r2, #7
    b       2b
1:
    movs    r5, r0                  // low word goes to r5
    movs    r0, r1                  // high word becomes the mantissa argument
4:
    negs    r2, r2
    adds    r2, #32+29

    // bl packx
    ldr     r1, =FLOAT_V1_ROM_PACKX
    blx     r1
ret_pop45:
    pop     {r4, r5, pc}

// ---------------------------------------------------------------------------
// fatan2_shim
//
// In:  r0, r1 = the two float arguments
// Out: leaves through the routine at FLOAT_V1_ROM_ATAN2_PACK with the angle
//      in r0 and r2 = 0; r4, r5 and lr are still on the stack at that point
// NOTE(review): the register contracts of unpackx and cordic_vec are not
// visible in this file; the register roles noted below are read off how
// this code uses the results.
// ---------------------------------------------------------------------------
float_section fatan2_shim
regular_func fatan2_shim
    push    {r4, r5, lr}

    ldr     r4, =FLOAT_V1_ROM_UNPACKX // unpackx
    mov     ip, r4
    // unpack both arguments and shift one down so they share an exponent
    blx     ip
    mov     r4, r0                  // exchange r0 <-> r1
    mov     r0, r1
    mov     r1, r4
    mov     r4, r2                  // exchange r2 <-> r3
    mov     r2, r3
    mov     r3, r4
    blx     ip
    lsls    r0, r0, #5              // x mantissa, Q28
    lsls    r1, r1, #5              // y mantissa, Q28
    adds    r4, r2, r3              // -760 when both arguments are 0, at least -380-126=-506 otherwise
    asrs    r4, #9
    adds    r4, #1
    bmi     2f                      // force y to 0 proper, so the result will be zero
    subs    r4, r2, r3              // shift = ex - ey
    bge     1f                      // ex >= ey?
    negs    r4, r4                  // make the shift positive
    asrs    r0, r4
    cmp     r4, #28
    blo     3f
    asrs    r0, #31
    b       3f
1:
    asrs    r1, r4
    cmp     r4, #28
    blo     3f
2:
    // here |x|>>|y| or both x and y are +/-0
    cmp     r0, #0
    bge     4f                      // x positive: return signed 0
    ldr     r3, =FLOAT_V1_ROM_PI_Q29 // &pi_q29, circular coefficients
    ldr     r0, [r3]                // x negative: return +/- pi
    asrs    r1, #31
    eors    r0, r1
    b       7f
4:
    asrs    r0, r1, #31
    b       7f
3:
    movs    r2, #0                  // initial angle
    ldr     r3, =FLOAT_V1_ROM_PI_Q29 // &pi_q29, circular coefficients
    cmp     r0, #0                  // x negative?
    bge     5f
    negs    r0, r0                  // rotate into the 1st/4th quadrants
    negs    r1, r1
    ldr     r2, [r3]                // pi Q29
5:
    movs    r4, #1                  // m=1
    ldr     r5, =FLOAT_V1_ROM_CORDIC_VEC // cordic_vec
    blx     r5                      // also yields the magnitude (scaling factor 1.646760119), which is discarded
    mov     r0, r2                  // result here is -pi/2..3pi/2 Q29
@ asrs r2,#29
@ subs r0,r2
    ldr     r3, =FLOAT_V1_ROM_PI_Q29 // &pi_q29, circular coefficients
    ldr     r2, [r3]                // pi Q29
    adds    r4, r0, r2              // attempt to fix the -3pi/2..-pi case
    bcs     6f                      // -pi/2..0? leave the result as is
    subs    r4, r0, r2              // <pi? leave as is
    bmi     6f
    subs    r0, r4, r2              // >pi: take off 2pi
6:
    subs    r0, #1                  // fiddle factor so atan2(0,1)==0
7:
    movs    r2, #0                  // exponent for pack
    ldr     r3, =FLOAT_V1_ROM_ATAN2_PACK
    bx      r3

float_section float232_shims

// float -> int32: r0 = float.
regular_func float2int_shim
    movs    r1, #0                  // zero fractional bits, then fall through

// float -> 32-bit fixed point: r0 = float, r1 = fractional bit count.
regular_func float2fix_shim
    // check for -0 or -denormal upfront: (r0 asr 23) is -256 exactly when
    // the sign bit is set and the exponent field is zero
    asrs    r2, r0, #23
    adds    r2, #128
    adds    r2, #128
    beq     1f
    // call original
    ldr     r2, =FLOAT_V1_ROM_FLOAT2FIX_ORIG
    bx      r2
1:
    movs    r0, #0
    bx      lr

float_section float264_shims

// float -> int64: r0 = float.
regular_func float2int64_shim
    movs    r1, #0                  // zero fractional bits, then fall through

// float -> signed 64-bit fixed point: r0 = float, r1 = fractional bit count.
regular_func float2fix64_shim
    push    {lr}
    bl      f2fix
    b       d2f64_a                 // range check; its pop {pc} returns to our caller

// float -> uint64: r0 = float.
regular_func float2uint64_shim
    movs    r1, #0                  // zero fractional bits, then fall through

// float -> unsigned 64-bit fixed point: r0 = float, r1 = fractional bit count.
regular_func float2ufix64_shim
    asrs    r3, r0, #23             // sign bit set? return 0
    bmi     ret_dzero
    // otherwise fall through into f2fix

// f2fix: convert the float in r0 to signed fixed point in r0:r1:r3 with r1
// places after the point, rounding towards -Inf. The result is clamped so
// that r3 can only be 0 or -1. Trashes r12 (ip).
.thumb_func
f2fix:
    push    {r4, lr}
    mov     ip, r1                  // keep the requested point position
    asrs    r3, r0, #31             // r3 = sign extension bits
    lsls    r0, #1                  // drop the sign bit
    lsrs    r2, r0, #24             // r2 = exponent field
    beq     1f                      // exponent field zero: result is zero
    cmp     r2, #0xff               // exponent field all ones (Inf)?
    beq     2f
    subs    r1, r2, #1
    subs    r2, #0x7f               // remove the exponent bias
    lsls    r1, #24
    subs    r0, r1                  // insert the implied 1
    eors    r0, r3
    subs    r0, r3                  // top word now two's complement
    asrs    r1, r0, #4              // convert to double format
    lsls    r0, #28
    ldr     r4, =d2fix_a
    bx      r4                      // d2fix_a pops r4 and pc for us
1:
    movs    r0, #0
    movs    r1, r0
    movs    r3, r0
    pop     {r4, pc}
2:
    mvns    r0, r3                  // return the max/min value
    mvns    r1, r3
    pop     {r4, pc}

// Return 64-bit zero in r0:r1.
ret_dzero:
    movs    r0, #0
    movs    r1, #0
    bx      lr

float_section d2fix_a_float

// d2fix_a: shift a 64-bit mantissa into its fixed-point position.
// In:
//   r0:r1  two's complement mantissa
//   r2     unbiased exponent
//   r3     mantissa sign extension bits
//   ip     requested number of places after the point
//   stack  r4 and the return address, pushed by the code that jumped here
.weak d2fix_a // weak because it exists in float shims too
.thumb_func
d2fix_a:
    add     r2, ip                  // exponent plus offset for the required binary point position
    subs    r2, #52                 // required shift
    bmi     1f                      // negative: shift down
    // shift up by r2 places
    cmp     r2, #12                 // will clamp?
    bge     2f
    movs    r4, r0
    lsls    r1, r2
    lsls    r0, r2
    negs    r2, r2
    adds    r2, #32                 // complementary shift
    lsrs    r4, r2
    orrs    r1, r4
    pop     {r4, pc}
2:
    mvns    r0, r3
    mvns    r1, r3                  // overflow: clamp to the extreme fixed-point values
    pop     {r4, pc}
1:
    // shift down by -r2 places
    adds    r2, #32
    bmi     1f                      // long shift?
    mov     r4, r1
    lsls    r4, r2
    negs    r2, r2
    adds    r2, #32                 // complementary shift
    asrs    r1, r2
    lsrs    r0, r2
    orrs    r0, r4
    pop     {r4, pc}
1:
    // long shift down
    movs    r0, r1
    asrs    r1, #31                 // shift down 32 places
    adds    r2, #32
    bmi     1f                      // very long shift?
    negs    r2, r2
    adds    r2, #32
    asrs    r0, r2
    pop     {r4, pc}
1:
    movs    r0, r3                  // result very near zero: use the sign extension bits
    movs    r1, r3
    pop     {r4, pc}

// d2f64_a: final range check for the signed 64-bit conversions.
// In:  r0:r1 = result, r3 = sign extension bits, return address on the stack.
d2f64_a:
    asrs    r2, r1, #31
    cmp     r2, r3
    bne     1f                      // sign extension bits do not match the sign of the result?
    pop     {pc}
1:
    mvns    r0, r3
    movs    r1, #1
    lsls    r1, #31
    eors    r1, r1, r0              // generate the extreme fixed-point values
    pop     {pc}

// ---------------------------------------------------------------------------
// float2double_shim
//
// In:  r0 = float bit pattern
// Out: r0 = low word, r1 = high word of the double bit pattern
// An exponent field of zero returns signed zero; an exponent field of 0xff
// returns signed infinity.
// ---------------------------------------------------------------------------
float_section float2double_shim
regular_func float2double_shim
    lsrs    r3, r0, #31             // isolate the sign bit ...
    lsls    r3, #31                 // ... back in bit 31
    lsls    r1, r0, #1
    lsrs    r2, r1, #24             // exponent field
    beq     1f                      // zero?
    cmp     r2, #0xff               // Inf?
    beq     2f
    lsrs    r1, #4                  // exponent and top 20 bits of mantissa
    ldr     r2, =(0x3ff-0x7f)<<20   // difference in exponent offsets
    adds    r1, r2
    orrs    r1, r3
    lsls    r0, #29                 // bottom 3 bits of mantissa
    bx      lr
1:
    movs    r1, r3                  // return signed zero
3:
    movs    r0, #0
    bx      lr
2:
    ldr     r1, =0x7ff00000         // return signed infinity
    adds    r1, r3
    b       3b

#endif