/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include "pico/asm_helper.S"

#if !HAS_DOUBLE_COPROCESSOR
#error attempt to compile double_aeabi_rp2350 when there is no DCP
#else

#include "hardware/dcp_instr.inc.S"
#include "hardware/dcp_canned.inc.S"

pico_default_asm_setup

// double_section name
//   Switch to an executable ("ax") section derived from name: built with
//   RAM_SECTION_NAME when PICO_DOUBLE_IN_RAM is set, SECTION_NAME otherwise.
.macro double_section name
#if PICO_DOUBLE_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// double_wrapper_section func
//   Same as double_section, but the section is named after
//   WRAPPER_FUNC_NAME(func) rather than func itself.
.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm

// ============== STATE SAVE AND RESTORE ===============

// saving_func type func
//   type: prefix of the xxx_func macro that declares the entry point
//         (this file uses "wrapper" and "regular")
//   func: name of the function being declared
// Emits a save stub followed by the real entry point. The function body is
// written directly after the macro invocation; every body in this file ends
// with saving_func_return.
.macro saving_func type func
  // Note we are usually 32-bit aligned already at this point, as most of the
  // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
  // We want the PCMP word-aligned.
.p2align 2
  // When the engaged flag is set, branch back here to invoke save routine and
  // hook lr with the restore routine, then fall back through to the entry
  // point. The engaged flag will be clear when checked a second time.
  // NOTE(review): as written, the "b 1f" below jumps to the label after the
  // PCMP/bmi pair, so this path does not re-run the check; the stub is
  // 2 + 4 + 2 = 8 bytes, which keeps the entry point word-aligned.
1:
  push {lr} // 16-bit instruction
  bl generic_save_state // 32-bit instruction
  b 1f // 16-bit instruction
  // This is the actual entry point:
\type\()_func \func
  // Load the DCP status into the flags; N set takes the save stub above.
  PCMP apsr_nzcv
  bmi 1b
1:
.endm

// Return from a saving_func body. On the save path lr has been replaced with
// the address of generic_restore_state, so this same instruction runs the
// restore before returning to the original caller.
.macro saving_func_return
  bx lr
.endm

double_section __rp2350_dcp_engaged_state_save_restore
// generic_save_state
//   Entered by bl from the saving_func stub, after the stub has pushed the
//   original return address. Stores three 64-bit values read with PXMD, PYMD
//   and REFD (DCP instruction macros defined outside this file) on the stack,
//   then resumes the wrapped function with lr pointing at
//   generic_restore_state.
//   Stack on leaving this routine, low to high:
//     [sp+0]  PXMD pair   [sp+8] PYMD pair   [sp+16] REFD pair
//     [sp+24] return address pushed by the saving_func stub
//   r0 and r1 are saved and restored around the reads; r2 and r3 are not
//   touched.
.thumb_func
generic_save_state:
  sub sp, #24 // room for three 64-bit values
  push {r0, r1} // r0/r1 hold arguments and are used as transfer registers below
  // do save here
  // the "#8 +" in each offset skips the r0/r1 pair pushed above
  PXMD r0, r1
  strd r0, r1, [sp, #8 + 0]
  PYMD r0, r1
  strd r0, r1, [sp, #8 + 8]
  REFD r0, r1
  strd r0, r1, [sp, #8 + 16]
  pop {r0, r1}
  // Branch to the address bl left in lr (the "b 1f" in the stub); blx sets lr
  // to the next instruction, i.e. generic_restore_state.
  blx lr
  // <- wrapped function returns here
  // fall through into restore:
// generic_restore_state
//   Pops the three saved pairs in the order they were stored and writes them
//   back with WXMD, WYMD and WEFD. Only r12 and r14 are used as transfer
//   registers, so r0-r3 as left by the wrapped function are not modified.
//   The final pop loads pc with the return address pushed by the stub.
.thumb_func
generic_restore_state:
  // do restore here
  pop {r12, r14}
  WXMD r12, r14
  pop {r12, r14}
  WYMD r12, r14
  pop {r12, r14}
  WEFD r12, r14
  pop {pc}

// ============== ARITHMETIC FUNCTIONS ===============

// The dcp_xxx_m macros used below come from hardware/dcp_canned.inc.S.
// NOTE(review): their operand lists presumably name result registers first,
// then inputs, then scratch registers - confirm against that file.

double_wrapper_section __aeabi_dadd
saving_func wrapper __aeabi_dadd
  dcp_dadd_m r0,r1,r0,r1,r2,r3
  saving_func_return

double_wrapper_section __aeabi_dsub
saving_func wrapper __aeabi_dsub
  dcp_dsub_m r0,r1,r0,r1,r2,r3
  saving_func_return

// Same macro as __aeabi_dsub, with the r0:r1 and r2:r3 inputs swapped.
double_wrapper_section __aeabi_drsub
saving_func wrapper __aeabi_drsub
  dcp_dsub_m r0,r1,r2,r3,r0,r1
  saving_func_return

double_wrapper_section __aeabi_dmul
saving_func wrapper __aeabi_dmul

  // r4 and r14 are both handed to the multiply macro below, so they are
  // preserved around it: r14 holds the return address used by
  // saving_func_return.
  // todo optimize this based on final decision on saving_func_entry
  push {r4,r14}
  dcp_dmul_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r4,r12,r14
  // todo optimize this based on final decision on saving_func_entry
  pop {r4,lr}
  saving_func_return

// Placed in a plain (non-wrapper) section and declared with regular_func.
double_section ddiv_fast
saving_func regular ddiv_fast
  dcp_ddiv_fast_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
  saving_func_return

double_wrapper_section __aeabi_ddiv
saving_func wrapper __aeabi_ddiv
@ with correct rounding
  dcp_ddiv_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
  saving_func_return

// Placed in a plain (non-wrapper) section and declared with regular_func.
double_section sqrt_fast
saving_func regular sqrt_fast
  dcp_dsqrt_fast_m r0,r1,r0,r1,r0,r1,r2,r3,r12
  saving_func_return

double_wrapper_section sqrt
saving_func wrapper sqrt
@ with correct rounding
  dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
  saving_func_return

// todo not a real thing
// The classification result is delivered in the flags (apsr_nzcv); r0 and r1
// are only passed as the input pair.
// NOTE(review): the rounding remark below looks copied from the sqrt wrapper
// above - confirm whether it applies here.
double_wrapper_section __aeabi_dclassify
saving_func wrapper __aeabi_dclassify
@ with correct rounding
  dcp_dclassify_m apsr_nzcv,r0,r1
  saving_func_return

// ============== CONVERSION FUNCTIONS ===============

double_wrapper_section __aeabi_d2f
saving_func wrapper __aeabi_d2f
@ with rounding
  dcp_double2float_m r0,r0,r1
  saving_func_return

double_wrapper_section __aeabi_i2d
saving_func wrapper __aeabi_i2d
  dcp_int2double_m r0,r1,r0
  saving_func_return

double_wrapper_section __aeabi_ui2d
saving_func wrapper __aeabi_ui2d
  dcp_uint2double_m r0,r1,r0
  saving_func_return

double_wrapper_section __aeabi_d2iz
saving_func wrapper __aeabi_d2iz
@ with truncation towards 0
  dcp_double2int_m r0,r0,r1
  saving_func_return

double_wrapper_section __aeabi_d2uiz
saving_func wrapper __aeabi_d2uiz
@ with truncation towards 0
  dcp_double2uint_m r0,r0,r1
  saving_func_return

// todo not a real thing
double_wrapper_section __aeabi_d2i_r
saving_func wrapper __aeabi_d2i_r
@ with rounding
  dcp_double2int_r_m r0,r0,r1
  saving_func_return

// todo not a real thing
double_wrapper_section __aeabi_d2ui_r
saving_func wrapper __aeabi_d2ui_r
@ with rounding
  dcp_double2uint_r_m r0,r0,r1
  saving_func_return

// ============== COMPARISON FUNCTIONS ===============

// The comparison result is fetched into r0 rather than the flags, then reduced
// to a single bit.
double_wrapper_section __aeabi_dcmpun
saving_func wrapper __aeabi_dcmpun
  dcp_dcmp_m r0,r0,r1,r2,r3
  // extract unordered bit
  // (bit 28 is the V position in the APSR NZCV layout)
  ubfx r0, r0, #28, #1
  saving_func_return

// The three flag-returning comparisons below and their shared cmp_nan tail all
// live in this one section, so the bvs branches stay section-local.
double_wrapper_section __aeabi_dcmp

saving_func wrapper __aeabi_cdrcmple
  dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
  bvs cmp_nan
  saving_func_return

// these next two can be the same function in the absence of exceptions
saving_func wrapper __aeabi_cdcmple
//wrapper_func __aeabi_dcmp
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  bvs cmp_nan
  saving_func_return

// It is not clear from the ABI documentation whether cdcmpeq must set the C flag
// in the same way as cdcmple. If not, we could save the "bvs" below; but we
// err on the side of caution.
saving_func wrapper __aeabi_cdcmpeq
//wrapper_func __aeabi_dcmp
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  bvs cmp_nan
  saving_func_return

// If the result of a flag-setting comparison is "unordered" then we need to set C and clear Z.
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1 as (a) r14 here is a
// return address and r14b0=1 for Thumb mode; (b) we are unlikely to be returning to address 0.
cmp_nan:
  movs r12, #3 // r12 does not need to be preserved by the flag-setting comparisons
  lsrs r12, #1 // set C, clear Z
  // (3 >> 1 shifts a 1 out into C and leaves the non-zero result 1, so Z is clear)
  saving_func_return

// int FUNC_NAME(__aeabi_dcmpeq)(double, double) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
double_wrapper_section __aeabi_dcmpeq
saving_func wrapper __aeabi_dcmpeq
  dcp_dcmp_m r0,r0,r1,r2,r3
  // extract Z
  // (bit 30 is the Z position in the APSR NZCV layout)
  ubfx r0, r0, #30, #1
  saving_func_return

// int FUNC_NAME(__aeabi_dcmplt)(double, double) result (1, 0) denotes (<, ?>=) [2], use for C <
double_wrapper_section __aeabi_dcmplt
saving_func wrapper __aeabi_dcmplt
  // Operands are passed swapped (r2:r3 first), so HI (C set and Z clear) is
  // the condition that yields 1 here.
  dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
  ite hi
  movhi r0,#1
  movls r0,#0
  saving_func_return

// int FUNC_NAME(__aeabi_dcmple)(double, double) result (1, 0) denotes (<=, ?>) [2], use for C <=
double_wrapper_section __aeabi_dcmple
saving_func wrapper __aeabi_dcmple
  // Operands are passed swapped (r2:r3 first), so HS (C set) is the condition
  // that yields 1 here.
  dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
  ite hs
  movhs r0,#1
  movlo r0,#0
  saving_func_return

// int FUNC_NAME(__aeabi_dcmpge)(double, double) result (1, 0) denotes (>=, ?<) [2], use for C >=
double_wrapper_section __aeabi_dcmpge
saving_func wrapper __aeabi_dcmpge
  // Operands in natural order; HS (C set) yields 1.
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  ite hs
  movhs r0,#1
  movlo r0,#0
  saving_func_return

// int FUNC_NAME(__aeabi_dcmpgt)(double, double) result (1, 0) denotes (>, ?<=) [2], use for C >
double_wrapper_section __aeabi_dcmpgt
saving_func wrapper __aeabi_dcmpgt
  // Operands in natural order; HI (C set and Z clear) yields 1.
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  ite hi
  movhi r0,#1
  movls r0,#0
  saving_func_return

#endif