1/*
2 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#include "pico/asm_helper.S"
8#if HAS_DOUBLE_COPROCESSOR
9#include "hardware/dcp_instr.inc.S"
10#include "hardware/dcp_canned.inc.S"
11
12pico_default_asm_setup
13
14// todo alignment
15//__pre_init __aeabi_float_init, 00020
16// factor out save/restore (there is a copy in double code)
17
// float_section name
//   Switch to the allocatable, executable section derived from the given name.
//   The section name comes from RAM_SECTION_NAME when PICO_FLOAT_IN_RAM is
//   non-zero and from SECTION_NAME otherwise (both are defined outside this
//   file).
.macro float_section name
#if !PICO_FLOAT_IN_RAM
.section SECTION_NAME(\name), "ax"
#else
.section RAM_SECTION_NAME(\name), "ax"
#endif
.endm
25
// float_wrapper_section func
//   Same as float_section, but the section is named after the wrapper symbol
//   that WRAPPER_FUNC_NAME produces for the given function.
.macro float_wrapper_section func
    float_section WRAPPER_FUNC_NAME(\func)
.endm
29
30// ============== STATE SAVE AND RESTORE ===============
31
// saving_func func
//   Emit the entry sequence for a DCP-backed wrapper. Code layout produced:
//
//     1:      push {lr} / bl generic_save_state / b 1f    (slow path, 8 bytes)
//     func:   PCMP apsr_nzcv / bmi 1b                     (actual entry point)
//     1:      ...function body supplied by the caller of this macro...
//
//   Fast path: PCMP puts the coprocessor status in the flags; with N clear the
//   bmi is not taken and execution falls into the body.
//   Slow path (N set, i.e. the engaged flag is set): the return address is
//   pushed and generic_save_state is called. That routine saves the
//   coprocessor state on the stack and then calls back to the "b 1f" below
//   with lr pointing at the restore routine. The "b 1f" resolves to the label
//   after the bmi, so the body is entered directly and the engaged check is
//   not repeated.
.macro saving_func func
    // The slow path is exactly 8 bytes (16 + 32 + 16 bit instructions), so
    // aligning its start to a word boundary also word-aligns the PCMP at the
    // entry point. Usually no padding is needed here, as most function bodies
    // consist of exactly two 16-bit instructions after the PCMP: bmi and bx lr.
.balign 4
1:
    push {lr}               // 16-bit: real return address, popped by the restore routine
    bl generic_save_state   // 32-bit: save state; comes back to the next instruction
    b 1f                    // 16-bit: enter the body with lr hooked to the restore routine
    // Actual entry point of the wrapper:
wrapper_func \func
    PCMP apsr_nzcv
    bmi 1b
1:
.endm
50
// saving_func_return
//   Return from a function opened with saving_func. On the fast path lr is the
//   address of the original caller; on the slow path lr was replaced with the
//   restore routine, which runs first and then returns to the original caller.
.macro saving_func_return
    bx lr
.endm
54
float_section __rp2350_dcp_engaged_state_save_restore

// generic_save_state
//   Reached via "bl" from the slow path of saving_func, after that path has
//   pushed the wrapper's real return address. Saves three 64-bit values read
//   with PXMD, PYMD and REFD on the stack, then calls back to the instruction
//   after the "bl" with lr pointing at generic_restore_state.
//   r0 and r1 are preserved; r2 and r3 are not touched.
//
//   Stack after the save, lowest address first:
//     sp+0   PXMD pair
//     sp+8   PYMD pair
//     sp+16  REFD pair
//     sp+24  return address pushed by saving_func
.thumb_func
generic_save_state:
    sub sp, sp, #24             // reserve the three 8-byte save slots
    push {r0, r1}               // keep the wrapped function's first two arguments
    // Slot offsets below are 8 bytes higher than in the table above because
    // r0 and r1 are temporarily on top of the stack.
    PXMD r0, r1
    strd r0, r1, [sp, #8]       // slot 0
    PYMD r0, r1
    strd r0, r1, [sp, #16]      // slot 1
    REFD r0, r1
    strd r0, r1, [sp, #24]      // slot 2
    pop {r0, r1}
    blx lr                      // run the wrapped function body; lr := next instruction
    // The wrapped function returns here and falls through into the restore.

// generic_restore_state
//   Pops the three saved pairs in the order they were stored and writes them
//   back with WXMD, WYMD and WEFD, then returns to the wrapper's original
//   caller. Only r12 and r14 are used as temporaries, so r0-r3 (the wrapped
//   function's results) are left alone.
.thumb_func
generic_restore_state:
    pop {r12, r14}
    WXMD r12, r14
    pop {r12, r14}
    WYMD r12, r14
    pop {r12, r14}
    WEFD r12, r14
    pop {pc}                    // return address pushed by saving_func
81
82// ============== ARITHMETIC FUNCTIONS ===============
83
float_wrapper_section __aeabi_fadd
// __aeabi_fadd: operands in r0 and r1, result in r0 (computed by dcp_fadd_m).
saving_func __aeabi_fadd
    dcp_fadd_m r0, r0, r1
    saving_func_return
88
float_wrapper_section __aeabi_fsub
// __aeabi_fsub: operands in r0 and r1, result in r0 (computed by dcp_fsub_m).
saving_func __aeabi_fsub
    dcp_fsub_m r0, r0, r1
    saving_func_return
93
float_wrapper_section __aeabi_frsub
// __aeabi_frsub: reverse subtract. Same macro as __aeabi_fsub but with the
// two source operands swapped (r1 first, then r0); result in r0.
saving_func __aeabi_frsub
    dcp_fsub_m r0, r1, r0
    saving_func_return
98
float_wrapper_section __aeabi_fmul
// __aeabi_fmul: operands in r0 and r1, result in r0. The two trailing
// registers handed to dcp_fmul_m are presumably temporaries (confirm against
// dcp_canned.inc.S); they are r0 and r1 again, so nothing else is clobbered.
saving_func __aeabi_fmul
    dcp_fmul_m r0, r0, r1, r0, r1
    saving_func_return
103
float_section fdiv_fast
// fdiv_fast: operands in r0 and r1, result in r0, via dcp_fdiv_fast_m.
// r2 is also handed to the macro, presumably as a temporary (confirm
// against dcp_canned.inc.S).
// Unlike __aeabi_fdiv this lives in a plain float_section rather than a
// wrapper section.
saving_func fdiv_fast
    dcp_fdiv_fast_m r0, r0, r1, r0, r1, r2
    saving_func_return
108
float_wrapper_section __aeabi_fdiv
// __aeabi_fdiv: operands in r0 and r1, result in r0, with correct rounding.
// r2 and r3 are also handed to dcp_fdiv_m, presumably as temporaries
// (confirm against dcp_canned.inc.S).
saving_func __aeabi_fdiv
    dcp_fdiv_m r0, r0, r1, r0, r1, r2, r3
    saving_func_return
114
float_section sqrtf_fast
// sqrtf_fast: operand in r0, result in r0, via dcp_fsqrt_fast_m.
// r1, r2 and r3 are also handed to the macro, presumably as temporaries
// (confirm against dcp_canned.inc.S).
saving_func sqrtf_fast
    dcp_fsqrt_fast_m r0, r0, r0, r1, r2, r3
    saving_func_return
119
float_wrapper_section sqrtf
// sqrtf: operand in r0, result in r0, with correct rounding.
// r1, r2 and r3 are also handed to dcp_fsqrt_m, presumably as temporaries
// (confirm against dcp_canned.inc.S).
saving_func sqrtf
    dcp_fsqrt_m r0, r0, r0, r1, r2, r3
    saving_func_return
125
// todo not a real thing
// __aeabi_fclassify: runs dcp_fclassify_m on r0 and delivers the outcome in
// the APSR flags; no general-purpose register is written by this wrapper.
float_wrapper_section __aeabi_fclassify
saving_func __aeabi_fclassify
    dcp_fclassify_m apsr_nzcv, r0
    saving_func_return
131
132// ============== CONVERSION FUNCTIONS ===============
133
float_wrapper_section __aeabi_f2d
// __aeabi_f2d: float in r0 converted to a double returned in r0 and r1.
saving_func __aeabi_f2d
    dcp_float2double_m r0, r1, r0
    saving_func_return
138
float_wrapper_section __aeabi_i2f
// __aeabi_i2f: signed integer in r0 converted to float in r0, with rounding.
saving_func __aeabi_i2f
    dcp_int2float_m r0, r0
    saving_func_return
144
float_wrapper_section __aeabi_ui2f
// __aeabi_ui2f: unsigned integer in r0 converted to float in r0, with rounding.
saving_func __aeabi_ui2f
    dcp_uint2float_m r0, r0
    saving_func_return
150
float_wrapper_section __aeabi_f2iz
// __aeabi_f2iz: float in r0 converted to signed integer in r0, truncating
// towards zero.
saving_func __aeabi_f2iz
    dcp_float2int_m r0, r0
    saving_func_return
156
float_wrapper_section __aeabi_f2uiz
// __aeabi_f2uiz: float in r0 converted to unsigned integer in r0, truncating
// towards zero.
saving_func __aeabi_f2uiz
    dcp_float2uint_m r0, r0
    saving_func_return
162
// todo not a real thing
// __aeabi_f2i_r: float in r0 converted to signed integer in r0, with rounding
// (the truncating variant is __aeabi_f2iz).
float_wrapper_section __aeabi_f2i_r
saving_func __aeabi_f2i_r
    dcp_float2int_r_m r0, r0
    saving_func_return
169
// todo not a real thing
// __aeabi_f2ui_r: float in r0 converted to unsigned integer in r0, with
// rounding (the truncating variant is __aeabi_f2uiz).
float_wrapper_section __aeabi_f2ui_r
saving_func __aeabi_f2ui_r
    dcp_float2uint_r_m r0, r0
    saving_func_return
176
177// ============== COMPARISON FUNCTIONS ===============
178
float_wrapper_section __aeabi_fcmpun
// __aeabi_fcmpun: compare r0 with r1 and return the "unordered" indication
// as 0 or 1 in r0. The comparison result is fetched as a word into r0 and the
// unordered bit is bit 28 of that word.
saving_func __aeabi_fcmpun
    dcp_fcmp_m r0, r0, r1
    ubfx r0, r0, #28, #1        // r0 = unordered bit
    saving_func_return
185
// This section holds the three flag-setting comparisons and the cmp_nan tail
// they share.
float_wrapper_section __aeabi_fcmp

// __aeabi_cfrcmple: flag-setting comparison with the arguments reversed
// (r1 is compared against r0). The result is left in the APSR flags; when V is
// set the shared cmp_nan tail produces the final flags instead.
saving_func __aeabi_cfrcmple
    dcp_fcmp_m apsr_nzcv, r1, r0
    bvs cmp_nan
    saving_func_return
191
// __aeabi_cfcmple: flag-setting comparison of r0 against r1. The result is
// left in the APSR flags; when V is set the shared cmp_nan tail produces the
// final flags instead.
// This and the following function can be the same function in the absence of
// exceptions.
saving_func __aeabi_cfcmple
    dcp_fcmp_m apsr_nzcv, r0, r1
    bvs cmp_nan
    saving_func_return
197
// __aeabi_cfcmpeq: flag-setting comparison of r0 against r1, implemented
// exactly like __aeabi_cfcmple.
// The ABI documentation does not make clear whether cfcmpeq has to set the C
// flag the same way cfcmple does. If it does not, the "bvs" below could be
// dropped; it is kept to err on the side of caution.
saving_func __aeabi_cfcmpeq
    dcp_fcmp_m apsr_nzcv, r0, r1
    bvs cmp_nan
    saving_func_return
205
// cmp_nan: shared tail for the flag-setting comparisons when the result is
// "unordered". In that case C must be set and Z cleared.
// Shifting the constant 3 right by one leaves 1 (so Z is clear) and shifts a 1
// out into C.
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1, as
// (a) r14 here is a return address and r14b0=1 for Thumb mode; (b) we are
// unlikely to be returning to address 0.
cmp_nan:
    movs r12, #3                // r12 need not be preserved by the flag-setting comparisons
    lsrs r12, r12, #1           // C := 1, Z := 0
    saving_func_return
213
float_wrapper_section __aeabi_fcmpeq
// __aeabi_fcmpeq: compare r0 with r1 and return the Z ("equal") indication as
// 0 or 1 in r0. The comparison result is fetched as a word into r0 and Z is
// bit 30 of that word.
saving_func __aeabi_fcmpeq
    dcp_fcmp_m r0, r0, r1
    ubfx r0, r0, #30, #1        // r0 = Z bit
    saving_func_return
220
float_wrapper_section __aeabi_fcmplt
// __aeabi_fcmplt: returns 1 in r0 when r0 < r1, else 0.
// Implemented by comparing with the operands swapped (r1 against r0) and
// testing HI (C set and Z clear); every other flag state yields 0.
saving_func __aeabi_fcmplt
    dcp_fcmp_m apsr_nzcv, r1, r0
    ite hi
    movhi r0, #1
    movls r0, #0
    saving_func_return
228
float_wrapper_section __aeabi_fcmple
// __aeabi_fcmple: returns 1 in r0 when r0 <= r1, else 0.
// Implemented by comparing with the operands swapped (r1 against r0) and
// testing HS (C set); C clear yields 0.
saving_func __aeabi_fcmple
    dcp_fcmp_m apsr_nzcv, r1, r0
    ite hs
    movhs r0, #1
    movlo r0, #0
    saving_func_return
236
float_wrapper_section __aeabi_fcmpge
// __aeabi_fcmpge: returns 1 in r0 when r0 >= r1, else 0.
// Compares r0 against r1 and tests HS (C set); C clear yields 0.
saving_func __aeabi_fcmpge
    dcp_fcmp_m apsr_nzcv, r0, r1
    ite hs
    movhs r0, #1
    movlo r0, #0
    saving_func_return
244
float_wrapper_section __aeabi_fcmpgt
// __aeabi_fcmpgt: returns 1 in r0 when r0 > r1, else 0.
// Compares r0 against r1 and tests HI (C set and Z clear); every other flag
// state yields 0.
saving_func __aeabi_fcmpgt
    dcp_fcmp_m apsr_nzcv, r0, r1
    ite hi
    movhi r0, #1
    movls r0, #0
    saving_func_return
252
253#endif
254