1/*
2 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#include "pico/asm_helper.S"
8
9#if !HAS_DOUBLE_COPROCESSOR
10#error attempt to compile double_aeabi_rp2350 when there is no DCP
11#else
12
13#include "hardware/dcp_instr.inc.S"
14#include "hardware/dcp_canned.inc.S"
15
16pico_default_asm_setup
17
// double_section name
//   name: section-name stem for the routine that follows.
//
// Switches to the code section used for a double-precision routine. When
// PICO_DOUBLE_IN_RAM is non-zero the stem is passed through RAM_SECTION_NAME(),
// otherwise through SECTION_NAME(). In both cases the section gets the "ax"
// (allocatable, executable) flags.
.macro double_section name
#if PICO_DOUBLE_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
25
// double_wrapper_section func
//   func: name of the function being wrapped.
//
// Same as double_section, but the section-name stem is first run through
// WRAPPER_FUNC_NAME() so the section is named after the wrapper symbol.
.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm
29
30// ============== STATE SAVE AND RESTORE ===============
31
// saving_func type func
//   type: prefix of the entry-point macro; the symbol is declared through
//         TYPE_func (this file uses "wrapper" and "regular").
//   func: name of the function being defined.
//
// Emits the DCP state-saving prologue. The function body follows the macro
// invocation and is expected to end with saving_func_return.
//
// Fast path: the entry point checks the DCP engaged flag with PCMP and, when
// it is clear, drops straight into the body.
// Slow path: when the flag is set, bmi branches back to an 8-byte stub placed
// in front of the entry point. The stub stacks lr, calls generic_save_state
// (which spills the DCP state and re-points lr at generic_restore_state) and
// then branches forward to the body. The engaged check is not repeated.
.macro saving_func type func
  // Note we are usually 32-bit aligned already at this point, as most of the
  // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
  // We want the PCMP word-aligned. The stub below is 2 + 4 + 2 = 8 bytes, so
  // the entry point keeps the alignment established here.
.p2align 2
  // Save stub, only reached through the bmi below:
1:
  push {lr}              // 16-bit instruction
  bl generic_save_state  // 32-bit instruction
  b 1f                   // 16-bit instruction; skips the engaged check
  // This is the actual entry point:
\type\()_func \func
  PCMP apsr_nzcv
  bmi 1b                 // N set: DCP engaged, take the save path
1:
.endm
50
// saving_func_return
//
// Return sequence for a function opened with saving_func. On the fast path lr
// is still the return address of the caller. On the save path lr was
// re-pointed by generic_save_state, so the same bx lr lands in
// generic_restore_state, which returns to the caller afterwards.
.macro saving_func_return
  bx lr
.endm
54
double_section __rp2350_dcp_engaged_state_save_restore

// generic_save_state
//
// Reached only by "bl" from the stub emitted by saving_func, after that stub
// has pushed the original lr. Spills three 64-bit DCP state words (read with
// PXMD, PYMD and REFD) below the saved lr, then resumes the calling function
// with lr re-pointed at generic_restore_state.
//
// r0-r3 are preserved (r0/r1 are stacked while used as scratch, r2/r3 are not
// touched); lr is overwritten.
//
// Stack when control returns to the calling function, lowest address first:
//   sp+0   pair read by PXMD
//   sp+8   pair read by PYMD
//   sp+16  pair read by REFD
//   sp+24  original lr pushed by the saving_func stub
// The frame is 28 bytes, so if sp was 8-byte aligned at function entry it is
// only 4-byte aligned while the frame is live.
.thumb_func
generic_save_state:
  sub sp, #24                  // room for the three 64-bit state words
  push {r0, r1}                // keep the arguments; r0/r1 are scratch below
  // do save here (offsets are biased by 8 for the r0/r1 just pushed)
  PXMD r0, r1
  strd r0, r1, [sp, #8 + 0]
  PYMD r0, r1
  strd r0, r1, [sp, #8 + 8]
  REFD r0, r1
  strd r0, r1, [sp, #8 + 16]
  pop {r0, r1}
  // lr holds the address after the bl in the saving_func stub. blx lr jumps
  // there and sets lr to the next instruction, i.e. generic_restore_state.
  blx lr
  // The wrapped function returns here and falls through into the restore.

// generic_restore_state
//
// Pops the three saved pairs in the order they were stored, writes them back
// with WXMD, WYMD and WEFD, and returns to the original caller by popping the
// saved lr into pc. Only r12 and r14 are used as scratch, so r0-r3 keep the
// result of the wrapped function.
// NOTE(review): flag-returning wrappers rely on the W* writes leaving the
// APSR flags untouched - confirm against hardware/dcp_instr.inc.S.
.thumb_func
generic_restore_state:
  // do restore here
  pop {r12, r14}
  WXMD r12, r14
  pop {r12, r14}
  WYMD r12, r14
  pop {r12, r14}
  WEFD r12, r14
  pop {pc}                     // original lr stacked by the saving_func stub
81
82// ============== ARITHMETIC FUNCTIONS ===============
83
// double __aeabi_dadd(double x, double y): x + y.
// x arrives in r0:r1 and y in r2:r3; the sum is produced in r0:r1.
double_wrapper_section __aeabi_dadd
saving_func wrapper __aeabi_dadd
  dcp_dadd_m r0,r1,r0,r1,r2,r3
  saving_func_return
88
// double __aeabi_dsub(double x, double y): x - y.
// x arrives in r0:r1 and y in r2:r3; the difference is produced in r0:r1.
double_wrapper_section __aeabi_dsub
saving_func wrapper __aeabi_dsub
  dcp_dsub_m r0,r1,r0,r1,r2,r3
  saving_func_return
93
// double __aeabi_drsub(double x, double y): reverse subtract, y - x.
// Same macro as __aeabi_dsub, with the two source register pairs swapped so
// that r2:r3 (y) is the minuend and r0:r1 (x) the subtrahend.
double_wrapper_section __aeabi_drsub
saving_func wrapper __aeabi_drsub
  dcp_dsub_m r0,r1,r2,r3,r0,r1
  saving_func_return
98
// double __aeabi_dmul(double x, double y): x * y.
// x arrives in r0:r1 and y in r2:r3; the product is produced in r0:r1.
double_wrapper_section __aeabi_dmul
saving_func wrapper __aeabi_dmul
  // dcp_dmul_m is handed r4, r12 and r14 in addition to the argument
  // registers (presumably as scratch - confirm in dcp_canned.inc.S), so the
  // callee-saved r4 and the return address in r14 are stacked around it.
  // todo optimize this based on final decision on saving_func_entry
  push {r4,r14}
  dcp_dmul_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r4,r12,r14
  // todo optimize this based on final decision on saving_func_entry
  pop {r4,lr}
  saving_func_return
108
// ddiv_fast: x / y using the "fast" DCP division sequence.
// x arrives in r0:r1 and y in r2:r3; the quotient is produced in r0:r1.
// Declared with regular_func rather than wrapper_func.
// NOTE(review): presumably trades the correct rounding of __aeabi_ddiv for
// speed - confirm against dcp_ddiv_fast_m in dcp_canned.inc.S.
double_section ddiv_fast
saving_func regular ddiv_fast
  dcp_ddiv_fast_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
  saving_func_return
113
// double __aeabi_ddiv(double x, double y): x / y, with correct rounding.
// x arrives in r0:r1 and y in r2:r3; the quotient is produced in r0:r1.
double_wrapper_section __aeabi_ddiv
saving_func wrapper __aeabi_ddiv
  dcp_ddiv_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
  saving_func_return
119
// sqrt_fast: square root using the "fast" DCP sequence.
// The operand arrives in r0:r1 and the result is produced in r0:r1.
// Declared with regular_func rather than wrapper_func.
// NOTE(review): presumably trades the correct rounding of sqrt for speed -
// confirm against dcp_dsqrt_fast_m in dcp_canned.inc.S.
double_section sqrt_fast
saving_func regular sqrt_fast
  dcp_dsqrt_fast_m r0,r1,r0,r1,r0,r1,r2,r3,r12
  saving_func_return
124
// double sqrt(double x): square root, with correct rounding.
// x arrives in r0:r1 and the result is produced in r0:r1.
double_wrapper_section sqrt
saving_func wrapper sqrt
  dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
  saving_func_return
130
// todo not a real thing (this name is not one of the AEABI run-time helpers)
// Classifies the double in r0:r1. The destination operand is apsr_nzcv, so
// the result is delivered in the APSR flags and r0/r1 are not written here.
// NOTE(review): the meaning of each flag is defined by dcp_dclassify_m -
// see dcp_canned.inc.S.
double_wrapper_section __aeabi_dclassify
saving_func wrapper __aeabi_dclassify
  dcp_dclassify_m apsr_nzcv,r0,r1
  saving_func_return
137
138// ============== CONVERSION FUNCTIONS ===============
139
// float __aeabi_d2f(double x): double to single precision, with rounding.
// x arrives in r0:r1; the float result is produced in r0.
double_wrapper_section __aeabi_d2f
saving_func wrapper __aeabi_d2f
  dcp_double2float_m r0,r0,r1
  saving_func_return
145
// double __aeabi_i2d(int x): signed 32-bit integer to double.
// x arrives in r0; the result is produced in r0:r1.
double_wrapper_section __aeabi_i2d
saving_func wrapper __aeabi_i2d
  dcp_int2double_m r0,r1,r0
  saving_func_return
150
// double __aeabi_ui2d(unsigned x): unsigned 32-bit integer to double.
// x arrives in r0; the result is produced in r0:r1.
double_wrapper_section __aeabi_ui2d
saving_func wrapper __aeabi_ui2d
  dcp_uint2double_m r0,r1,r0
  saving_func_return
155
// int __aeabi_d2iz(double x): double to signed 32-bit integer, with
// truncation towards 0.
// x arrives in r0:r1; the result is produced in r0.
double_wrapper_section __aeabi_d2iz
saving_func wrapper __aeabi_d2iz
  dcp_double2int_m r0,r0,r1
  saving_func_return
161
// unsigned __aeabi_d2uiz(double x): double to unsigned 32-bit integer, with
// truncation towards 0.
// x arrives in r0:r1; the result is produced in r0.
double_wrapper_section __aeabi_d2uiz
saving_func wrapper __aeabi_d2uiz
  dcp_double2uint_m r0,r0,r1
  saving_func_return
167
// todo not a real thing (this name is not one of the AEABI run-time helpers)
// Double to signed 32-bit integer, with rounding instead of the truncation
// used by __aeabi_d2iz.
// The operand arrives in r0:r1; the result is produced in r0.
double_wrapper_section __aeabi_d2i_r
saving_func wrapper __aeabi_d2i_r
  dcp_double2int_r_m r0,r0,r1
  saving_func_return
174
// todo not a real thing (this name is not one of the AEABI run-time helpers)
// Double to unsigned 32-bit integer, with rounding instead of the truncation
// used by __aeabi_d2uiz.
// The operand arrives in r0:r1; the result is produced in r0.
double_wrapper_section __aeabi_d2ui_r
saving_func wrapper __aeabi_d2ui_r
  dcp_double2uint_r_m r0,r0,r1
  saving_func_return
181
182// ============== COMPARISON FUNCTIONS ===============
183
// int __aeabi_dcmpun(double x, double y): 1 when the operands are unordered,
// 0 otherwise.
// x arrives in r0:r1 and y in r2:r3. The comparison result word is written
// to r0 and the return value is its bit 28, the position of the V flag in
// the APSR layout.
double_wrapper_section __aeabi_dcmpun
saving_func wrapper __aeabi_dcmpun
  dcp_dcmp_m r0,r0,r1,r2,r3
  // extract unordered bit
  ubfx r0, r0, #28, #1
  saving_func_return
190
// Flag-returning comparisons. All three entry points share one section so
// that each can branch to the common cmp_nan tail below. The comparison
// result is written directly to the APSR flags (destination apsr_nzcv), and
// an unordered result, signalled by V set, is patched up in cmp_nan.
double_wrapper_section __aeabi_dcmp

// void __aeabi_cdrcmple(double x, double y): reversed compare, i.e. y is
// compared against x.
saving_func wrapper __aeabi_cdrcmple
  dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
  bvs cmp_nan
  saving_func_return

// these next two can be the same function in the absence of exceptions
// void __aeabi_cdcmple(double x, double y): compare x against y.
saving_func wrapper __aeabi_cdcmple
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  bvs cmp_nan
  saving_func_return

// It is not clear from the ABI documentation whether cdcmpeq must set the C flag
// in the same way as cdcmple. If not, we could save the "bvs" below; but we
// err on the side of caution.
// void __aeabi_cdcmpeq(double x, double y): compare x against y.
saving_func wrapper __aeabi_cdcmpeq
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  bvs cmp_nan
  saving_func_return

// If the result of a flag-setting comparison is "unordered" then we need to set C and clear Z.
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1 as (a) r14 here is a
// return address and r14b0=1 for Thumb mode; (b) we are unlikely to be returning to address 0.
cmp_nan:
  movs r12, #3 // r12 does not need to be preserved by the flag-setting comparisons
  lsrs r12, #1 // set C, clear Z: 3 >> 1 shifts a 1 into C and leaves a non-zero result
  saving_func_return
221
// int FUNC_NAME(__aeabi_dcmpeq)(double, double)
// Result 1 denotes equal; 0 denotes unequal or unordered [2]. Use for C == and !=.
// x arrives in r0:r1 and y in r2:r3. The comparison result word is written
// to r0 and the return value is its bit 30, the position of the Z flag in
// the APSR layout.
double_wrapper_section __aeabi_dcmpeq
saving_func wrapper __aeabi_dcmpeq
  dcp_dcmp_m r0,r0,r1,r2,r3
  // extract Z
  ubfx r0, r0, #30, #1
  saving_func_return
229
// int FUNC_NAME(__aeabi_dcmplt)(double, double)
// Result 1 denotes less-than; 0 denotes greater, equal or unordered [2]. Use for C less-than.
// The operands are compared in swapped order (y against x) and 1 is returned
// when the HI condition holds (C set and Z clear), i.e. when y is above x.
// NOTE(review): relies on dcp_dcmp_m leaving HI false for unordered
// operands - confirm against dcp_canned.inc.S.
double_wrapper_section __aeabi_dcmplt
saving_func wrapper __aeabi_dcmplt
  dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
  ite hi
  movhi r0,#1
  movls r0,#0
  saving_func_return
238
// int FUNC_NAME(__aeabi_dcmple)(double, double)
// Result 1 denotes less-than-or-equal; 0 denotes greater or unordered [2]. Use for C less-or-equal.
// The operands are compared in swapped order (y against x) and 1 is returned
// when the HS condition holds (C set), i.e. when y is above or equal to x.
// NOTE(review): relies on dcp_dcmp_m leaving C clear for unordered
// operands - confirm against dcp_canned.inc.S.
double_wrapper_section __aeabi_dcmple
saving_func wrapper __aeabi_dcmple
  dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
  ite hs
  movhs r0,#1
  movlo r0,#0
  saving_func_return
247
// int FUNC_NAME(__aeabi_dcmpge)(double, double)
// Result 1 denotes greater-than-or-equal; 0 denotes less or unordered [2]. Use for C greater-or-equal.
// The operands are compared in natural order (x against y) and 1 is returned
// when the HS condition holds (C set), i.e. when x is above or equal to y.
// NOTE(review): relies on dcp_dcmp_m leaving C clear for unordered
// operands - confirm against dcp_canned.inc.S.
double_wrapper_section __aeabi_dcmpge
saving_func wrapper __aeabi_dcmpge
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  ite hs
  movhs r0,#1
  movlo r0,#0
  saving_func_return
256
// int FUNC_NAME(__aeabi_dcmpgt)(double, double)
// Result 1 denotes greater-than; 0 denotes less, equal or unordered [2]. Use for C greater-than.
// The operands are compared in natural order (x against y) and 1 is returned
// when the HI condition holds (C set and Z clear), i.e. when x is above y.
// NOTE(review): relies on dcp_dcmp_m leaving HI false for unordered
// operands - confirm against dcp_canned.inc.S.
double_wrapper_section __aeabi_dcmpgt
saving_func wrapper __aeabi_dcmpgt
  dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
  ite hi
  movhi r0,#1
  movls r0,#0
  saving_func_return
265
266#endif
267