1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is never
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN 20
86
87 #define GAUDI_CB_POOL_CB_CNT 512
88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
97
98 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
101 BIT(GAUDI_ENGINE_ID_MME_0) |\
102 BIT(GAUDI_ENGINE_ID_MME_2) |\
103 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE 256
112
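/* Queue IDs of the stream master queues: the eight streams of the two
 * PCI DMA engines (DMA 0 and DMA 1).
 */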
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114 GAUDI_QUEUE_ID_DMA_0_0,
115 GAUDI_QUEUE_ID_DMA_0_1,
116 GAUDI_QUEUE_ID_DMA_0_2,
117 GAUDI_QUEUE_ID_DMA_0_3,
118 GAUDI_QUEUE_ID_DMA_1_0,
119 GAUDI_QUEUE_ID_DMA_1_1,
120 GAUDI_QUEUE_ID_DMA_1_2,
121 GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143 [0] = GAUDI_QUEUE_ID_DMA_0_0,
144 [1] = GAUDI_QUEUE_ID_DMA_0_1,
145 [2] = GAUDI_QUEUE_ID_DMA_0_2,
146 [3] = GAUDI_QUEUE_ID_DMA_0_3,
147 [4] = GAUDI_QUEUE_ID_DMA_1_0,
148 [5] = GAUDI_QUEUE_ID_DMA_1_1,
149 [6] = GAUDI_QUEUE_ID_DMA_1_2,
150 [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
155 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
156 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
157 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
158 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
159 [PACKET_REPEAT] = sizeof(struct packet_repeat),
160 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
161 [PACKET_FENCE] = sizeof(struct packet_fence),
162 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
163 [PACKET_NOP] = sizeof(struct packet_nop),
164 [PACKET_STOP] = sizeof(struct packet_stop),
165 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
166 [PACKET_WAIT] = sizeof(struct packet_wait),
167 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
168 };
169
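/* Return true only for packet IDs known to the driver */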
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172 switch (id) {
173 case PACKET_WREG_32:
174 case PACKET_WREG_BULK:
175 case PACKET_MSG_LONG:
176 case PACKET_MSG_SHORT:
177 case PACKET_CP_DMA:
178 case PACKET_REPEAT:
179 case PACKET_MSG_PROT:
180 case PACKET_FENCE:
181 case PACKET_LIN_DMA:
182 case PACKET_NOP:
183 case PACKET_STOP:
184 case PACKET_ARB_POINT:
185 case PACKET_WAIT:
186 case PACKET_LOAD_AND_EXE:
187 return true;
188 default:
189 return false;
190 }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195 "tpc_address_exceed_slm",
196 "tpc_div_by_0",
197 "tpc_spu_mac_overflow",
198 "tpc_spu_addsub_overflow",
199 "tpc_spu_abs_overflow",
200 "tpc_spu_fp_dst_nan_inf",
201 "tpc_spu_fp_dst_denorm",
202 "tpc_vpu_mac_overflow",
203 "tpc_vpu_addsub_overflow",
204 "tpc_vpu_abs_overflow",
205 "tpc_vpu_fp_dst_nan_inf",
206 "tpc_vpu_fp_dst_denorm",
207 "tpc_assertions",
208 "tpc_illegal_instruction",
209 "tpc_pc_wrap_around",
210 "tpc_qm_sw_err",
211 "tpc_hbw_rresp_err",
212 "tpc_hbw_bresp_err",
213 "tpc_lbw_rresp_err",
214 "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219 "PQ AXI HBW error",
220 "CQ AXI HBW error",
221 "CP AXI HBW error",
222 "CP error due to undefined OPCODE",
223 "CP encountered STOP OPCODE",
224 "CP AXI LBW error",
225 "CP WRREG32 or WRBULK returned error",
226 "N/A",
227 "FENCE 0 inc over max value and clipped",
228 "FENCE 1 inc over max value and clipped",
229 "FENCE 2 inc over max value and clipped",
230 "FENCE 3 inc over max value and clipped",
231 "FENCE 0 dec under min value and clipped",
232 "FENCE 1 dec under min value and clipped",
233 "FENCE 2 dec under min value and clipped",
234 "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239 "Choice push while full error",
240 "Choice Q watchdog error",
241 "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245 GAUDI_SM_SEI_SO_OVERFLOW,
246 GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247 GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
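/* Type of each H/W queue: the PCI DMA queues are external (driver-managed),
 * the CPU PQ is a driver-only queue, and all other engine queues
 * (DMA 2-7, MME, TPC, NIC) are internal.
 */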
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414 [SP_MON_OBJ_WR_ADDR_LOW] =
415 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416 [SP_MON_OBJ_WR_ADDR_HIGH] =
417 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438 [SP_FENCE0_CNT_OFFSET] =
439 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440 [SP_FENCE0_RDATA_OFFSET] =
441 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443 [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448 */
449 static const char * const gaudi_sync_manager_names[] = {
450 "SYNC_MGR_E_N",
451 "SYNC_MGR_W_N",
452 "SYNC_MGR_E_S",
453 "SYNC_MGR_W_S",
454 NULL
455 };
456
457 struct ecc_info_extract_params {
458 u64 block_address;
459 u32 num_memories;
460 bool derr;
461 bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467 struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469 u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471 u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481 struct hl_gen_wait_properties *prop);
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486 return HL_COLLECTIVE_MASTER;
487
488 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490 return HL_COLLECTIVE_SLAVE;
491
492 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494 return HL_COLLECTIVE_SLAVE;
495
496 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498 return HL_COLLECTIVE_SLAVE;
499
500 return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
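/* Default max/DC power values depend on the card type (PMC vs. PCI) and,
 * for PMC cards, on whether F/W security is enabled.
 */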
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505 struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507 if (hdev->card_type == cpucp_card_type_pmc) {
508 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510 if (prop->fw_security_enabled)
511 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512 else
513 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514 } else {
515 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517 }
518 }
519
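/* Fill the fixed ASIC properties: per-queue properties, SRAM/DRAM ranges,
 * MMU layout, power defaults and the SOB/monitor ranges reserved for
 * sync streams and collective operations.
 */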
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522 struct asic_fixed_properties *prop = &hdev->asic_prop;
523 u32 num_sync_stream_queues = 0;
524 int i;
525
526 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527 prop->hw_queues_props = kcalloc(prop->max_queues,
528 sizeof(struct hw_queue_properties),
529 GFP_KERNEL);
530
531 if (!prop->hw_queues_props)
532 return -ENOMEM;
533
534 for (i = 0 ; i < prop->max_queues ; i++) {
535 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537 prop->hw_queues_props[i].driver_only = 0;
538 prop->hw_queues_props[i].supports_sync_stream = 1;
539 prop->hw_queues_props[i].cb_alloc_flags =
540 CB_ALLOC_KERNEL;
541 num_sync_stream_queues++;
542 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544 prop->hw_queues_props[i].driver_only = 1;
545 prop->hw_queues_props[i].supports_sync_stream = 0;
546 prop->hw_queues_props[i].cb_alloc_flags =
547 CB_ALLOC_KERNEL;
548 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550 prop->hw_queues_props[i].driver_only = 0;
551 prop->hw_queues_props[i].supports_sync_stream = 0;
552 prop->hw_queues_props[i].cb_alloc_flags =
553 CB_ALLOC_USER;
554
555 }
556 prop->hw_queues_props[i].collective_mode =
557 get_collective_mode(hdev, i);
558 }
559
560 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562 prop->collective_first_sob = 0;
563 prop->collective_first_mon = 0;
564
565 /* 2 SOBs per internal queue stream are reserved for collective */
566 prop->sync_stream_first_sob =
567 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568 * QMAN_STREAMS * HL_RSVD_SOBS;
569
570 /* 1 monitor per internal queue stream is reserved for collective
571 * 2 monitors per external queue stream are reserved for collective
572 */
573 prop->sync_stream_first_mon =
574 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575 (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577 prop->dram_base_address = DRAM_PHYS_BASE;
578 prop->dram_size = GAUDI_HBM_SIZE_32GB;
579 prop->dram_end_address = prop->dram_base_address +
580 prop->dram_size;
581 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583 prop->sram_base_address = SRAM_BASE_ADDR;
584 prop->sram_size = SRAM_SIZE;
585 prop->sram_end_address = prop->sram_base_address +
586 prop->sram_size;
587 prop->sram_user_base_address = prop->sram_base_address +
588 SRAM_USER_BASE_OFFSET;
589
590 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591 if (hdev->pldm)
592 prop->mmu_pgt_size = 0x800000; /* 8MB */
593 else
594 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595 prop->mmu_pte_size = HL_PTE_SIZE;
596 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598 prop->dram_page_size = PAGE_SIZE_2MB;
599 prop->dram_supports_virtual_memory = false;
600
601 prop->pmmu.hop0_shift = HOP0_SHIFT;
602 prop->pmmu.hop1_shift = HOP1_SHIFT;
603 prop->pmmu.hop2_shift = HOP2_SHIFT;
604 prop->pmmu.hop3_shift = HOP3_SHIFT;
605 prop->pmmu.hop4_shift = HOP4_SHIFT;
606 prop->pmmu.hop0_mask = HOP0_MASK;
607 prop->pmmu.hop1_mask = HOP1_MASK;
608 prop->pmmu.hop2_mask = HOP2_MASK;
609 prop->pmmu.hop3_mask = HOP3_MASK;
610 prop->pmmu.hop4_mask = HOP4_MASK;
611 prop->pmmu.start_addr = VA_HOST_SPACE_START;
612 prop->pmmu.end_addr =
613 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614 prop->pmmu.page_size = PAGE_SIZE_4KB;
615 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616
617 /* PMMU and HPMMU are the same except for the page size */
618 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620
621 /* shifts and masks are the same in PMMU and DMMU */
622 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624 prop->dmmu.end_addr = VA_HOST_SPACE_END;
625 prop->dmmu.page_size = PAGE_SIZE_2MB;
626
627 prop->cfg_size = CFG_SIZE;
628 prop->max_asid = MAX_ASID;
629 prop->num_of_events = GAUDI_EVENT_SIZE;
630 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631
632 set_default_power_values(hdev);
633
634 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636
637 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639
640 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641 CARD_NAME_MAX_LEN);
642
643 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644
645 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646 prop->sync_stream_first_sob +
647 (num_sync_stream_queues * HL_RSVD_SOBS);
648 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649 prop->sync_stream_first_mon +
650 (num_sync_stream_queues * HL_RSVD_MONS);
651
652 prop->first_available_user_msix_interrupt = USHRT_MAX;
653
654 for (i = 0 ; i < HL_MAX_DCORES ; i++)
655 prop->first_available_cq[i] = USHRT_MAX;
656
657 prop->fw_cpu_boot_dev_sts0_valid = false;
658 prop->fw_cpu_boot_dev_sts1_valid = false;
659 prop->hard_reset_done_by_fw = false;
660 prop->gic_interrupts_enable = true;
661
662 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663
664 return 0;
665 }
666
667 static int gaudi_pci_bars_map(struct hl_device *hdev)
668 {
669 static const char * const name[] = {"SRAM", "CFG", "HBM"};
670 bool is_wc[3] = {false, false, true};
671 int rc;
672
673 rc = hl_pci_bars_map(hdev, name, is_wc);
674 if (rc)
675 return rc;
676
677 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
678 (CFG_BASE - SPI_FLASH_BASE_ADDR);
679
680 return 0;
681 }
682
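/* Re-map inbound PCI region 2 (the HBM BAR) to point at the given address.
 * Returns the previous BAR address, or U64_MAX if the mapping failed or the
 * iATU is owned by the F/W.
 */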
683 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
684 {
685 struct gaudi_device *gaudi = hdev->asic_specific;
686 struct hl_inbound_pci_region pci_region;
687 u64 old_addr = addr;
688 int rc;
689
690 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
691 return old_addr;
692
693 if (hdev->asic_prop.iatu_done_by_fw)
694 return U64_MAX;
695
696 /* Inbound Region 2 - Bar 4 - Point to HBM */
697 pci_region.mode = PCI_BAR_MATCH_MODE;
698 pci_region.bar = HBM_BAR_ID;
699 pci_region.addr = addr;
700 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
701 if (rc)
702 return U64_MAX;
703
704 if (gaudi) {
705 old_addr = gaudi->hbm_bar_cur_addr;
706 gaudi->hbm_bar_cur_addr = addr;
707 }
708
709 return old_addr;
710 }
711
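/* Configure the PCIe iATU: inbound regions for SRAM+CFG, SPI flash and HBM,
 * and a single outbound region towards host memory. Skipped entirely when
 * the F/W has already configured the iATU.
 */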
712 static int gaudi_init_iatu(struct hl_device *hdev)
713 {
714 struct hl_inbound_pci_region inbound_region;
715 struct hl_outbound_pci_region outbound_region;
716 int rc;
717
718 if (hdev->asic_prop.iatu_done_by_fw)
719 return 0;
720
721 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
722 inbound_region.mode = PCI_BAR_MATCH_MODE;
723 inbound_region.bar = SRAM_BAR_ID;
724 inbound_region.addr = SRAM_BASE_ADDR;
725 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
726 if (rc)
727 goto done;
728
729 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
730 inbound_region.mode = PCI_BAR_MATCH_MODE;
731 inbound_region.bar = CFG_BAR_ID;
732 inbound_region.addr = SPI_FLASH_BASE_ADDR;
733 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
734 if (rc)
735 goto done;
736
737 /* Inbound Region 2 - Bar 4 - Point to HBM */
738 inbound_region.mode = PCI_BAR_MATCH_MODE;
739 inbound_region.bar = HBM_BAR_ID;
740 inbound_region.addr = DRAM_PHYS_BASE;
741 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
742 if (rc)
743 goto done;
744
745 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
746
747 /* Outbound Region 0 - Point to Host */
748 outbound_region.addr = HOST_PHYS_BASE;
749 outbound_region.size = HOST_PHYS_SIZE;
750 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
751
752 done:
753 return rc;
754 }
755
756 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
757 {
758 return RREG32(mmHW_STATE);
759 }
760
761 static int gaudi_early_init(struct hl_device *hdev)
762 {
763 struct asic_fixed_properties *prop = &hdev->asic_prop;
764 struct pci_dev *pdev = hdev->pdev;
765 u32 fw_boot_status;
766 int rc;
767
768 rc = gaudi_set_fixed_properties(hdev);
769 if (rc) {
770 dev_err(hdev->dev, "Failed setting fixed properties\n");
771 return rc;
772 }
773
774 /* Check BAR sizes */
775 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
776 dev_err(hdev->dev,
777 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
778 SRAM_BAR_ID,
779 (unsigned long long) pci_resource_len(pdev,
780 SRAM_BAR_ID),
781 SRAM_BAR_SIZE);
782 rc = -ENODEV;
783 goto free_queue_props;
784 }
785
786 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
787 dev_err(hdev->dev,
788 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
789 CFG_BAR_ID,
790 (unsigned long long) pci_resource_len(pdev,
791 CFG_BAR_ID),
792 CFG_BAR_SIZE);
793 rc = -ENODEV;
794 goto free_queue_props;
795 }
796
797 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
798
799 /* If FW security is enabled at this point it means no access to ELBI */
800 if (hdev->asic_prop.fw_security_enabled) {
801 hdev->asic_prop.iatu_done_by_fw = true;
802
803 /*
804 * The GIC security bit can ONLY be set by CPUCP, so at this stage
805 * the decision can only be taken based on PCI ID security.
806 */
807 hdev->asic_prop.gic_interrupts_enable = false;
808 goto pci_init;
809 }
810
811 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
812 &fw_boot_status);
813 if (rc)
814 goto free_queue_props;
815
816 /* Check whether FW is configuring iATU */
817 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
818 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
819 hdev->asic_prop.iatu_done_by_fw = true;
820
821 pci_init:
822 rc = hl_pci_init(hdev);
823 if (rc)
824 goto free_queue_props;
825
826 /* Before continuing with the initialization, we need to read the preboot
827 * version to determine whether we are running with security-enabled firmware
828 */
829 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
830 mmCPU_BOOT_DEV_STS0,
831 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
832 mmCPU_BOOT_ERR1,
833 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
834 if (rc) {
835 if (hdev->reset_on_preboot_fail)
836 hdev->asic_funcs->hw_fini(hdev, true, false);
837 goto pci_fini;
838 }
839
840 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
841 dev_info(hdev->dev,
842 "H/W state is dirty, must reset before initializing\n");
843 hdev->asic_funcs->hw_fini(hdev, true, false);
844 }
845
846 return 0;
847
848 pci_fini:
849 hl_pci_fini(hdev);
850 free_queue_props:
851 kfree(hdev->asic_prop.hw_queues_props);
852 return rc;
853 }
854
855 static int gaudi_early_fini(struct hl_device *hdev)
856 {
857 kfree(hdev->asic_prop.hw_queues_props);
858 hl_pci_fini(hdev);
859
860 return 0;
861 }
862
863 /**
864 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
865 *
866 * @hdev: pointer to hl_device structure
867 *
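 * Return: 0 on success, negative value on failure.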
868 */
869 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
870 {
871 struct asic_fixed_properties *prop = &hdev->asic_prop;
872 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
873 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
874 int rc;
875
876 if (hdev->asic_prop.fw_security_enabled) {
877 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
878
879 if (rc)
880 return rc;
881
882 freq = pll_freq_arr[2];
883 } else {
884 /* Backward compatibility */
885 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
886 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
887 nr = RREG32(mmPSOC_CPU_PLL_NR);
888 nf = RREG32(mmPSOC_CPU_PLL_NF);
889 od = RREG32(mmPSOC_CPU_PLL_OD);
890
891 if (div_sel == DIV_SEL_REF_CLK ||
892 div_sel == DIV_SEL_DIVIDED_REF) {
893 if (div_sel == DIV_SEL_REF_CLK)
894 freq = PLL_REF_CLK;
895 else
896 freq = PLL_REF_CLK / (div_fctr + 1);
897 } else if (div_sel == DIV_SEL_PLL_CLK ||
898 div_sel == DIV_SEL_DIVIDED_PLL) {
899 pll_clk = PLL_REF_CLK * (nf + 1) /
900 ((nr + 1) * (od + 1));
901 if (div_sel == DIV_SEL_PLL_CLK)
902 freq = pll_clk;
903 else
904 freq = pll_clk / (div_fctr + 1);
905 } else {
906 dev_warn(hdev->dev,
907 "Received invalid div select value: %d",
908 div_sel);
909 freq = 0;
910 }
911 }
912
913 prop->psoc_timestamp_frequency = freq;
914 prop->psoc_pci_pll_nr = nr;
915 prop->psoc_pci_pll_nf = nf;
916 prop->psoc_pci_pll_od = od;
917 prop->psoc_pci_pll_div_factor = div_fctr;
918
919 return 0;
920 }
921
922 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
923 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
924 {
925 struct asic_fixed_properties *prop = &hdev->asic_prop;
926 struct packet_lin_dma *init_tpc_mem_pkt;
927 struct hl_cs_job *job;
928 struct hl_cb *cb;
929 u64 dst_addr;
930 u32 cb_size, ctl;
931 u8 tpc_id;
932 int rc;
933
934 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
935 if (!cb)
936 return -EFAULT;
937
938 init_tpc_mem_pkt = cb->kernel_address;
939 cb_size = sizeof(*init_tpc_mem_pkt);
940 memset(init_tpc_mem_pkt, 0, cb_size);
941
942 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
943
944 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
945 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
946 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
947 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
948
949 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
950
951 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
952 dst_addr = (prop->sram_user_base_address &
953 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
954 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
955 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
956
957 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
958 if (!job) {
959 dev_err(hdev->dev, "Failed to allocate a new job\n");
960 rc = -ENOMEM;
961 goto release_cb;
962 }
963
964 job->id = 0;
965 job->user_cb = cb;
966 atomic_inc(&job->user_cb->cs_cnt);
967 job->user_cb_size = cb_size;
968 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
969 job->patched_cb = job->user_cb;
970 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
971
972 hl_debugfs_add_job(hdev, job);
973
974 rc = gaudi_send_job_on_qman0(hdev, job);
975
976 if (rc)
977 goto free_job;
978
979 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
980 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
981 if (rc)
982 break;
983 }
984
985 free_job:
986 hl_userptr_delete_list(hdev, &job->userptr_list);
987 hl_debugfs_remove_job(hdev, job);
988 kfree(job);
989 atomic_dec(&cb->cs_cnt);
990
991 release_cb:
992 hl_cb_put(cb);
993 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
994
995 return rc;
996 }
997
998 /*
999 * gaudi_init_tpc_mem() - Initialize TPC memories.
1000 * @hdev: Pointer to hl_device structure.
1001 *
1002 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1003 *
1004 * Return: 0 for success, negative value for error.
1005 */
1006 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1007 {
1008 const struct firmware *fw;
1009 size_t fw_size;
1010 void *cpu_addr;
1011 dma_addr_t dma_handle;
1012 int rc, count = 5;
1013
1014 again:
1015 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1016 if (rc == -EINTR && count-- > 0) {
1017 msleep(50);
1018 goto again;
1019 }
1020
1021 if (rc) {
1022 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1023 GAUDI_TPC_FW_FILE);
1024 goto out;
1025 }
1026
1027 fw_size = fw->size;
1028 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1029 &dma_handle, GFP_KERNEL | __GFP_ZERO);
1030 if (!cpu_addr) {
1031 dev_err(hdev->dev,
1032 "Failed to allocate %zu of dma memory for TPC kernel\n",
1033 fw_size);
1034 rc = -ENOMEM;
1035 goto out;
1036 }
1037
1038 memcpy(cpu_addr, fw->data, fw_size);
1039
1040 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1041
1042 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1043 dma_handle);
1044
1045 out:
1046 release_firmware(fw);
1047 return rc;
1048 }
1049
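/* Map the SOBs of the stream's current SOB group onto the collective slave
 * queues: one SOB per NIC engine and one shared SOB for the DMA5/TPC7
 * reduction engine.
 */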
1050 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1051 {
1052 struct gaudi_device *gaudi = hdev->asic_specific;
1053 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1054 struct hl_hw_queue *q;
1055 u32 i, sob_id, sob_group_id, queue_id;
1056
1057 /* Iterate through SOB groups and assign a SOB for each slave queue */
1058 sob_group_id =
1059 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1060 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1061
1062 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1063 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1064 q = &hdev->kernel_queues[queue_id + (4 * i)];
1065 q->sync_stream_prop.collective_sob_id = sob_id + i;
1066 }
1067
1068 /* Both DMA5 and TPC7 use the same resources since only a single
1069 * engine needs to participate in the reduction process
1070 */
1071 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1072 q = &hdev->kernel_queues[queue_id];
1073 q->sync_stream_prop.collective_sob_id =
1074 sob_id + NIC_NUMBER_OF_ENGINES;
1075
1076 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1077 q = &hdev->kernel_queues[queue_id];
1078 q->sync_stream_prop.collective_sob_id =
1079 sob_id + NIC_NUMBER_OF_ENGINES;
1080 }
1081
1082 static void gaudi_sob_group_hw_reset(struct kref *ref)
1083 {
1084 struct gaudi_hw_sob_group *hw_sob_group =
1085 container_of(ref, struct gaudi_hw_sob_group, kref);
1086 struct hl_device *hdev = hw_sob_group->hdev;
1087 int i;
1088
1089 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1090 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1091 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1092
1093 kref_init(&hw_sob_group->kref);
1094 }
1095
1096 static void gaudi_sob_group_reset_error(struct kref *ref)
1097 {
1098 struct gaudi_hw_sob_group *hw_sob_group =
1099 container_of(ref, struct gaudi_hw_sob_group, kref);
1100 struct hl_device *hdev = hw_sob_group->hdev;
1101
1102 dev_crit(hdev->dev,
1103 "SOB release shouldn't be called here, base_sob_id: %d\n",
1104 hw_sob_group->base_sob_id);
1105 }
1106
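/* Build the mask of SOBs that the collective master monitors: one bit per
 * enabled NIC engine plus one bit for the reduction engine.
 */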
1107 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1108 {
1109 struct gaudi_collective_properties *prop;
1110 int i;
1111
1112 prop = &gaudi->collective_props;
1113
1114 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1115
1116 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1117 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1118 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1119 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1120 /* Set collective engine bit */
1121 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1122 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1123 }
1124
1125 static int gaudi_collective_init(struct hl_device *hdev)
1126 {
1127 u32 i, sob_id, reserved_sobs_per_group;
1128 struct gaudi_collective_properties *prop;
1129 struct gaudi_device *gaudi;
1130
1131 gaudi = hdev->asic_specific;
1132 prop = &gaudi->collective_props;
1133 sob_id = hdev->asic_prop.collective_first_sob;
1134
1135 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1136 reserved_sobs_per_group =
1137 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1138
1139 /* Init SOB groups */
1140 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1141 prop->hw_sob_group[i].hdev = hdev;
1142 prop->hw_sob_group[i].base_sob_id = sob_id;
1143 sob_id += reserved_sobs_per_group;
1144 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1145 }
1146
1147 for (i = 0 ; i < QMAN_STREAMS; i++) {
1148 prop->next_sob_group_val[i] = 1;
1149 prop->curr_sob_group_idx[i] = 0;
1150 gaudi_collective_map_sobs(hdev, i);
1151 }
1152
1153 gaudi_collective_mstr_sob_mask_set(gaudi);
1154
1155 return 0;
1156 }
1157
1158 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1159 {
1160 struct gaudi_device *gaudi = hdev->asic_specific;
1161 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1162
1163 kref_put(&cprop->hw_sob_group[sob_group].kref,
1164 gaudi_sob_group_hw_reset);
1165 }
1166
1167 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1168 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1169 {
1170 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1171 struct gaudi_collective_properties *cprop;
1172 struct hl_gen_wait_properties wait_prop;
1173 struct hl_sync_stream_properties *prop;
1174 struct gaudi_device *gaudi;
1175
1176 gaudi = hdev->asic_specific;
1177 cprop = &gaudi->collective_props;
1178 queue_id = job->hw_queue_id;
1179 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1180
1181 master_sob_base =
1182 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1183 master_monitor = prop->collective_mstr_mon_id[0];
1184
1185 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1186
1187 dev_dbg(hdev->dev,
1188 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1189 master_sob_base, cprop->mstr_sob_mask[0],
1190 cprop->next_sob_group_val[stream],
1191 master_monitor, queue_id);
1192
1193 wait_prop.data = (void *) job->patched_cb;
1194 wait_prop.sob_base = master_sob_base;
1195 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1196 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1197 wait_prop.mon_id = master_monitor;
1198 wait_prop.q_idx = queue_id;
1199 wait_prop.size = cb_size;
1200 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201
1202 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1203 master_monitor = prop->collective_mstr_mon_id[1];
1204
1205 dev_dbg(hdev->dev,
1206 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207 master_sob_base, cprop->mstr_sob_mask[1],
1208 cprop->next_sob_group_val[stream],
1209 master_monitor, queue_id);
1210
1211 wait_prop.sob_base = master_sob_base;
1212 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1213 wait_prop.mon_id = master_monitor;
1214 wait_prop.size = cb_size;
1215 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 }
1217
1218 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1219 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1220 {
1221 struct hl_gen_wait_properties wait_prop;
1222 struct hl_sync_stream_properties *prop;
1223 u32 queue_id, cb_size = 0;
1224
1225 queue_id = job->hw_queue_id;
1226 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1227
1228 if (job->cs->encaps_signals) {
1229 /* use the encaps signal handle stored earlier in the flow
1230 * and set the SOB information from the encaps
1231 * signals handle
1232 */
1233 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1234 cs_cmpl);
1235
1236 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1237 job->cs->sequence,
1238 cs_cmpl->hw_sob->sob_id,
1239 cs_cmpl->sob_val);
1240 }
1241
1242 /* Add to wait CBs using slave monitor */
1243 wait_prop.data = (void *) job->user_cb;
1244 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1245 wait_prop.sob_mask = 0x1;
1246 wait_prop.sob_val = cs_cmpl->sob_val;
1247 wait_prop.mon_id = prop->collective_slave_mon_id;
1248 wait_prop.q_idx = queue_id;
1249 wait_prop.size = cb_size;
1250
1251 dev_dbg(hdev->dev,
1252 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1253 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1254 prop->collective_slave_mon_id, queue_id);
1255
1256 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257
1258 dev_dbg(hdev->dev,
1259 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1260 prop->collective_sob_id, queue_id);
1261
1262 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1263 prop->collective_sob_id, cb_size, false);
1264 }
1265
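/* Initialize a collective wait CS: take a reference on the signal SOB,
 * generate master/slave wait CBs for every job in the CS and advance the
 * stream's SOB group (with wraparound) for the next collective operation.
 */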
1266 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1267 {
1268 struct hl_cs_compl *signal_cs_cmpl =
1269 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1270 struct hl_cs_compl *cs_cmpl =
1271 container_of(cs->fence, struct hl_cs_compl, base_fence);
1272 struct gaudi_collective_properties *cprop;
1273 u32 stream, queue_id, sob_group_offset;
1274 struct gaudi_device *gaudi;
1275 struct hl_device *hdev;
1276 struct hl_cs_job *job;
1277 struct hl_ctx *ctx;
1278
1279 ctx = cs->ctx;
1280 hdev = ctx->hdev;
1281 gaudi = hdev->asic_specific;
1282 cprop = &gaudi->collective_props;
1283
1284 /* In the encaps signals case, the SOB info will be retrieved from
1285 * the handle in gaudi_collective_slave_init_job.
1286 */
1287 if (!cs->encaps_signals) {
1288 /* copy the SOB id and value of the signal CS */
1289 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1290 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1291 }
1292
1293 /* check again if the signal cs has already completed.
1294 * if yes then don't send any wait cs since the hw_sob
1295 * could be in reset already. if signal is not completed
1296 * then get refcount to hw_sob to prevent resetting the sob
1297 * while wait cs is not submitted.
1298 * note that this check is protected by two locks,
1299 * hw queue lock and completion object lock,
1300 * and the same completion object lock also protects
1301 * the hw_sob reset handler function.
1302 * The hw_queue lock prevents the hw_sob refcount value, which is
1303 * changed by the signal/wait flows, from going out of sync.
1304 */
1305 spin_lock(&signal_cs_cmpl->lock);
1306
1307 if (completion_done(&cs->signal_fence->completion)) {
1308 spin_unlock(&signal_cs_cmpl->lock);
1309 return -EINVAL;
1310 }
1311 /* Increment kref since all slave queues are now waiting on it */
1312 kref_get(&cs_cmpl->hw_sob->kref);
1313
1314 spin_unlock(&signal_cs_cmpl->lock);
1315
1316 /* Calculate the stream from collective master queue (1st job) */
1317 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1318 stream = job->hw_queue_id % 4;
1319 sob_group_offset =
1320 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1321
1322 list_for_each_entry(job, &cs->job_list, cs_node) {
1323 queue_id = job->hw_queue_id;
1324
1325 if (hdev->kernel_queues[queue_id].collective_mode ==
1326 HL_COLLECTIVE_MASTER)
1327 gaudi_collective_master_init_job(hdev, job, stream,
1328 sob_group_offset);
1329 else
1330 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1331 }
1332
1333 cs_cmpl->sob_group = sob_group_offset;
1334
1335 /* Handle sob group kref and wraparound */
1336 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1337 cprop->next_sob_group_val[stream]++;
1338
1339 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1340 /*
1341 * Decrement as we reached the max value.
1342 * The release function won't be called here as we've
1343 * just incremented the refcount.
1344 */
1345 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1346 gaudi_sob_group_reset_error);
1347 cprop->next_sob_group_val[stream] = 1;
1348 /* only two SOBs are currently in use */
1349 cprop->curr_sob_group_idx[stream] =
1350 (cprop->curr_sob_group_idx[stream] + 1) &
1351 (HL_RSVD_SOBS - 1);
1352
1353 gaudi_collective_map_sobs(hdev, stream);
1354
1355 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1356 cprop->curr_sob_group_idx[stream], stream);
1357 }
1358
1359 mb();
1360 hl_fence_put(cs->signal_fence);
1361 cs->signal_fence = NULL;
1362
1363 return 0;
1364 }
1365
1366 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1367 struct hl_ctx *ctx, struct hl_cs *cs,
1368 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1369 u32 encaps_signal_offset)
1370 {
1371 struct hw_queue_properties *hw_queue_prop;
1372 struct hl_cs_counters_atomic *cntr;
1373 struct hl_cs_job *job;
1374 struct hl_cb *cb;
1375 u32 cb_size;
1376 bool patched_cb;
1377
1378 cntr = &hdev->aggregated_cs_counters;
1379
1380 if (mode == HL_COLLECTIVE_MASTER) {
1381 /* CB size of collective master queue contains
1382 * 4 msg short packets for monitor 1 configuration
1383 * 1 fence packet
1384 * 4 msg short packets for monitor 2 configuration
1385 * 1 fence packet
1386 * 2 msg prot packets for completion and MSI-X
1387 */
1388 cb_size = sizeof(struct packet_msg_short) * 8 +
1389 sizeof(struct packet_fence) * 2 +
1390 sizeof(struct packet_msg_prot) * 2;
1391 patched_cb = true;
1392 } else {
1393 /* CB size of collective slave queues contains
1394 * 4 msg short packets for monitor configuration
1395 * 1 fence packet
1396 * 1 additional msg short packet for sob signal
1397 */
1398 cb_size = sizeof(struct packet_msg_short) * 5 +
1399 sizeof(struct packet_fence);
1400 patched_cb = false;
1401 }
1402
1403 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1404 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1405 if (!job) {
1406 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1407 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1408 dev_err(hdev->dev, "Failed to allocate a new job\n");
1409 return -ENOMEM;
1410 }
1411
1412 /* Allocate internal mapped CB for non patched CBs */
1413 cb = hl_cb_kernel_create(hdev, cb_size,
1414 hdev->mmu_enable && !patched_cb);
1415 if (!cb) {
1416 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1417 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1418 kfree(job);
1419 return -EFAULT;
1420 }
1421
1422 job->id = 0;
1423 job->cs = cs;
1424 job->user_cb = cb;
1425 atomic_inc(&job->user_cb->cs_cnt);
1426 job->user_cb_size = cb_size;
1427 job->hw_queue_id = queue_id;
1428
1429 /* since it's guaranteed to have only one chunk in the collective wait
1430 * cs, we can use this chunk to set the encapsulated signal offset
1431 * in the jobs.
1432 */
1433 if (cs->encaps_signals)
1434 job->encaps_sig_wait_offset = encaps_signal_offset;
1435
1436 /*
1437 * No need in parsing, user CB is the patched CB.
1438 * We call hl_cb_destroy() for two reasons - we don't need
1439 * the CB in the CB idr anymore and to decrement its refcount as
1440 * it was incremented inside hl_cb_kernel_create().
1441 */
1442 if (patched_cb)
1443 job->patched_cb = job->user_cb;
1444 else
1445 job->patched_cb = NULL;
1446
1447 job->job_cb_size = job->user_cb_size;
1448 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1449
1450 /* increment refcount as for external queues we get completion */
1451 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1452 cs_get(cs);
1453
1454 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1455
1456 list_add_tail(&job->cs_node, &cs->job_list);
1457
1458 hl_debugfs_add_job(hdev, job);
1459
1460 return 0;
1461 }
1462
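/* Create the jobs of a collective wait CS: one master job on the wait queue
 * plus slave jobs on every enabled NIC queue and on the reduction engine
 * (DMA5 or TPC7) queue.
 */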
1463 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1464 struct hl_ctx *ctx, struct hl_cs *cs,
1465 u32 wait_queue_id, u32 collective_engine_id,
1466 u32 encaps_signal_offset)
1467 {
1468 struct gaudi_device *gaudi = hdev->asic_specific;
1469 struct hw_queue_properties *hw_queue_prop;
1470 u32 queue_id, collective_queue, num_jobs;
1471 u32 stream, nic_queue, nic_idx = 0;
1472 bool skip;
1473 int i, rc = 0;
1474
1475 /* Verify wait queue id is configured as master */
1476 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1477 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1478 dev_err(hdev->dev,
1479 "Queue %d is not configured as collective master\n",
1480 wait_queue_id);
1481 return -EINVAL;
1482 }
1483
1484 /* Verify engine id is supported */
1485 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1486 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1487 dev_err(hdev->dev,
1488 "Collective wait does not support engine %u\n",
1489 collective_engine_id);
1490 return -EINVAL;
1491 }
1492
1493 stream = wait_queue_id % 4;
1494
1495 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1496 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1497 else
1498 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1499
1500 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1501 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1502
1503 /* The first job goes to the collective master queue; it will wait for
1504 * the collective slave queues to finish execution.
1505 * The synchronization is done using two monitors:
1506 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1507 * reduction engine (DMA5/TPC7).
1508 *
1509 * The rest of the jobs go to the collective slave queues, which will
1510 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1511 */
1512 for (i = 0 ; i < num_jobs ; i++) {
1513 if (i == 0) {
1514 queue_id = wait_queue_id;
1515 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1516 HL_COLLECTIVE_MASTER, queue_id,
1517 wait_queue_id, encaps_signal_offset);
1518 } else {
1519 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1520 if (gaudi->hw_cap_initialized &
1521 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1522 skip = false;
1523 else
1524 skip = true;
1525
1526 queue_id = nic_queue;
1527 nic_queue += 4;
1528 nic_idx++;
1529
1530 if (skip)
1531 continue;
1532 } else {
1533 queue_id = collective_queue;
1534 }
1535
1536 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537 HL_COLLECTIVE_SLAVE, queue_id,
1538 wait_queue_id, encaps_signal_offset);
1539 }
1540
1541 if (rc)
1542 return rc;
1543 }
1544
1545 return rc;
1546 }
1547
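/* Late initialization, called once the device CPU F/W is up: fetch CPUCP
 * info, enable PCI access from the CPU, scrub memory, clear the MMU page
 * tables, initialize TPC memories and the collective infrastructure.
 */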
1548 static int gaudi_late_init(struct hl_device *hdev)
1549 {
1550 struct gaudi_device *gaudi = hdev->asic_specific;
1551 int rc;
1552
1553 rc = gaudi->cpucp_info_get(hdev);
1554 if (rc) {
1555 dev_err(hdev->dev, "Failed to get cpucp info\n");
1556 return rc;
1557 }
1558
1559 if ((hdev->card_type == cpucp_card_type_pci) &&
1560 (hdev->nic_ports_mask & 0x3)) {
1561 dev_info(hdev->dev,
1562 "PCI card detected, only 8 ports are enabled\n");
1563 hdev->nic_ports_mask &= ~0x3;
1564
1565 /* Stop and disable unused NIC QMANs */
1566 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1567 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1568 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1569
1570 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573
1574 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1575 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1576
1577 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1578 }
1579
1580 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1581 if (rc) {
1582 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1583 return rc;
1584 }
1585
1586 /* Scrub both SRAM and DRAM */
1587 rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1588 if (rc)
1589 goto disable_pci_access;
1590
1591 rc = gaudi_fetch_psoc_frequency(hdev);
1592 if (rc) {
1593 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1594 goto disable_pci_access;
1595 }
1596
1597 rc = gaudi_mmu_clear_pgt_range(hdev);
1598 if (rc) {
1599 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1600 goto disable_pci_access;
1601 }
1602
1603 rc = gaudi_init_tpc_mem(hdev);
1604 if (rc) {
1605 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1606 goto disable_pci_access;
1607 }
1608
1609 rc = gaudi_collective_init(hdev);
1610 if (rc) {
1611 dev_err(hdev->dev, "Failed to init collective\n");
1612 goto disable_pci_access;
1613 }
1614
1615 /* We only support a single ASID for the user, so for the sake of optimization, just
1616 * initialize the ASID one time during device initialization with the fixed value of 1
1617 */
1618 gaudi_mmu_prepare(hdev, 1);
1619
1620 return 0;
1621
1622 disable_pci_access:
1623 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1624
1625 return rc;
1626 }
1627
1628 static void gaudi_late_fini(struct hl_device *hdev)
1629 {
1630 const struct hwmon_channel_info **channel_info_arr;
1631 int i = 0;
1632
1633 if (!hdev->hl_chip_info->info)
1634 return;
1635
1636 channel_info_arr = hdev->hl_chip_info->info;
1637
1638 while (channel_info_arr[i]) {
1639 kfree(channel_info_arr[i]->config);
1640 kfree(channel_info_arr[i]);
1641 i++;
1642 }
1643
1644 kfree(channel_info_arr);
1645
1646 hdev->hl_chip_info->info = NULL;
1647 }
1648
1649 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1650 {
1651 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1652 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1653 int i, j, rc = 0;
1654
1655 /*
1656 * The device CPU works with 40-bit addresses, and bit 39 must be set
1657 * to '1' when accessing the host.
1658 * Bits 49:39 of the full host address are saved for a later
1659 * configuration of the HW to perform extension to 50 bits.
1660 * Because there is a single HW register that holds the extension bits,
1661 * these bits must be identical across the entire allocated range.
1662 */
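/*
 * In other words, the loop below accepts an allocation only when
 * GAUDI_CPU_PCI_MSB_ADDR(dma_addr) ==
 * GAUDI_CPU_PCI_MSB_ADDR(dma_addr + HL_CPU_ACCESSIBLE_MEM_SIZE - 1),
 * i.e. bits 49:39 do not change anywhere inside the buffer, since a
 * single HW register later supplies those bits and bit 39 is forced
 * to '1' on the device CPU side.
 */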
1663
1664 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1665 virt_addr_arr[i] =
1666 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1667 HL_CPU_ACCESSIBLE_MEM_SIZE,
1668 &dma_addr_arr[i],
1669 GFP_KERNEL | __GFP_ZERO);
1670 if (!virt_addr_arr[i]) {
1671 rc = -ENOMEM;
1672 goto free_dma_mem_arr;
1673 }
1674
1675 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1676 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1677 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1678 break;
1679 }
1680
1681 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1682 dev_err(hdev->dev,
1683 "MSB of CPU accessible DMA memory are not identical in all range\n");
1684 rc = -EFAULT;
1685 goto free_dma_mem_arr;
1686 }
1687
1688 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1689 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1690 hdev->cpu_pci_msb_addr =
1691 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1692
1693 if (!hdev->asic_prop.fw_security_enabled)
1694 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1695
1696 free_dma_mem_arr:
1697 for (j = 0 ; j < i ; j++)
1698 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1699 HL_CPU_ACCESSIBLE_MEM_SIZE,
1700 virt_addr_arr[j],
1701 dma_addr_arr[j]);
1702
1703 return rc;
1704 }
1705
1706 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1707 {
1708 struct gaudi_device *gaudi = hdev->asic_specific;
1709 struct gaudi_internal_qman_info *q;
1710 u32 i;
1711
1712 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1713 q = &gaudi->internal_qmans[i];
1714 if (!q->pq_kernel_addr)
1715 continue;
1716 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1717 q->pq_kernel_addr,
1718 q->pq_dma_addr);
1719 }
1720 }
1721
1722 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1723 {
1724 struct gaudi_device *gaudi = hdev->asic_specific;
1725 struct gaudi_internal_qman_info *q;
1726 int rc, i;
1727
1728 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1729 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1730 continue;
1731
1732 q = &gaudi->internal_qmans[i];
1733
1734 switch (i) {
1735 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1736 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1737 break;
1738 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1739 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1740 break;
1741 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1742 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1743 break;
1744 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1745 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1746 break;
1747 default:
1748 dev_err(hdev->dev, "Bad internal queue index %d", i);
1749 rc = -EINVAL;
1750 goto free_internal_qmans_pq_mem;
1751 }
1752
1753 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1754 hdev, q->pq_size,
1755 &q->pq_dma_addr,
1756 GFP_KERNEL | __GFP_ZERO);
1757 if (!q->pq_kernel_addr) {
1758 rc = -ENOMEM;
1759 goto free_internal_qmans_pq_mem;
1760 }
1761 }
1762
1763 return 0;
1764
1765 free_internal_qmans_pq_mem:
1766 gaudi_free_internal_qmans_pq_mem(hdev);
1767 return rc;
1768 }
1769
1770 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1771 {
1772 struct asic_fixed_properties *prop = &hdev->asic_prop;
1773 struct pci_mem_region *region;
1774
1775 /* CFG */
1776 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1777 region->region_base = CFG_BASE;
1778 region->region_size = CFG_SIZE;
1779 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1780 region->bar_size = CFG_BAR_SIZE;
1781 region->bar_id = CFG_BAR_ID;
1782 region->used = 1;
1783
1784 /* SRAM */
1785 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1786 region->region_base = SRAM_BASE_ADDR;
1787 region->region_size = SRAM_SIZE;
1788 region->offset_in_bar = 0;
1789 region->bar_size = SRAM_BAR_SIZE;
1790 region->bar_id = SRAM_BAR_ID;
1791 region->used = 1;
1792
1793 /* DRAM */
1794 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1795 region->region_base = DRAM_PHYS_BASE;
1796 region->region_size = hdev->asic_prop.dram_size;
1797 region->offset_in_bar = 0;
1798 region->bar_size = prop->dram_pci_bar_size;
1799 region->bar_id = HBM_BAR_ID;
1800 region->used = 1;
1801
1802 /* SP SRAM */
1803 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1804 region->region_base = PSOC_SCRATCHPAD_ADDR;
1805 region->region_size = PSOC_SCRATCHPAD_SIZE;
1806 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1807 region->bar_size = CFG_BAR_SIZE;
1808 region->bar_id = CFG_BAR_ID;
1809 region->used = 1;
1810 }
1811
1812 static int gaudi_sw_init(struct hl_device *hdev)
1813 {
1814 struct gaudi_device *gaudi;
1815 u32 i, event_id = 0;
1816 int rc;
1817
1818 /* Allocate device structure */
1819 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1820 if (!gaudi)
1821 return -ENOMEM;
1822
1823 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1824 if (gaudi_irq_map_table[i].valid) {
1825 if (event_id == GAUDI_EVENT_SIZE) {
1826 dev_err(hdev->dev,
1827 "Event array exceeds the limit of %u events\n",
1828 GAUDI_EVENT_SIZE);
1829 rc = -EINVAL;
1830 goto free_gaudi_device;
1831 }
1832
1833 gaudi->events[event_id++] =
1834 gaudi_irq_map_table[i].fc_id;
1835 }
1836 }
1837
1838 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1839
1840 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1841
1842 hdev->asic_specific = gaudi;
1843
1844 /* Create DMA pool for small allocations */
1845 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1846 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1847 if (!hdev->dma_pool) {
1848 dev_err(hdev->dev, "failed to create DMA pool\n");
1849 rc = -ENOMEM;
1850 goto free_gaudi_device;
1851 }
1852
1853 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1854 if (rc)
1855 goto free_dma_pool;
1856
1857 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1858 if (!hdev->cpu_accessible_dma_pool) {
1859 dev_err(hdev->dev,
1860 "Failed to create CPU accessible DMA pool\n");
1861 rc = -ENOMEM;
1862 goto free_cpu_dma_mem;
1863 }
1864
1865 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1866 (uintptr_t) hdev->cpu_accessible_dma_mem,
1867 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1868 if (rc) {
1869 dev_err(hdev->dev,
1870 "Failed to add memory to CPU accessible DMA pool\n");
1871 rc = -EFAULT;
1872 goto free_cpu_accessible_dma_pool;
1873 }
1874
1875 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1876 if (rc)
1877 goto free_cpu_accessible_dma_pool;
1878
1879 spin_lock_init(&gaudi->hw_queues_lock);
1880 mutex_init(&gaudi->clk_gate_mutex);
1881
1882 hdev->supports_sync_stream = true;
1883 hdev->supports_coresight = true;
1884 hdev->supports_staged_submission = true;
1885 hdev->supports_wait_for_multi_cs = true;
1886
1887 hdev->asic_funcs->set_pci_memory_regions(hdev);
1888 hdev->stream_master_qid_arr =
1889 hdev->asic_funcs->get_stream_master_qid_arr();
1890 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1891
1892 return 0;
1893
1894 free_cpu_accessible_dma_pool:
1895 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1896 free_cpu_dma_mem:
1897 if (!hdev->asic_prop.fw_security_enabled)
1898 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1899 hdev->cpu_pci_msb_addr);
1900 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1901 HL_CPU_ACCESSIBLE_MEM_SIZE,
1902 hdev->cpu_accessible_dma_mem,
1903 hdev->cpu_accessible_dma_address);
1904 free_dma_pool:
1905 dma_pool_destroy(hdev->dma_pool);
1906 free_gaudi_device:
1907 kfree(gaudi);
1908 return rc;
1909 }
1910
1911 static int gaudi_sw_fini(struct hl_device *hdev)
1912 {
1913 struct gaudi_device *gaudi = hdev->asic_specific;
1914
1915 gaudi_free_internal_qmans_pq_mem(hdev);
1916
1917 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1918
1919 if (!hdev->asic_prop.fw_security_enabled)
1920 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1921 hdev->cpu_pci_msb_addr);
1922
1923 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1924 HL_CPU_ACCESSIBLE_MEM_SIZE,
1925 hdev->cpu_accessible_dma_mem,
1926 hdev->cpu_accessible_dma_address);
1927
1928 dma_pool_destroy(hdev->dma_pool);
1929
1930 mutex_destroy(&gaudi->clk_gate_mutex);
1931
1932 kfree(gaudi);
1933
1934 return 0;
1935 }
1936
1937 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1938 {
1939 struct hl_device *hdev = arg;
1940 int i;
1941
1942 if (hdev->disabled)
1943 return IRQ_HANDLED;
1944
1945 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1946 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1947
1948 hl_irq_handler_eq(irq, &hdev->event_queue);
1949
1950 return IRQ_HANDLED;
1951 }
1952
1953 /*
1954 * For backward compatibility, new MSI interrupts should be set after the
1955 * existing CPU and NIC interrupts.
1956 */
1957 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1958 bool cpu_eq)
1959 {
1960 int msi_vec;
1961
1962 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1963 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1964 GAUDI_EVENT_QUEUE_MSI_IDX);
1965
1966 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1967 (nr + NIC_NUMBER_OF_ENGINES + 1);
1968
1969 return pci_irq_vector(hdev->pdev, msi_vec);
1970 }
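/*
 * Example of the mapping above, assuming GAUDI_EVENT_QUEUE_MSI_IDX == 8
 * and NIC_NUMBER_OF_ENGINES == 10: a completion queue with nr == 3 uses
 * MSI vector 3, the CPU EQ uses vector 8, and a newer interrupt with
 * nr == 9 is pushed past the CPU EQ and the NIC vectors, i.e. to
 * vector 9 + 10 + 1 = 20.
 */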
1971
1972 static int gaudi_enable_msi_single(struct hl_device *hdev)
1973 {
1974 int rc, irq;
1975
1976 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1977
1978 irq = gaudi_pci_irq_vector(hdev, 0, false);
1979 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1980 "gaudi single msi", hdev);
1981 if (rc)
1982 dev_err(hdev->dev,
1983 "Failed to request single MSI IRQ\n");
1984
1985 return rc;
1986 }
1987
1988 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1989 {
1990 int cq_cnt = hdev->asic_prop.completion_queues_count;
1991 int rc, i, irq_cnt_init, irq;
1992
1993 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1994 irq = gaudi_pci_irq_vector(hdev, i, false);
1995 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1996 &hdev->completion_queue[i]);
1997 if (rc) {
1998 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1999 goto free_irqs;
2000 }
2001 }
2002
2003 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2004 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2005 &hdev->event_queue);
2006 if (rc) {
2007 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2008 goto free_irqs;
2009 }
2010
2011 return 0;
2012
2013 free_irqs:
2014 for (i = 0 ; i < irq_cnt_init ; i++)
2015 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2016 &hdev->completion_queue[i]);
2017 return rc;
2018 }
2019
2020 static int gaudi_enable_msi(struct hl_device *hdev)
2021 {
2022 struct gaudi_device *gaudi = hdev->asic_specific;
2023 int rc;
2024
2025 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2026 return 0;
2027
2028 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2029 if (rc < 0) {
2030 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2031 return rc;
2032 }
2033
2034 if (rc < NUMBER_OF_INTERRUPTS) {
2035 gaudi->multi_msi_mode = false;
2036 rc = gaudi_enable_msi_single(hdev);
2037 } else {
2038 gaudi->multi_msi_mode = true;
2039 rc = gaudi_enable_msi_multi(hdev);
2040 }
2041
2042 if (rc)
2043 goto free_pci_irq_vectors;
2044
2045 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2046
2047 return 0;
2048
2049 free_pci_irq_vectors:
2050 pci_free_irq_vectors(hdev->pdev);
2051 return rc;
2052 }
2053
2054 static void gaudi_sync_irqs(struct hl_device *hdev)
2055 {
2056 struct gaudi_device *gaudi = hdev->asic_specific;
2057 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2058
2059 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060 return;
2061
2062 /* Wait for all pending IRQs to be finished */
2063 if (gaudi->multi_msi_mode) {
2064 for (i = 0 ; i < cq_cnt ; i++)
2065 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2066
2067 synchronize_irq(gaudi_pci_irq_vector(hdev,
2068 GAUDI_EVENT_QUEUE_MSI_IDX,
2069 true));
2070 } else {
2071 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2072 }
2073 }
2074
2075 static void gaudi_disable_msi(struct hl_device *hdev)
2076 {
2077 struct gaudi_device *gaudi = hdev->asic_specific;
2078 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2079
2080 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2081 return;
2082
2083 gaudi_sync_irqs(hdev);
2084
2085 if (gaudi->multi_msi_mode) {
2086 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2087 true);
2088 free_irq(irq, &hdev->event_queue);
2089
2090 for (i = 0 ; i < cq_cnt ; i++) {
2091 irq = gaudi_pci_irq_vector(hdev, i, false);
2092 free_irq(irq, &hdev->completion_queue[i]);
2093 }
2094 } else {
2095 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2096 }
2097
2098 pci_free_irq_vectors(hdev->pdev);
2099
2100 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2101 }
2102
2103 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2104 {
2105 struct gaudi_device *gaudi = hdev->asic_specific;
2106
2107 if (hdev->asic_prop.fw_security_enabled)
2108 return;
2109
2110 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2111 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2112 return;
2113
2114 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2115 return;
2116
2117 if (!hdev->sram_scrambler_enable)
2118 return;
2119
2120 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2121 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2123 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2125 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2127 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2129 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2131 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2133 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2135 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136
2137 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2138 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2140 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2142 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2144 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2146 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2148 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2150 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2152 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153
2154 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2155 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2156 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2157 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2159 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2161 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2163 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2165 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2167 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2169 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170
2171 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2172 }
2173
2174 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2175 {
2176 struct gaudi_device *gaudi = hdev->asic_specific;
2177
2178 if (hdev->asic_prop.fw_security_enabled)
2179 return;
2180
2181 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2182 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2183 return;
2184
2185 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2186 return;
2187
2188 if (!hdev->dram_scrambler_enable)
2189 return;
2190
2191 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2192 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2193 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2194 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2196 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2198 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2200 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2202 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2204 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2206 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207
2208 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224
2225 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2226 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2227 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2228 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2230 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2232 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2234 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2236 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2238 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2240 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241
2242 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2243 }
2244
2245 static void gaudi_init_e2e(struct hl_device *hdev)
2246 {
2247 if (hdev->asic_prop.fw_security_enabled)
2248 return;
2249
2250 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2251 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2252 return;
2253
2254 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2255 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2256 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2257 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2258
2259 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263
2264 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268
2269 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273
2274 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278
2279 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283
2284 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288
2289 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2290 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2291 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2292 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2293
2294 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2295 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2296 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2297 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2298
2299 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2300 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2301 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2302 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2303
2304 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2305 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2306 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2307 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2308
2309 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2310 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2311 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2312 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2313
2314 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2315 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2316 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2317 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2318
2319 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2320 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2321 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2322 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2323
2324 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2325 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2326 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2327 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2328
2329 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2330 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2331 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2332 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2333
2334 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338
2339 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343
2344 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348
2349 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353
2354 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2355 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2356 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2357 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2358
2359 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2360 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2361 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2362 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2363
2364 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2365 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2366 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2367 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2368
2369 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2370 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2371 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2372 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2373
2374 if (!hdev->dram_scrambler_enable) {
2375 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2376 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2377 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2378 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2379
2380 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2381 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2382 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2383 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2384
2385 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2386 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2387 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2388 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2389
2390 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2391 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2392 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2393 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2394
2395 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2396 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2397 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2398 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2399
2400 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2401 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2402 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2403 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2404
2405 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2406 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2407 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2408 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2409
2410 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2411 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2412 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2413 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2414
2415 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2416 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2417 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2418 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2419
2420 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2421 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2422 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2423 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2424
2425 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2426 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2427 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2428 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2429
2430 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2431 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2432 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2433 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2434
2435 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2436 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2437 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2438 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2439
2440 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2441 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2442 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2443 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2444
2445 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2446 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2447 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2448 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2449
2450 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2451 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2452 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2453 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2454
2455 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459
2460 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464
2465 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469
2470 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474
2475 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2476 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2477 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2478 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2479
2480 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2481 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2482 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2483 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2484
2485 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2486 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2487 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2488 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2489
2490 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2491 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2492 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2493 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2494 }
2495
2496 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2497 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2499 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500
2501 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2502 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2504 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505
2506 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2507 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2509 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510
2511 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2512 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2514 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515
2516 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2517 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2519 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520
2521 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2522 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2524 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525
2526 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2527 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2529 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530
2531 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2532 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2534 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535
2536 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2537 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2539 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540
2541 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2542 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2544 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545
2546 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2547 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2549 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550
2551 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2552 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2554 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555
2556 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2557 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2559 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560
2561 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2562 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2564 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565
2566 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2567 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2569 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570
2571 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2572 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2574 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575
2576 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2577 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2579 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580
2581 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2582 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2584 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585
2586 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2587 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2589 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590
2591 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2592 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2594 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595
2596 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2597 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2599 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600
2601 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2602 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2604 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605
2606 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2607 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2609 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610
2611 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2612 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2614 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615 }
2616
2617 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2618 {
2619 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2620
2621 if (hdev->asic_prop.fw_security_enabled)
2622 return;
2623
2624 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2625 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2626 return;
2627
2628 hbm0_wr = 0x33333333;
2629 hbm0_rd = 0x77777777;
2630 hbm1_wr = 0x55555555;
2631 hbm1_rd = 0xDDDDDDDD;
2632
2633 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2634 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2635 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2636 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2637
2638 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2639 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2640 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2641 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2642
2643 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2644 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2645 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2646 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2647
2648 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2649 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2650 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2651 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2652
2653 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2654 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2655 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2656 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2657 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2660 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2663 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665
2666 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2667 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2668 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2669 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2670 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2673 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2676 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678 }
2679
2680 static void gaudi_init_golden_registers(struct hl_device *hdev)
2681 {
2682 u32 tpc_offset;
2683 int tpc_id, i;
2684
2685 gaudi_init_e2e(hdev);
2686 gaudi_init_hbm_cred(hdev);
2687
2688 for (tpc_id = 0, tpc_offset = 0;
2689 tpc_id < TPC_NUMBER_OF_ENGINES;
2690 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2691 /* Mask all arithmetic interrupts from TPC */
2692 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2693 /* Set 16 cache lines */
2694 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2695 ICACHE_FETCH_LINE_NUM, 2);
2696 }
2697
2698 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2699 for (i = 0 ; i < 128 ; i += 8)
2700 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2701
2702 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2703 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2704 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 }
2707
2708 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2709 int qman_id, dma_addr_t qman_pq_addr)
2710 {
2711 struct cpu_dyn_regs *dyn_regs =
2712 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2714 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2715 u32 q_off, dma_qm_offset;
2716 u32 dma_qm_err_cfg, irq_handler_offset;
2717
2718 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2719
2720 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2721 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2722 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2723 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724 so_base_en_lo = lower_32_bits(CFG_BASE +
2725 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2726 so_base_en_hi = upper_32_bits(CFG_BASE +
2727 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2729 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2730 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2731 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732 so_base_ws_lo = lower_32_bits(CFG_BASE +
2733 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2734 so_base_ws_hi = upper_32_bits(CFG_BASE +
2735 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736
2737 q_off = dma_qm_offset + qman_id * 4;
2738
2739 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2740 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2741
2742 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2743 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2744 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2745
2746 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2747 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2748 QMAN_LDMA_SRC_OFFSET);
2749 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2750 QMAN_LDMA_DST_OFFSET);
2751
2752 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2753 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2754 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2755 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2756 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2757 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2758 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2759 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2760
2761 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2762
2763 /* The following configuration is needed only once per QMAN */
2764 if (qman_id == 0) {
2765 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2766 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2767 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2768
2769 /* Configure RAZWI IRQ */
2770 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2771 if (hdev->stop_on_err)
2772 dma_qm_err_cfg |=
2773 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2774
2775 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2776
2777 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2778 lower_32_bits(CFG_BASE + irq_handler_offset));
2779 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2780 upper_32_bits(CFG_BASE + irq_handler_offset));
2781
2782 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2783 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2784 dma_id);
2785
2786 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2787 QM_ARB_ERR_MSG_EN_MASK);
2788
2789 /* Increase ARB WDT to support streams architecture */
2790 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2791 GAUDI_ARB_WDT_TIMEOUT);
2792
2793 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2794 QMAN_EXTERNAL_MAKE_TRUSTED);
2795
2796 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2797 }
2798 }
2799
2800 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2801 {
2802 struct cpu_dyn_regs *dyn_regs =
2803 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2804 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2805 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2806 u32 irq_handler_offset;
2807
2808 /* Set to maximum possible according to physical size */
2809 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2810 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2811
2812 /* WA for H/W bug H3-2116 */
2813 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2814
2815 /* STOP_ON bit implies no completion is sent for the operation in case of RAZWI */
2816 if (hdev->stop_on_err)
2817 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2818
2819 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2820
2821 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2822 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2823 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2824
2825 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2826 lower_32_bits(CFG_BASE + irq_handler_offset));
2827 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2828 upper_32_bits(CFG_BASE + irq_handler_offset));
2829
2830 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2831 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2832 WREG32(mmDMA0_CORE_PROT + dma_offset,
2833 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2834 /* If the channel is secured, it should be in MMU bypass mode */
2835 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2836 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2837 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2838 }
2839
2840 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2841 u32 enable_mask)
2842 {
2843 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2844
2845 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2846 }
2847
2848 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2849 {
2850 struct gaudi_device *gaudi = hdev->asic_specific;
2851 struct hl_hw_queue *q;
2852 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2853
2854 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2855 return;
2856
2857 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2858 dma_id = gaudi_dma_assignment[i];
2859 /*
2860 * For queues after the CPU Q, we need to add 1 to get the
2861 * correct queue index. In addition, we need to skip the CPU EQ
2862 * and NIC IRQs in order to get the correct MSI vector.
2863 */
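/*
 * For example, assuming QMAN_STREAMS == 4 and the CPU queue sitting
 * right after the two PCI DMA QMANs: streams of channels 0/1 use
 * q_idx = 4 * dma_id + j, while a channel with dma_id > 1 adds
 * cpu_skip = 1, and its MSI vector additionally skips the CPU EQ and
 * the NIC_NUMBER_OF_ENGINES NIC vectors.
 */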
2864 if (dma_id > 1) {
2865 cpu_skip = 1;
2866 nic_skip = NIC_NUMBER_OF_ENGINES;
2867 } else {
2868 cpu_skip = 0;
2869 nic_skip = 0;
2870 }
2871
2872 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2873 q_idx = 4 * dma_id + j + cpu_skip;
2874 q = &hdev->kernel_queues[q_idx];
2875 q->cq_id = cq_id++;
2876 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2877 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2878 q->bus_address);
2879 }
2880
2881 gaudi_init_dma_core(hdev, dma_id);
2882
2883 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2884 }
2885
2886 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2887 }
2888
2889 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2890 int qman_id, u64 qman_base_addr)
2891 {
2892 struct cpu_dyn_regs *dyn_regs =
2893 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2894 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2895 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2896 u32 dma_qm_err_cfg, irq_handler_offset;
2897 u32 q_off, dma_qm_offset;
2898
2899 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2900
2901 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2902 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2903 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2904 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905 so_base_en_lo = lower_32_bits(CFG_BASE +
2906 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2907 so_base_en_hi = upper_32_bits(CFG_BASE +
2908 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2910 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2912 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913 so_base_ws_lo = lower_32_bits(CFG_BASE +
2914 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915 so_base_ws_hi = upper_32_bits(CFG_BASE +
2916 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917
2918 q_off = dma_qm_offset + qman_id * 4;
2919
2920 if (qman_id < 4) {
2921 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2922 lower_32_bits(qman_base_addr));
2923 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2924 upper_32_bits(qman_base_addr));
2925
2926 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2927 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2928 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2929
2930 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2931 QMAN_CPDMA_SIZE_OFFSET);
2932 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2933 QMAN_CPDMA_SRC_OFFSET);
2934 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2935 QMAN_CPDMA_DST_OFFSET);
2936 } else {
2937 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2938 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2939 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2940
2941 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2942 QMAN_LDMA_SIZE_OFFSET);
2943 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2944 QMAN_LDMA_SRC_OFFSET);
2945 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2946 QMAN_LDMA_DST_OFFSET);
2947
2948 /* Configure RAZWI IRQ */
2949 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2950 if (hdev->stop_on_err)
2951 dma_qm_err_cfg |=
2952 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2953
2954 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2955
2956 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2957 lower_32_bits(CFG_BASE + irq_handler_offset));
2958 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2959 upper_32_bits(CFG_BASE + irq_handler_offset));
2960
2961 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2962 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2963 dma_id);
2964
2965 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2966 QM_ARB_ERR_MSG_EN_MASK);
2967
2968 /* Increase ARB WDT to support streams architecture */
2969 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2970 GAUDI_ARB_WDT_TIMEOUT);
2971
2972 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2973 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2974 QMAN_INTERNAL_MAKE_TRUSTED);
2975 }
2976
2977 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2978 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2979 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2980 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2981
2982 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2983 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2984 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2985 mtr_base_ws_lo);
2986 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2987 mtr_base_ws_hi);
2988 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2989 so_base_ws_lo);
2990 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2991 so_base_ws_hi);
2992 }
2993 }
2994
2995 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2996 {
2997 struct gaudi_device *gaudi = hdev->asic_specific;
2998 struct gaudi_internal_qman_info *q;
2999 u64 qman_base_addr;
3000 int i, j, dma_id, internal_q_index;
3001
3002 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3003 return;
3004
3005 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3006 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3007
3008 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3009 /*
3010 * Add the CPU queue in order to get the correct queue
3011 * number, as all internal queues are placed after it
3012 */
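/*
 * e.g., assuming QMAN_STREAMS == 4: dma_id 2, stream 0 maps to
 * internal_q_index = 2 * 4 + 0 + 1 = 9, where the '+ 1' accounts
 * for the CPU queue slot that sits before the internal queues.
 */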
3013 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3014
3015 q = &gaudi->internal_qmans[internal_q_index];
3016 qman_base_addr = (u64) q->pq_dma_addr;
3017 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3018 qman_base_addr);
3019 }
3020
3021 /* Initializing lower CP for HBM DMA QMAN */
3022 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3023
3024 gaudi_init_dma_core(hdev, dma_id);
3025
3026 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3027 }
3028
3029 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3030 }
3031
3032 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3033 int qman_id, u64 qman_base_addr)
3034 {
3035 struct cpu_dyn_regs *dyn_regs =
3036 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3037 u32 mtr_base_lo, mtr_base_hi;
3038 u32 so_base_lo, so_base_hi;
3039 u32 irq_handler_offset;
3040 u32 q_off, mme_id;
3041 u32 mme_qm_err_cfg;
3042
3043 mtr_base_lo = lower_32_bits(CFG_BASE +
3044 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3045 mtr_base_hi = upper_32_bits(CFG_BASE +
3046 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047 so_base_lo = lower_32_bits(CFG_BASE +
3048 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3049 so_base_hi = upper_32_bits(CFG_BASE +
3050 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051
3052 q_off = mme_offset + qman_id * 4;
3053
3054 if (qman_id < 4) {
3055 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3056 lower_32_bits(qman_base_addr));
3057 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3058 upper_32_bits(qman_base_addr));
3059
3060 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3061 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3062 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3063
3064 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3065 QMAN_CPDMA_SIZE_OFFSET);
3066 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3067 QMAN_CPDMA_SRC_OFFSET);
3068 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3069 QMAN_CPDMA_DST_OFFSET);
3070 } else {
3071 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3072 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3073 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3074
3075 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3076 QMAN_LDMA_SIZE_OFFSET);
3077 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3078 QMAN_LDMA_SRC_OFFSET);
3079 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3080 QMAN_LDMA_DST_OFFSET);
3081
3082 /* Configure RAZWI IRQ */
3083 mme_id = mme_offset /
3084 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3085
3086 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3087 if (hdev->stop_on_err)
3088 mme_qm_err_cfg |=
3089 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3090
3091 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3092
3093 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3094 lower_32_bits(CFG_BASE + irq_handler_offset));
3095 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3096 upper_32_bits(CFG_BASE + irq_handler_offset));
3097
3098 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3099 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3100 mme_id);
3101
3102 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3103 QM_ARB_ERR_MSG_EN_MASK);
3104
3105 /* Increase ARB WDT to support streams architecture */
3106 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3107 GAUDI_ARB_WDT_TIMEOUT);
3108
3109 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3110 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3111 QMAN_INTERNAL_MAKE_TRUSTED);
3112 }
3113
3114 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3115 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3116 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3117 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3118 }
3119
3120 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3121 {
3122 struct gaudi_device *gaudi = hdev->asic_specific;
3123 struct gaudi_internal_qman_info *q;
3124 u64 qman_base_addr;
3125 u32 mme_offset;
3126 int i, internal_q_index;
3127
3128 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3129 return;
3130
3131 /*
3132 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3133 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3134 */
3135
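/*
 * Concretely: mme_offset starts as (mmMME2_QM_GLBL_CFG0 -
 * mmMME0_QM_GLBL_CFG0), so the first four internal queues
 * (GAUDI_QUEUE_ID_MME_0_0..3) are programmed into the MME2 QMAN;
 * once i == 3 the offset drops to 0 and GAUDI_QUEUE_ID_MME_1_0..3
 * are programmed into the MME0 QMAN, matching the mapping noted
 * above.
 */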
3136 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3137
3138 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3139 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3140 q = &gaudi->internal_qmans[internal_q_index];
3141 qman_base_addr = (u64) q->pq_dma_addr;
3142 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3143 qman_base_addr);
3144 if (i == 3)
3145 mme_offset = 0;
3146 }
3147
3148 /* Initializing lower CP for MME QMANs */
3149 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3150 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3151 gaudi_init_mme_qman(hdev, 0, 4, 0);
3152
3153 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3154 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3155
3156 gaudi->hw_cap_initialized |= HW_CAP_MME;
3157 }
3158
3159 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3160 int qman_id, u64 qman_base_addr)
3161 {
3162 struct cpu_dyn_regs *dyn_regs =
3163 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3164 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3165 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3166 u32 tpc_qm_err_cfg, irq_handler_offset;
3167 u32 q_off, tpc_id;
3168
3169 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3170 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3172 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173 so_base_en_lo = lower_32_bits(CFG_BASE +
3174 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175 so_base_en_hi = upper_32_bits(CFG_BASE +
3176 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3178 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3179 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3180 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181 so_base_ws_lo = lower_32_bits(CFG_BASE +
3182 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3183 so_base_ws_hi = upper_32_bits(CFG_BASE +
3184 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185
3186 q_off = tpc_offset + qman_id * 4;
3187
3188 tpc_id = tpc_offset /
3189 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3190
3191 if (qman_id < 4) {
3192 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3193 lower_32_bits(qman_base_addr));
3194 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3195 upper_32_bits(qman_base_addr));
3196
3197 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3198 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3199 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3200
3201 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3202 QMAN_CPDMA_SIZE_OFFSET);
3203 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3204 QMAN_CPDMA_SRC_OFFSET);
3205 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3206 QMAN_CPDMA_DST_OFFSET);
3207 } else {
3208 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3209 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3210 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3211
3212 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3213 QMAN_LDMA_SIZE_OFFSET);
3214 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3215 QMAN_LDMA_SRC_OFFSET);
3216 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3217 QMAN_LDMA_DST_OFFSET);
3218
3219 /* Configure RAZWI IRQ */
3220 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3221 if (hdev->stop_on_err)
3222 tpc_qm_err_cfg |=
3223 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3224
3225 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3226
3227 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3228 lower_32_bits(CFG_BASE + irq_handler_offset));
3229 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3230 upper_32_bits(CFG_BASE + irq_handler_offset));
3231
3232 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3233 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3234 tpc_id);
3235
3236 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3237 QM_ARB_ERR_MSG_EN_MASK);
3238
3239 /* Increase ARB WDT to support streams architecture */
3240 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3241 GAUDI_ARB_WDT_TIMEOUT);
3242
3243 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3244 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3245 QMAN_INTERNAL_MAKE_TRUSTED);
3246 }
3247
3248 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3249 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3250 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3251 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3252
3253 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3254 if (tpc_id == 6) {
3255 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3256 mtr_base_ws_lo);
3257 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3258 mtr_base_ws_hi);
3259 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3260 so_base_ws_lo);
3261 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3262 so_base_ws_hi);
3263 }
3264 }
3265
3266 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3267 {
3268 struct gaudi_device *gaudi = hdev->asic_specific;
3269 struct gaudi_internal_qman_info *q;
3270 u64 qman_base_addr;
3271 u32 so_base_hi, tpc_offset = 0;
3272 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3273 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3274 int i, tpc_id, internal_q_index;
3275
3276 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3277 return;
3278
3279 so_base_hi = upper_32_bits(CFG_BASE +
3280 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3281
3282 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3283 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3284 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3285 tpc_id * QMAN_STREAMS + i;
3286 q = &gaudi->internal_qmans[internal_q_index];
3287 qman_base_addr = (u64) q->pq_dma_addr;
3288 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3289 qman_base_addr);
3290
3291 if (i == 3) {
3292 /* Initializing lower CP for TPC QMAN */
3293 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3294
3295 /* Enable the QMAN and TPC channel */
3296 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3297 QMAN_TPC_ENABLE);
3298 }
3299 }
3300
3301 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3302 so_base_hi);
3303
3304 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3305
3306 gaudi->hw_cap_initialized |=
3307 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3308 }
3309 }
3310
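/* Configure a single NIC QMAN stream. Unlike the TPC/MME QMANs, every
 * NIC stream gets its own internal PQ, and the RAZWI/arbitration error
 * configuration is programmed only once per QMAN, when stream 0 is
 * initialized.
 */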
3311 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3312 int qman_id, u64 qman_base_addr, int nic_id)
3313 {
3314 struct cpu_dyn_regs *dyn_regs =
3315 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3316 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3317 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3318 u32 nic_qm_err_cfg, irq_handler_offset;
3319 u32 q_off;
3320
3321 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3322 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3323 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3324 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325 so_base_en_lo = lower_32_bits(CFG_BASE +
3326 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3327 so_base_en_hi = upper_32_bits(CFG_BASE +
3328 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3330 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3331 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3332 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333 so_base_ws_lo = lower_32_bits(CFG_BASE +
3334 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3335 so_base_ws_hi = upper_32_bits(CFG_BASE +
3336 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337
3338 q_off = nic_offset + qman_id * 4;
3339
3340 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3341 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3342
3343 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3344 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3345 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3346
3347 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3348 QMAN_LDMA_SIZE_OFFSET);
3349 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3350 QMAN_LDMA_SRC_OFFSET);
3351 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3352 QMAN_LDMA_DST_OFFSET);
3353
3354 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3355 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3356 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3357 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3358
3359 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3360 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3361 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3362 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3363 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3364
3365 if (qman_id == 0) {
3366 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3367 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3368 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3369
3370 /* Configure RAZWI IRQ */
3371 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3372 if (hdev->stop_on_err)
3373 nic_qm_err_cfg |=
3374 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3375
3376 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3377
3378 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3379 lower_32_bits(CFG_BASE + irq_handler_offset));
3380 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3381 upper_32_bits(CFG_BASE + irq_handler_offset));
3382
3383 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3384 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3385 nic_id);
3386
3387 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3388 QM_ARB_ERR_MSG_EN_MASK);
3389
3390 /* Increase ARB WDT to support streams architecture */
3391 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3392 GAUDI_ARB_WDT_TIMEOUT);
3393
3394 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3395 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3396 QMAN_INTERNAL_MAKE_TRUSTED);
3397 }
3398 }
3399
3400 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3401 {
3402 struct gaudi_device *gaudi = hdev->asic_specific;
3403 struct gaudi_internal_qman_info *q;
3404 u64 qman_base_addr;
3405 u32 nic_offset = 0;
3406 u32 nic_delta_between_qmans =
3407 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408 u32 nic_delta_between_nics =
3409 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410 int i, nic_id, internal_q_index;
3411
3412 if (!hdev->nic_ports_mask)
3413 return;
3414
3415 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3416 return;
3417
3418 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3419
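/* Walk the NIC QMAN register blocks: each NIC macro holds two QMANs, so
 * after every odd engine the offset is rewound by two QMAN strides and
 * advanced by one NIC stride to reach the next macro.
 */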
3420 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3421 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3422 nic_offset += nic_delta_between_qmans;
3423 if (nic_id & 1) {
3424 nic_offset -= (nic_delta_between_qmans * 2);
3425 nic_offset += nic_delta_between_nics;
3426 }
3427 continue;
3428 }
3429
3430 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3431 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3432 nic_id * QMAN_STREAMS + i;
3433 q = &gaudi->internal_qmans[internal_q_index];
3434 qman_base_addr = (u64) q->pq_dma_addr;
3435 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3436 qman_base_addr, nic_id);
3437 }
3438
3439 /* Enable the QMAN */
3440 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3441
3442 nic_offset += nic_delta_between_qmans;
3443 if (nic_id & 1) {
3444 nic_offset -= (nic_delta_between_qmans * 2);
3445 nic_offset += nic_delta_between_nics;
3446 }
3447
3448 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3449 }
3450 }
3451
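/* The halt flow (see gaudi_halt_engines()) uses three kinds of helpers:
 * the *_stop_* functions halt the QMAN CPs via GLBL_CFG1 so no new work
 * is fetched, the *_stall functions halt the engine cores themselves,
 * and the *_disable_* functions below clear GLBL_CFG0 to shut the QMANs
 * off completely.
 */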
3452 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3453 {
3454 struct gaudi_device *gaudi = hdev->asic_specific;
3455
3456 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457 return;
3458
3459 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3460 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3461 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3462 }
3463
3464 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3465 {
3466 struct gaudi_device *gaudi = hdev->asic_specific;
3467
3468 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3469 return;
3470
3471 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3472 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3473 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3474 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3475 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3476 }
3477
3478 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3479 {
3480 struct gaudi_device *gaudi = hdev->asic_specific;
3481
3482 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483 return;
3484
3485 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3486 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3487 }
3488
3489 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3490 {
3491 struct gaudi_device *gaudi = hdev->asic_specific;
3492 u32 tpc_offset = 0;
3493 int tpc_id;
3494
3495 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496 return;
3497
3498 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3499 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3500 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3501 }
3502 }
3503
3504 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3505 {
3506 struct gaudi_device *gaudi = hdev->asic_specific;
3507 u32 nic_mask, nic_offset = 0;
3508 u32 nic_delta_between_qmans =
3509 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3510 u32 nic_delta_between_nics =
3511 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512 int nic_id;
3513
3514 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3515 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3516
3517 if (gaudi->hw_cap_initialized & nic_mask)
3518 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3519
3520 nic_offset += nic_delta_between_qmans;
3521 if (nic_id & 1) {
3522 nic_offset -= (nic_delta_between_qmans * 2);
3523 nic_offset += nic_delta_between_nics;
3524 }
3525 }
3526 }
3527
3528 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3529 {
3530 struct gaudi_device *gaudi = hdev->asic_specific;
3531
3532 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3533 return;
3534
3535 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3536 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3538 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 }
3540
3541 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3542 {
3543 struct gaudi_device *gaudi = hdev->asic_specific;
3544
3545 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3546 return;
3547
3548 /* Stop CPs of HBM DMA QMANs */
3549
3550 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3551 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 }
3556
3557 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3558 {
3559 struct gaudi_device *gaudi = hdev->asic_specific;
3560
3561 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3562 return;
3563
3564 /* Stop CPs of MME QMANs */
3565 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567 }
3568
3569 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3570 {
3571 struct gaudi_device *gaudi = hdev->asic_specific;
3572
3573 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3574 return;
3575
3576 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3578 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 }
3585
3586 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3587 {
3588 struct gaudi_device *gaudi = hdev->asic_specific;
3589
3590 /* Stop upper CPs of QMANs */
3591
3592 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3593 WREG32(mmNIC0_QM0_GLBL_CFG1,
3594 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597
3598 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3599 WREG32(mmNIC0_QM1_GLBL_CFG1,
3600 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603
3604 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3605 WREG32(mmNIC1_QM0_GLBL_CFG1,
3606 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609
3610 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3611 WREG32(mmNIC1_QM1_GLBL_CFG1,
3612 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615
3616 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3617 WREG32(mmNIC2_QM0_GLBL_CFG1,
3618 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621
3622 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3623 WREG32(mmNIC2_QM1_GLBL_CFG1,
3624 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627
3628 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3629 WREG32(mmNIC3_QM0_GLBL_CFG1,
3630 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633
3634 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3635 WREG32(mmNIC3_QM1_GLBL_CFG1,
3636 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639
3640 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3641 WREG32(mmNIC4_QM0_GLBL_CFG1,
3642 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645
3646 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3647 WREG32(mmNIC4_QM1_GLBL_CFG1,
3648 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651 }
3652
3653 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3654 {
3655 struct gaudi_device *gaudi = hdev->asic_specific;
3656
3657 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3658 return;
3659
3660 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3661 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3662 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 }
3664
3665 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3666 {
3667 struct gaudi_device *gaudi = hdev->asic_specific;
3668
3669 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3670 return;
3671
3672 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3673 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3674 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 }
3678
3679 static void gaudi_mme_stall(struct hl_device *hdev)
3680 {
3681 struct gaudi_device *gaudi = hdev->asic_specific;
3682
3683 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3684 return;
3685
3686 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3687 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3688 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3689 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3690 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3691 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3693 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3695 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3697 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3699 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3701 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3703 }
3704
3705 static void gaudi_tpc_stall(struct hl_device *hdev)
3706 {
3707 struct gaudi_device *gaudi = hdev->asic_specific;
3708
3709 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3710 return;
3711
3712 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3713 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3714 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 }
3721
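/* Enable clock gating per engine according to hdev->clock_gating_mask
 * (one bit per engine ID). Skipped during a debug session and when FW
 * security is enabled.
 */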
3722 static void gaudi_set_clock_gating(struct hl_device *hdev)
3723 {
3724 struct gaudi_device *gaudi = hdev->asic_specific;
3725 u32 qman_offset;
3726 bool enable;
3727 int i;
3728
3729 /* If a debug session is in progress, don't enable clock gating as it
3730 * may interfere with the session
3731 */
3732 if (hdev->in_debug)
3733 return;
3734
3735 if (hdev->asic_prop.fw_security_enabled)
3736 return;
3737
3738 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3739 enable = !!(hdev->clock_gating_mask &
3740 (BIT_ULL(gaudi_dma_assignment[i])));
3741
3742 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3743 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3744 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3745 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3746 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3747 }
3748
3749 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3750 enable = !!(hdev->clock_gating_mask &
3751 (BIT_ULL(gaudi_dma_assignment[i])));
3752
3753 /* GC sends work to the DMA engine through the upper CP in DMA5, so
3754 * clock gating must not be enabled for that DMA
3755 */
3756 if (i == GAUDI_HBM_DMA_4)
3757 enable = false;
3758
3759 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3760 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3761 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3763 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764 }
3765
3766 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3767 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3768 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3769
3770 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3771 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3772 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3773
3774 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3775 enable = !!(hdev->clock_gating_mask &
3776 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3777
3778 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3779 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3780 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3781 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3782
3783 qman_offset += TPC_QMAN_OFFSET;
3784 }
3785
3786 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3787 }
3788
3789 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3790 {
3791 struct gaudi_device *gaudi = hdev->asic_specific;
3792 u32 qman_offset;
3793 int i;
3794
3795 if (hdev->asic_prop.fw_security_enabled)
3796 return;
3797
3798 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3799 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3800 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3801
3802 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3803 }
3804
3805 WREG32(mmMME0_QM_CGM_CFG, 0);
3806 WREG32(mmMME0_QM_CGM_CFG1, 0);
3807 WREG32(mmMME2_QM_CGM_CFG, 0);
3808 WREG32(mmMME2_QM_CGM_CFG1, 0);
3809
3810 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3811 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3812 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3813
3814 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3815 }
3816
3817 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3818 }
3819
3820 static void gaudi_enable_timestamp(struct hl_device *hdev)
3821 {
3822 /* Disable the timestamp counter */
3823 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3824
3825 /* Zero the lower/upper parts of the 64-bit counter */
3826 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3827 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3828
3829 /* Enable the counter */
3830 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3831 }
3832
3833 static void gaudi_disable_timestamp(struct hl_device *hdev)
3834 {
3835 /* Disable the timestamp counter */
3836 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3837 }
3838
3839 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3840 {
3841 u32 wait_timeout_ms;
3842
3843 dev_info(hdev->dev,
3844 "Halting compute engines and disabling interrupts\n");
3845
3846 if (hdev->pldm)
3847 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3848 else
3849 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3850
3851 if (fw_reset)
3852 goto skip_engines;
3853
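/* Halt order: first stop the QMAN CPs so no new work is fetched, wait,
 * then stall the engine cores, wait again, and finally disable the QMANs
 * and the timestamp counter.
 */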
3854 gaudi_stop_nic_qmans(hdev);
3855 gaudi_stop_mme_qmans(hdev);
3856 gaudi_stop_tpc_qmans(hdev);
3857 gaudi_stop_hbm_dma_qmans(hdev);
3858 gaudi_stop_pci_dma_qmans(hdev);
3859
3860 hdev->asic_funcs->disable_clock_gating(hdev);
3861
3862 msleep(wait_timeout_ms);
3863
3864 gaudi_pci_dma_stall(hdev);
3865 gaudi_hbm_dma_stall(hdev);
3866 gaudi_tpc_stall(hdev);
3867 gaudi_mme_stall(hdev);
3868
3869 msleep(wait_timeout_ms);
3870
3871 gaudi_disable_nic_qmans(hdev);
3872 gaudi_disable_mme_qmans(hdev);
3873 gaudi_disable_tpc_qmans(hdev);
3874 gaudi_disable_hbm_dma_qmans(hdev);
3875 gaudi_disable_pci_dma_qmans(hdev);
3876
3877 gaudi_disable_timestamp(hdev);
3878
3879 skip_engines:
3880 gaudi_disable_msi(hdev);
3881 }
3882
3883 static int gaudi_mmu_init(struct hl_device *hdev)
3884 {
3885 struct asic_fixed_properties *prop = &hdev->asic_prop;
3886 struct gaudi_device *gaudi = hdev->asic_specific;
3887 u64 hop0_addr;
3888 int rc, i;
3889
3890 if (!hdev->mmu_enable)
3891 return 0;
3892
3893 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3894 return 0;
3895
3896 for (i = 0 ; i < prop->max_asid ; i++) {
3897 hop0_addr = prop->mmu_pgt_addr +
3898 (i * prop->mmu_hop_table_size);
3899
3900 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3901 if (rc) {
3902 dev_err(hdev->dev,
3903 "failed to set hop0 addr for asid %d\n", i);
3904 goto err;
3905 }
3906 }
3907
3908 /* Init the MMU cache management page */
3909 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3910 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3911
3912 /* mem cache invalidation */
3913 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3914
3915 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3916
3917 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3918 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3919
3920 WREG32(mmSTLB_HOP_CONFIGURATION,
3921 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3922
3923 /*
3924 * The H/W expects the first PI after init to be 1. After wraparound
3925 * we'll write 0.
3926 */
3927 gaudi->mmu_cache_inv_pi = 1;
3928
3929 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3930
3931 return 0;
3932
3933 err:
3934 return rc;
3935 }
3936
3937 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3938 {
3939 void __iomem *dst;
3940
3941 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3942
3943 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3944 }
3945
3946 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3947 {
3948 void __iomem *dst;
3949
3950 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3951
3952 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3953 }
3954
3955 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3956 {
3957 struct dynamic_fw_load_mgr *dynamic_loader;
3958 struct cpu_dyn_regs *dyn_regs;
3959
3960 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3961
3962 /*
3963 * Here we set initial values for a few specific dynamic registers
3964 * (before the first descriptor is read from the FW, these values have
3965 * to be hard-coded). In later stages of the protocol they are updated
3966 * automatically by reading the FW descriptor, so the data there is
3967 * always up-to-date.
3968 */
3969 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3970 dyn_regs->kmd_msg_to_cpu =
3971 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3972 dyn_regs->cpu_cmd_status_to_host =
3973 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3974
3975 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3976 }
3977
3978 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3979 {
3980 struct static_fw_load_mgr *static_loader;
3981
3982 static_loader = &hdev->fw_loader.static_loader;
3983
3984 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3985 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3986 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3987 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3988 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3989 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3990 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3991 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3992 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3993 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3994 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3995 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3996 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3997 GAUDI_PLDM_RESET_WAIT_MSEC :
3998 GAUDI_CPU_RESET_WAIT_MSEC;
3999 }
4000
4001 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4002 {
4003 struct asic_fixed_properties *prop = &hdev->asic_prop;
4004 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4005
4006 /* fill common fields */
4007 fw_loader->linux_loaded = false;
4008 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4009 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4010 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4011 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4012 fw_loader->skip_bmc = !hdev->bmc_enable;
4013 fw_loader->sram_bar_id = SRAM_BAR_ID;
4014 fw_loader->dram_bar_id = HBM_BAR_ID;
4015
4016 if (prop->dynamic_fw_load)
4017 gaudi_init_dynamic_firmware_loader(hdev);
4018 else
4019 gaudi_init_static_firmware_loader(hdev);
4020 }
4021
4022 static int gaudi_init_cpu(struct hl_device *hdev)
4023 {
4024 struct gaudi_device *gaudi = hdev->asic_specific;
4025 int rc;
4026
4027 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4028 return 0;
4029
4030 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4031 return 0;
4032
4033 /*
4034 * The device CPU works with 40-bit addresses.
4035 * This register sets the extension to 50 bits.
4036 */
4037 if (!hdev->asic_prop.fw_security_enabled)
4038 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4039
4040 rc = hl_fw_init_cpu(hdev);
4041
4042 if (rc)
4043 return rc;
4044
4045 gaudi->hw_cap_initialized |= HW_CAP_CPU;
4046
4047 return 0;
4048 }
4049
4050 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4051 {
4052 struct cpu_dyn_regs *dyn_regs =
4053 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4054 struct asic_fixed_properties *prop = &hdev->asic_prop;
4055 struct gaudi_device *gaudi = hdev->asic_specific;
4056 u32 status, irq_handler_offset;
4057 struct hl_eq *eq;
4058 struct hl_hw_queue *cpu_pq =
4059 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4060 int err;
4061
4062 if (!hdev->cpu_queues_enable)
4063 return 0;
4064
4065 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4066 return 0;
4067
4068 eq = &hdev->event_queue;
4069
4070 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4071 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4072
4073 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4074 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4075
4076 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4077 lower_32_bits(hdev->cpu_accessible_dma_address));
4078 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4079 upper_32_bits(hdev->cpu_accessible_dma_address));
4080
4081 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4082 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4083 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4084
4085 /* Used for EQ CI */
4086 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4087
4088 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4089
4090 if (gaudi->multi_msi_mode)
4091 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4092 else
4093 WREG32(mmCPU_IF_QUEUE_INIT,
4094 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4095
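/* Handshake with the device CPU: we reported READY_FOR_CP above, now
 * raise the PI-update interrupt and poll until the CPU flips the status
 * to READY_FOR_HOST (or the timeout expires).
 */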
4096 irq_handler_offset = prop->gic_interrupts_enable ?
4097 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4098 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4099
4100 WREG32(irq_handler_offset,
4101 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4102
4103 err = hl_poll_timeout(
4104 hdev,
4105 mmCPU_IF_QUEUE_INIT,
4106 status,
4107 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4108 1000,
4109 cpu_timeout);
4110
4111 if (err) {
4112 dev_err(hdev->dev,
4113 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4114 return -EIO;
4115 }
4116
4117 /* update FW application security bits */
4118 if (prop->fw_cpu_boot_dev_sts0_valid)
4119 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4120 if (prop->fw_cpu_boot_dev_sts1_valid)
4121 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4122
4123 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4124 return 0;
4125 }
4126
4127 static void gaudi_pre_hw_init(struct hl_device *hdev)
4128 {
4129 /* Perform read from the device to make sure device is up */
4130 RREG32(mmHW_STATE);
4131
4132 if (!hdev->asic_prop.fw_security_enabled) {
4133 /* Set the access through PCI bars (Linux driver only) as
4134 * secured
4135 */
4136 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4137 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4138 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4139
4140 /* Perform read to flush the waiting writes to ensure
4141 * configuration was set in the device
4142 */
4143 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4144 }
4145
4146 /*
4147 * Let's mark in the H/W that we have reached this point. We check
4148 * this value in the reset_before_init function to understand whether
4149 * we need to reset the chip before doing H/W init. This register is
4150 * cleared by the H/W upon H/W reset
4151 */
4152 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4153 }
4154
4155 static int gaudi_hw_init(struct hl_device *hdev)
4156 {
4157 struct gaudi_device *gaudi = hdev->asic_specific;
4158 int rc;
4159
4160 gaudi_pre_hw_init(hdev);
4161
4162 /* If the iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4163 * So we set it here, and if anyone tries to move it later to a
4164 * different address, there will be an error
4165 */
4166 if (hdev->asic_prop.iatu_done_by_fw)
4167 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4168
4169 /*
4170 * Before pushing u-boot/linux to device, need to set the hbm bar to
4171 * base address of dram
4172 */
4173 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4174 dev_err(hdev->dev,
4175 "failed to map HBM bar to DRAM base address\n");
4176 return -EIO;
4177 }
4178
4179 rc = gaudi_init_cpu(hdev);
4180 if (rc) {
4181 dev_err(hdev->dev, "failed to initialize CPU\n");
4182 return rc;
4183 }
4184
4185 /* If clock gating was enabled in preboot, we need to disable it here
4186 * before touching the MME/TPC registers.
4187 * There is no need to take the clock gating mutex because no other
4188 * relevant code can run while this function runs
4189 */
4190 hdev->asic_funcs->disable_clock_gating(hdev);
4191
4192 /* SRAM scrambler must be initialized after CPU is running from HBM */
4193 gaudi_init_scrambler_sram(hdev);
4194
4195 /* This is here just in case we are working without CPU */
4196 gaudi_init_scrambler_hbm(hdev);
4197
4198 gaudi_init_golden_registers(hdev);
4199
4200 rc = gaudi_mmu_init(hdev);
4201 if (rc)
4202 return rc;
4203
4204 gaudi_init_security(hdev);
4205
4206 gaudi_init_pci_dma_qmans(hdev);
4207
4208 gaudi_init_hbm_dma_qmans(hdev);
4209
4210 gaudi_init_mme_qmans(hdev);
4211
4212 gaudi_init_tpc_qmans(hdev);
4213
4214 gaudi_init_nic_qmans(hdev);
4215
4216 hdev->asic_funcs->set_clock_gating(hdev);
4217
4218 gaudi_enable_timestamp(hdev);
4219
4220 /* MSI must be enabled before CPU queues and NIC are initialized */
4221 rc = gaudi_enable_msi(hdev);
4222 if (rc)
4223 goto disable_queues;
4224
4225 /* must be called after MSI was enabled */
4226 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4227 if (rc) {
4228 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4229 rc);
4230 goto disable_msi;
4231 }
4232
4233 /* Perform read from the device to flush all configuration */
4234 RREG32(mmHW_STATE);
4235
4236 return 0;
4237
4238 disable_msi:
4239 gaudi_disable_msi(hdev);
4240 disable_queues:
4241 gaudi_disable_mme_qmans(hdev);
4242 gaudi_disable_pci_dma_qmans(hdev);
4243
4244 return rc;
4245 }
4246
4247 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4248 {
4249 struct cpu_dyn_regs *dyn_regs =
4250 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4251 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4252 struct gaudi_device *gaudi = hdev->asic_specific;
4253 bool driver_performs_reset;
4254
4255 if (!hard_reset) {
4256 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4257 return;
4258 }
4259
4260 if (hdev->pldm) {
4261 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4262 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4263 } else {
4264 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4265 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4266 }
4267
4268 if (fw_reset) {
4269 dev_info(hdev->dev,
4270 "Firmware performs HARD reset, going to wait %dms\n",
4271 reset_timeout_ms);
4272
4273 goto skip_reset;
4274 }
4275
4276 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4277 !hdev->asic_prop.hard_reset_done_by_fw);
4278
4279 /* Set device to handle FLR by H/W as we will put the device CPU to
4280 * halt mode
4281 */
4282 if (driver_performs_reset)
4283 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4284 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4285
4286 /* If Linux is loaded on the device CPU, we need to communicate with it
4287 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4288 * registers in the case of old F/Ws
4289 */
4290 if (hdev->fw_loader.linux_loaded) {
4291 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4292 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4293 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4294
4295 WREG32(irq_handler_offset,
4296 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4297 } else {
4298 if (hdev->asic_prop.hard_reset_done_by_fw)
4299 hl_fw_ask_hard_reset_without_linux(hdev);
4300 else
4301 hl_fw_ask_halt_machine_without_linux(hdev);
4302 }
4303
4304 if (driver_performs_reset) {
4305
4306 /* Configure the reset registers. Must be done as early as
4307 * possible in case we fail during H/W initialization
4308 */
4309 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4310 (CFG_RST_H_DMA_MASK |
4311 CFG_RST_H_MME_MASK |
4312 CFG_RST_H_SM_MASK |
4313 CFG_RST_H_TPC_7_MASK));
4314
4315 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4316
4317 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4318 (CFG_RST_H_HBM_MASK |
4319 CFG_RST_H_TPC_7_MASK |
4320 CFG_RST_H_NIC_MASK |
4321 CFG_RST_H_SM_MASK |
4322 CFG_RST_H_DMA_MASK |
4323 CFG_RST_H_MME_MASK |
4324 CFG_RST_H_CPU_MASK |
4325 CFG_RST_H_MMU_MASK));
4326
4327 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4328 (CFG_RST_L_IF_MASK |
4329 CFG_RST_L_PSOC_MASK |
4330 CFG_RST_L_TPC_MASK));
4331
4332 msleep(cpu_timeout_ms);
4333
4334 /* Tell ASIC not to re-initialize PCIe */
4335 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4336
4337 /* Restart BTL/BLR upon hard-reset */
4338 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4339
4340 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4341 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4342
4343 dev_info(hdev->dev,
4344 "Issued HARD reset command, going to wait %dms\n",
4345 reset_timeout_ms);
4346 } else {
4347 dev_info(hdev->dev,
4348 "Firmware performs HARD reset, going to wait %dms\n",
4349 reset_timeout_ms);
4350 }
4351
4352 skip_reset:
4353 /*
4354 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4355 * itself is in reset. Need to wait until the reset is deasserted
4356 */
4357 msleep(reset_timeout_ms);
4358
4359 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4360 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4361 dev_err(hdev->dev,
4362 "Timeout while waiting for device to reset 0x%x\n",
4363 status);
4364
4365 if (gaudi) {
4366 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4367 HW_CAP_HBM | HW_CAP_PCI_DMA |
4368 HW_CAP_MME | HW_CAP_TPC_MASK |
4369 HW_CAP_HBM_DMA | HW_CAP_PLL |
4370 HW_CAP_NIC_MASK | HW_CAP_MMU |
4371 HW_CAP_SRAM_SCRAMBLER |
4372 HW_CAP_HBM_SCRAMBLER |
4373 HW_CAP_CLK_GATE);
4374
4375 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4376
4377 hdev->device_cpu_is_halted = false;
4378 }
4379 }
4380
4381 static int gaudi_suspend(struct hl_device *hdev)
4382 {
4383 int rc;
4384
4385 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4386 if (rc)
4387 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4388
4389 return rc;
4390 }
4391
4392 static int gaudi_resume(struct hl_device *hdev)
4393 {
4394 return gaudi_init_iatu(hdev);
4395 }
4396
4397 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4398 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4399 {
4400 int rc;
4401
4402 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4403 VM_DONTCOPY | VM_NORESERVE;
4404
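/* DMA addresses handed out by this driver are shifted by HOST_PHYS_BASE
 * (see gaudi_dma_alloc_coherent()), so subtract it back before passing
 * the address to the DMA API.
 */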
4405 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4406 (dma_addr - HOST_PHYS_BASE), size);
4407 if (rc)
4408 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4409
4410 return rc;
4411 }
4412
4413 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4414 {
4415 struct cpu_dyn_regs *dyn_regs =
4416 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4417 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4418 struct gaudi_device *gaudi = hdev->asic_specific;
4419 bool invalid_queue = false;
4420 int dma_id;
4421
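/* Translate the driver's H/W queue ID into the matching QMAN PQ_PI
 * doorbell register. For the CPU PQ the doorbell write is followed by a
 * PI-update interrupt to the device CPU (see below).
 */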
4422 switch (hw_queue_id) {
4423 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4424 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4425 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4426 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4427 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4428 break;
4429
4430 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4431 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4432 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4433 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4434 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4435 break;
4436
4437 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4438 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4439 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4440 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4441 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4442 break;
4443
4444 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4445 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4446 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4448 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449 break;
4450
4451 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4452 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4453 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4455 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456 break;
4457
4458 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4459 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4460 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463 break;
4464
4465 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4466 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4467 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470 break;
4471
4472 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4473 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4474 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477 break;
4478
4479 case GAUDI_QUEUE_ID_CPU_PQ:
4480 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4481 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4482 else
4483 invalid_queue = true;
4484 break;
4485
4486 case GAUDI_QUEUE_ID_MME_0_0:
4487 db_reg_offset = mmMME2_QM_PQ_PI_0;
4488 break;
4489
4490 case GAUDI_QUEUE_ID_MME_0_1:
4491 db_reg_offset = mmMME2_QM_PQ_PI_1;
4492 break;
4493
4494 case GAUDI_QUEUE_ID_MME_0_2:
4495 db_reg_offset = mmMME2_QM_PQ_PI_2;
4496 break;
4497
4498 case GAUDI_QUEUE_ID_MME_0_3:
4499 db_reg_offset = mmMME2_QM_PQ_PI_3;
4500 break;
4501
4502 case GAUDI_QUEUE_ID_MME_1_0:
4503 db_reg_offset = mmMME0_QM_PQ_PI_0;
4504 break;
4505
4506 case GAUDI_QUEUE_ID_MME_1_1:
4507 db_reg_offset = mmMME0_QM_PQ_PI_1;
4508 break;
4509
4510 case GAUDI_QUEUE_ID_MME_1_2:
4511 db_reg_offset = mmMME0_QM_PQ_PI_2;
4512 break;
4513
4514 case GAUDI_QUEUE_ID_MME_1_3:
4515 db_reg_offset = mmMME0_QM_PQ_PI_3;
4516 break;
4517
4518 case GAUDI_QUEUE_ID_TPC_0_0:
4519 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4520 break;
4521
4522 case GAUDI_QUEUE_ID_TPC_0_1:
4523 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4524 break;
4525
4526 case GAUDI_QUEUE_ID_TPC_0_2:
4527 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4528 break;
4529
4530 case GAUDI_QUEUE_ID_TPC_0_3:
4531 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4532 break;
4533
4534 case GAUDI_QUEUE_ID_TPC_1_0:
4535 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4536 break;
4537
4538 case GAUDI_QUEUE_ID_TPC_1_1:
4539 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4540 break;
4541
4542 case GAUDI_QUEUE_ID_TPC_1_2:
4543 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4544 break;
4545
4546 case GAUDI_QUEUE_ID_TPC_1_3:
4547 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4548 break;
4549
4550 case GAUDI_QUEUE_ID_TPC_2_0:
4551 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4552 break;
4553
4554 case GAUDI_QUEUE_ID_TPC_2_1:
4555 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4556 break;
4557
4558 case GAUDI_QUEUE_ID_TPC_2_2:
4559 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4560 break;
4561
4562 case GAUDI_QUEUE_ID_TPC_2_3:
4563 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4564 break;
4565
4566 case GAUDI_QUEUE_ID_TPC_3_0:
4567 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4568 break;
4569
4570 case GAUDI_QUEUE_ID_TPC_3_1:
4571 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4572 break;
4573
4574 case GAUDI_QUEUE_ID_TPC_3_2:
4575 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4576 break;
4577
4578 case GAUDI_QUEUE_ID_TPC_3_3:
4579 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4580 break;
4581
4582 case GAUDI_QUEUE_ID_TPC_4_0:
4583 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4584 break;
4585
4586 case GAUDI_QUEUE_ID_TPC_4_1:
4587 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4588 break;
4589
4590 case GAUDI_QUEUE_ID_TPC_4_2:
4591 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4592 break;
4593
4594 case GAUDI_QUEUE_ID_TPC_4_3:
4595 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4596 break;
4597
4598 case GAUDI_QUEUE_ID_TPC_5_0:
4599 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4600 break;
4601
4602 case GAUDI_QUEUE_ID_TPC_5_1:
4603 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4604 break;
4605
4606 case GAUDI_QUEUE_ID_TPC_5_2:
4607 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4608 break;
4609
4610 case GAUDI_QUEUE_ID_TPC_5_3:
4611 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4612 break;
4613
4614 case GAUDI_QUEUE_ID_TPC_6_0:
4615 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4616 break;
4617
4618 case GAUDI_QUEUE_ID_TPC_6_1:
4619 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4620 break;
4621
4622 case GAUDI_QUEUE_ID_TPC_6_2:
4623 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4624 break;
4625
4626 case GAUDI_QUEUE_ID_TPC_6_3:
4627 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4628 break;
4629
4630 case GAUDI_QUEUE_ID_TPC_7_0:
4631 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4632 break;
4633
4634 case GAUDI_QUEUE_ID_TPC_7_1:
4635 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4636 break;
4637
4638 case GAUDI_QUEUE_ID_TPC_7_2:
4639 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4640 break;
4641
4642 case GAUDI_QUEUE_ID_TPC_7_3:
4643 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4644 break;
4645
4646 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4647 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4648 invalid_queue = true;
4649
4650 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4652 break;
4653
4654 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4655 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4656 invalid_queue = true;
4657
4658 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4660 break;
4661
4662 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4663 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4664 invalid_queue = true;
4665
4666 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4668 break;
4669
4670 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4671 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4672 invalid_queue = true;
4673
4674 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4676 break;
4677
4678 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4679 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4680 invalid_queue = true;
4681
4682 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4684 break;
4685
4686 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4687 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4688 invalid_queue = true;
4689
4690 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4692 break;
4693
4694 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4695 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4696 invalid_queue = true;
4697
4698 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4699 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4700 break;
4701
4702 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4703 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4704 invalid_queue = true;
4705
4706 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4707 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4708 break;
4709
4710 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4711 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4712 invalid_queue = true;
4713
4714 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4715 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4716 break;
4717
4718 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4719 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4720 invalid_queue = true;
4721
4722 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4723 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4724 break;
4725
4726 default:
4727 invalid_queue = true;
4728 }
4729
4730 if (invalid_queue) {
4731 /* Should never get here */
4732 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4733 hw_queue_id);
4734 return;
4735 }
4736
4737 db_value = pi;
4738
4739 /* ring the doorbell */
4740 WREG32(db_reg_offset, db_value);
4741
4742 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4743 /* make sure device CPU will read latest data from host */
4744 mb();
4745
4746 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4747 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4748 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4749
4750 WREG32(irq_handler_offset,
4751 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4752 }
4753 }
4754
4755 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4756 struct hl_bd *bd)
4757 {
4758 __le64 *pbd = (__le64 *) bd;
4759
4760 /* The QMANs are in host memory, so a simple copy suffices */
4761 pqe[0] = pbd[0];
4762 pqe[1] = pbd[1];
4763 }
4764
4765 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4766 dma_addr_t *dma_handle, gfp_t flags)
4767 {
4768 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4769 dma_handle, flags);
4770
4771 /* Shift to the device's base physical address of host memory */
4772 if (kernel_addr)
4773 *dma_handle += HOST_PHYS_BASE;
4774
4775 return kernel_addr;
4776 }
4777
4778 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4779 void *cpu_addr, dma_addr_t dma_handle)
4780 {
4781 /* Subtract the device's base physical address of host memory */
4782 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4783
4784 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4785 }
4786
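/* Scrub HBM by programming every DMA core in memset mode (the MEM_SET
 * commit bit), splitting the DRAM range into chunks of up to 2GB that
 * run on all channels in parallel, and then polling each core until it
 * is no longer busy.
 */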
4787 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4788 {
4789 struct asic_fixed_properties *prop = &hdev->asic_prop;
4790 u64 cur_addr = DRAM_BASE_ADDR_USER;
4791 u32 val;
4792 u32 chunk_size;
4793 int rc, dma_id;
4794
4795 while (cur_addr < prop->dram_end_address) {
4796 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4797 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4798
4799 chunk_size =
4800 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4801
4802 dev_dbg(hdev->dev,
4803 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4804 cur_addr, cur_addr + chunk_size);
4805
4806 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4807 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4808 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4809 lower_32_bits(cur_addr));
4810 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4811 upper_32_bits(cur_addr));
4812 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4813 chunk_size);
4814 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4815 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4816 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4817
4818 cur_addr += chunk_size;
4819
4820 if (cur_addr == prop->dram_end_address)
4821 break;
4822 }
4823
4824 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4825 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4826
4827 rc = hl_poll_timeout(
4828 hdev,
4829 mmDMA0_CORE_STS0 + dma_offset,
4830 val,
4831 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4832 1000,
4833 HBM_SCRUBBING_TIMEOUT_US);
4834
4835 if (rc) {
4836 dev_err(hdev->dev,
4837 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4838 dma_id);
4839 return -EIO;
4840 }
4841 }
4842 }
4843
4844 return 0;
4845 }
4846
4847 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4848 {
4849 struct asic_fixed_properties *prop = &hdev->asic_prop;
4850 struct gaudi_device *gaudi = hdev->asic_specific;
4851 int rc = 0;
4852 u64 val = 0;
4853
4854 if (!hdev->memory_scrub)
4855 return 0;
4856
4857 if (!addr && !size) {
4858 /* Wait till device is idle */
4859 rc = hl_poll_timeout(
4860 hdev,
4861 mmDMA0_CORE_STS0/* dummy */,
4862 val/* dummy */,
4863 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4864 0, NULL)),
4865 1000,
4866 HBM_SCRUBBING_TIMEOUT_US);
4867 if (rc) {
4868 dev_err(hdev->dev, "waiting for idle timeout\n");
4869 return -EIO;
4870 }
4871
4872 /* Scrub SRAM */
4873 addr = prop->sram_user_base_address;
4874 size = hdev->pldm ? 0x10000 :
4875 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4876 val = 0x7777777777777777ull;
4877
4878 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4879 if (rc) {
4880 dev_err(hdev->dev,
4881 "Failed to clear SRAM in mem scrub all\n");
4882 return rc;
4883 }
4884
4885 mutex_lock(&gaudi->clk_gate_mutex);
4886 hdev->asic_funcs->disable_clock_gating(hdev);
4887
4888 /* Scrub HBM using all DMA channels in parallel */
4889 rc = gaudi_hbm_scrubbing(hdev);
4890 if (rc)
4891 dev_err(hdev->dev,
4892 "Failed to clear HBM in mem scrub all\n");
4893
4894 hdev->asic_funcs->set_clock_gating(hdev);
4895 mutex_unlock(&gaudi->clk_gate_mutex);
4896 }
4897
4898 return rc;
4899 }
4900
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
4904 {
4905 struct gaudi_device *gaudi = hdev->asic_specific;
4906 struct gaudi_internal_qman_info *q;
4907
4908 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4909 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4910 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4911 return NULL;
4912 }
4913
4914 q = &gaudi->internal_qmans[queue_id];
4915 *dma_handle = q->pq_dma_addr;
4916 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4917
4918 return q->pq_kernel_addr;
4919 }
4920
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
4923 {
4924 struct gaudi_device *gaudi = hdev->asic_specific;
4925
4926 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4927 if (result)
4928 *result = 0;
4929 return 0;
4930 }
4931
4932 if (!timeout)
4933 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4934
4935 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4936 timeout, result);
4937 }
4938
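/*
 * Sanity test for a single external H/W queue: allocate a 4-byte fence in
 * host memory, submit a MSG_PROT packet that writes a known value to it,
 * and poll the fence until the value arrives or the timeout expires.
 */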
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4940 {
4941 struct packet_msg_prot *fence_pkt;
4942 dma_addr_t pkt_dma_addr;
4943 u32 fence_val, tmp, timeout_usec;
4944 dma_addr_t fence_dma_addr;
4945 u32 *fence_ptr;
4946 int rc;
4947
4948 if (hdev->pldm)
4949 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4950 else
4951 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4952
4953 fence_val = GAUDI_QMAN0_FENCE_VAL;
4954
4955 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956 &fence_dma_addr);
4957 if (!fence_ptr) {
4958 dev_err(hdev->dev,
4959 "Failed to allocate memory for H/W queue %d testing\n",
4960 hw_queue_id);
4961 return -ENOMEM;
4962 }
4963
4964 *fence_ptr = 0;
4965
4966 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4967 sizeof(struct packet_msg_prot),
4968 GFP_KERNEL, &pkt_dma_addr);
4969 if (!fence_pkt) {
4970 dev_err(hdev->dev,
4971 "Failed to allocate packet for H/W queue %d testing\n",
4972 hw_queue_id);
4973 rc = -ENOMEM;
4974 goto free_fence_ptr;
4975 }
4976
4977 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4978 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4979 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4980
4981 fence_pkt->ctl = cpu_to_le32(tmp);
4982 fence_pkt->value = cpu_to_le32(fence_val);
4983 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4984
4985 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4986 sizeof(struct packet_msg_prot),
4987 pkt_dma_addr);
4988 if (rc) {
4989 dev_err(hdev->dev,
4990 "Failed to send fence packet to H/W queue %d\n",
4991 hw_queue_id);
4992 goto free_pkt;
4993 }
4994
4995 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4996 1000, timeout_usec, true);
4997
4998 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4999
5000 if (rc == -ETIMEDOUT) {
5001 dev_err(hdev->dev,
5002 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5003 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5004 rc = -EIO;
5005 }
5006
5007 free_pkt:
5008 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5009 pkt_dma_addr);
5010 free_fence_ptr:
5011 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5012 fence_dma_addr);
5013 return rc;
5014 }
5015
static int gaudi_test_cpu_queue(struct hl_device *hdev)
5017 {
5018 struct gaudi_device *gaudi = hdev->asic_specific;
5019
5020 /*
5021 * check capability here as send_cpu_message() won't update the result
5022 * value if no capability
5023 */
5024 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5025 return 0;
5026
5027 return hl_fw_test_cpu_queue(hdev);
5028 }
5029
static int gaudi_test_queues(struct hl_device *hdev)
5031 {
5032 int i, rc, ret_val = 0;
5033
5034 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5035 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5036 rc = gaudi_test_queue(hdev, i);
5037 if (rc)
5038 ret_val = -EINVAL;
5039 }
5040 }
5041
5042 rc = gaudi_test_cpu_queue(hdev);
5043 if (rc)
5044 ret_val = -EINVAL;
5045
5046 return ret_val;
5047 }
5048
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
			gfp_t mem_flags, dma_addr_t *dma_handle)
5051 {
5052 void *kernel_addr;
5053
5054 if (size > GAUDI_DMA_POOL_BLK_SIZE)
5055 return NULL;
5056
5057 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5058
5059 /* Shift to the device's base physical address of host memory */
5060 if (kernel_addr)
5061 *dma_handle += HOST_PHYS_BASE;
5062
5063 return kernel_addr;
5064 }
5065
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
5068 {
5069 /* Cancel the device's base physical address of host memory */
5070 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5071
5072 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5073 }
5074
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
5077 {
5078 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5079 }
5080
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
5083 {
5084 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5085 }
5086
static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
				int nents, enum dma_data_direction dir)
5089 {
5090 struct scatterlist *sg;
5091 int i;
5092
5093 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5094 return -ENOMEM;
5095
5096 /* Shift to the device's base physical address of host memory */
5097 for_each_sg(sgl, sg, nents, i)
5098 sg->dma_address += HOST_PHYS_BASE;
5099
5100 return 0;
5101 }
5102
static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
				int nents, enum dma_data_direction dir)
5105 {
5106 struct scatterlist *sg;
5107 int i;
5108
5109 /* Cancel the device's base physical address of host memory */
5110 for_each_sg(sgl, sg, nents, i)
5111 sg->dma_address -= HOST_PHYS_BASE;
5112
5113 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5114 }
5115
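/*
 * Compute how many LIN_DMA packets a patched CB will need for this SG
 * table. Physically contiguous entries are merged as long as the combined
 * length does not exceed DMA_MAX_TRANSFER_SIZE; each merged run costs one
 * packet_lin_dma descriptor.
 */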
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
					struct sg_table *sgt)
5118 {
5119 struct scatterlist *sg, *sg_next_iter;
5120 u32 count, dma_desc_cnt;
5121 u64 len, len_next;
5122 dma_addr_t addr, addr_next;
5123
5124 dma_desc_cnt = 0;
5125
5126 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5127
5128 len = sg_dma_len(sg);
5129 addr = sg_dma_address(sg);
5130
5131 if (len == 0)
5132 break;
5133
5134 while ((count + 1) < sgt->nents) {
5135 sg_next_iter = sg_next(sg);
5136 len_next = sg_dma_len(sg_next_iter);
5137 addr_next = sg_dma_address(sg_next_iter);
5138
5139 if (len_next == 0)
5140 break;
5141
5142 if ((addr + len == addr_next) &&
5143 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5144 len += len_next;
5145 count++;
5146 sg = sg_next_iter;
5147 } else {
5148 break;
5149 }
5150 }
5151
5152 dma_desc_cnt++;
5153 }
5154
5155 return dma_desc_cnt * sizeof(struct packet_lin_dma);
5156 }
5157
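/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this job), DMA-map it and account for the LIN_DMA packets the
 * patched CB will need to cover its SG list.
 */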
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
5162 {
5163 struct hl_userptr *userptr;
5164 int rc;
5165
5166 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5167 parser->job_userptr_list, &userptr))
5168 goto already_pinned;
5169
5170 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5171 if (!userptr)
5172 return -ENOMEM;
5173
5174 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5175 userptr);
5176 if (rc)
5177 goto free_userptr;
5178
5179 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5180
5181 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5182 userptr->sgt->nents, dir);
5183 if (rc) {
5184 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5185 goto unpin_memory;
5186 }
5187
5188 userptr->dma_mapped = true;
5189 userptr->dir = dir;
5190
5191 already_pinned:
5192 parser->patched_cb_size +=
5193 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5194
5195 return 0;
5196
5197 unpin_memory:
5198 list_del(&userptr->job_node);
5199 hl_unpin_host_memory(hdev, userptr);
5200 free_userptr:
5201 kfree(userptr);
5202 return rc;
5203 }
5204
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
5209 {
5210 enum dma_data_direction dir;
5211 bool skip_host_mem_pin = false, user_memset;
5212 u64 addr;
5213 int rc = 0;
5214
5215 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5216 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5217 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218
5219 if (src_in_host) {
5220 if (user_memset)
5221 skip_host_mem_pin = true;
5222
5223 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5224 dir = DMA_TO_DEVICE;
5225 addr = le64_to_cpu(user_dma_pkt->src_addr);
5226 } else {
5227 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5228 dir = DMA_FROM_DEVICE;
5229 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5230 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5231 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5232 }
5233
5234 if (skip_host_mem_pin)
5235 parser->patched_cb_size += sizeof(*user_dma_pkt);
5236 else
5237 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5238 addr, dir);
5239
5240 return rc;
5241 }
5242
static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
5246 {
5247 bool src_in_host = false;
5248 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5249 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5250 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5251
5252 dev_dbg(hdev->dev, "DMA packet details:\n");
5253 dev_dbg(hdev->dev, "source == 0x%llx\n",
5254 le64_to_cpu(user_dma_pkt->src_addr));
5255 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5256 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5257
5258 /*
5259 * Special handling for DMA with size 0. Bypass all validations
5260 * because no transactions will be done except for WR_COMP, which
5261 * is not a security issue
5262 */
5263 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5264 parser->patched_cb_size += sizeof(*user_dma_pkt);
5265 return 0;
5266 }
5267
5268 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5269 src_in_host = true;
5270
5271 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5272 src_in_host);
5273 }
5274
static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
					struct hl_cs_parser *parser,
					struct packet_load_and_exe *user_pkt)
5278 {
5279 u32 cfg;
5280
5281 cfg = le32_to_cpu(user_pkt->cfg);
5282
5283 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5284 dev_err(hdev->dev,
5285 "User not allowed to use Load and Execute\n");
5286 return -EPERM;
5287 }
5288
5289 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5290
5291 return 0;
5292 }
5293
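/*
 * First pass over a user CB: walk the packets, reject privileged opcodes
 * (MSG_PROT, CP_DMA, STOP, WREG_BULK, and LOAD_AND_EXE when its destination
 * cfg bit is set) and accumulate the size the patched CB will need,
 * including room for the two trailing MSG_PROT packets when a completion
 * is requested.
 */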
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
5296 {
5297 u32 cb_parsed_length = 0;
5298 int rc = 0;
5299
5300 parser->patched_cb_size = 0;
5301
	/* user_cb_size is more than 0 so the loop will always execute */
5303 while (cb_parsed_length < parser->user_cb_size) {
5304 enum packet_id pkt_id;
5305 u16 pkt_size;
5306 struct gaudi_packet *user_pkt;
5307
5308 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5309
5310 pkt_id = (enum packet_id) (
5311 (le64_to_cpu(user_pkt->header) &
5312 PACKET_HEADER_PACKET_ID_MASK) >>
5313 PACKET_HEADER_PACKET_ID_SHIFT);
5314
5315 if (!validate_packet_id(pkt_id)) {
5316 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5317 rc = -EINVAL;
5318 break;
5319 }
5320
5321 pkt_size = gaudi_packet_sizes[pkt_id];
5322 cb_parsed_length += pkt_size;
5323 if (cb_parsed_length > parser->user_cb_size) {
5324 dev_err(hdev->dev,
5325 "packet 0x%x is out of CB boundary\n", pkt_id);
5326 rc = -EINVAL;
5327 break;
5328 }
5329
5330 switch (pkt_id) {
5331 case PACKET_MSG_PROT:
5332 dev_err(hdev->dev,
5333 "User not allowed to use MSG_PROT\n");
5334 rc = -EPERM;
5335 break;
5336
5337 case PACKET_CP_DMA:
5338 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5339 rc = -EPERM;
5340 break;
5341
5342 case PACKET_STOP:
5343 dev_err(hdev->dev, "User not allowed to use STOP\n");
5344 rc = -EPERM;
5345 break;
5346
5347 case PACKET_WREG_BULK:
5348 dev_err(hdev->dev,
5349 "User not allowed to use WREG_BULK\n");
5350 rc = -EPERM;
5351 break;
5352
5353 case PACKET_LOAD_AND_EXE:
5354 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5355 (struct packet_load_and_exe *) user_pkt);
5356 break;
5357
5358 case PACKET_LIN_DMA:
5359 parser->contains_dma_pkt = true;
5360 if (is_mmu)
5361 parser->patched_cb_size += pkt_size;
5362 else
5363 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5364 (struct packet_lin_dma *) user_pkt);
5365 break;
5366
5367 case PACKET_WREG_32:
5368 case PACKET_MSG_LONG:
5369 case PACKET_MSG_SHORT:
5370 case PACKET_REPEAT:
5371 case PACKET_FENCE:
5372 case PACKET_NOP:
5373 case PACKET_ARB_POINT:
5374 parser->patched_cb_size += pkt_size;
5375 break;
5376
5377 default:
5378 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5379 pkt_id);
5380 rc = -EINVAL;
5381 break;
5382 }
5383
5384 if (rc)
5385 break;
5386 }
5387
5388 /*
5389 * The new CB should have space at the end for two MSG_PROT packets:
5390 * 1. A packet that will act as a completion packet
5391 * 2. A packet that will generate MSI-X interrupt
5392 */
5393 if (parser->completion)
5394 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5395
5396 return rc;
5397 }
5398
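/*
 * Expand one user LIN_DMA packet into a series of LIN_DMA packets, one per
 * coalesced SG run of the pinned host buffer. The engine barrier is kept
 * only on the first packet and WR_COMP is re-enabled only on the last one,
 * as the user originally requested.
 */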
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
5404 {
5405 struct hl_userptr *userptr;
5406 struct scatterlist *sg, *sg_next_iter;
5407 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5408 u64 len, len_next;
5409 dma_addr_t dma_addr, dma_addr_next;
5410 u64 device_memory_addr, addr;
5411 enum dma_data_direction dir;
5412 struct sg_table *sgt;
5413 bool src_in_host = false;
5414 bool skip_host_mem_pin = false;
5415 bool user_memset;
5416
5417 ctl = le32_to_cpu(user_dma_pkt->ctl);
5418
5419 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5420 src_in_host = true;
5421
5422 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5423 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5424
5425 if (src_in_host) {
5426 addr = le64_to_cpu(user_dma_pkt->src_addr);
5427 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5428 dir = DMA_TO_DEVICE;
5429 if (user_memset)
5430 skip_host_mem_pin = true;
5431 } else {
5432 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5433 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5434 dir = DMA_FROM_DEVICE;
5435 }
5436
5437 if ((!skip_host_mem_pin) &&
5438 (!hl_userptr_is_pinned(hdev, addr,
5439 le32_to_cpu(user_dma_pkt->tsize),
5440 parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, le32_to_cpu(user_dma_pkt->tsize));
5443 return -EFAULT;
5444 }
5445
5446 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5447 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5448 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5449 return 0;
5450 }
5451
5452 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5453
5454 sgt = userptr->sgt;
5455 dma_desc_cnt = 0;
5456
5457 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5458 len = sg_dma_len(sg);
5459 dma_addr = sg_dma_address(sg);
5460
5461 if (len == 0)
5462 break;
5463
5464 while ((count + 1) < sgt->nents) {
5465 sg_next_iter = sg_next(sg);
5466 len_next = sg_dma_len(sg_next_iter);
5467 dma_addr_next = sg_dma_address(sg_next_iter);
5468
5469 if (len_next == 0)
5470 break;
5471
5472 if ((dma_addr + len == dma_addr_next) &&
5473 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5474 len += len_next;
5475 count++;
5476 sg = sg_next_iter;
5477 } else {
5478 break;
5479 }
5480 }
5481
5482 ctl = le32_to_cpu(user_dma_pkt->ctl);
5483 if (likely(dma_desc_cnt))
5484 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5485 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5486 new_dma_pkt->ctl = cpu_to_le32(ctl);
5487 new_dma_pkt->tsize = cpu_to_le32(len);
5488
5489 if (dir == DMA_TO_DEVICE) {
5490 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5491 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5492 } else {
5493 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5494 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5495 }
5496
5497 if (!user_memset)
5498 device_memory_addr += len;
5499 dma_desc_cnt++;
5500 new_dma_pkt++;
5501 }
5502
5503 if (!dma_desc_cnt) {
5504 dev_err(hdev->dev,
5505 "Error of 0 SG entries when patching DMA packet\n");
5506 return -EFAULT;
5507 }
5508
5509 /* Fix the last dma packet - wrcomp must be as user set it */
5510 new_dma_pkt--;
5511 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5512
5513 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5514
5515 return 0;
5516 }
5517
static int gaudi_patch_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser)
5520 {
5521 u32 cb_parsed_length = 0;
5522 u32 cb_patched_cur_length = 0;
5523 int rc = 0;
5524
	/* user_cb_size is more than 0 so the loop will always execute */
5526 while (cb_parsed_length < parser->user_cb_size) {
5527 enum packet_id pkt_id;
5528 u16 pkt_size;
5529 u32 new_pkt_size = 0;
5530 struct gaudi_packet *user_pkt, *kernel_pkt;
5531
5532 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5533 kernel_pkt = parser->patched_cb->kernel_address +
5534 cb_patched_cur_length;
5535
5536 pkt_id = (enum packet_id) (
5537 (le64_to_cpu(user_pkt->header) &
5538 PACKET_HEADER_PACKET_ID_MASK) >>
5539 PACKET_HEADER_PACKET_ID_SHIFT);
5540
5541 if (!validate_packet_id(pkt_id)) {
5542 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5543 rc = -EINVAL;
5544 break;
5545 }
5546
5547 pkt_size = gaudi_packet_sizes[pkt_id];
5548 cb_parsed_length += pkt_size;
5549 if (cb_parsed_length > parser->user_cb_size) {
5550 dev_err(hdev->dev,
5551 "packet 0x%x is out of CB boundary\n", pkt_id);
5552 rc = -EINVAL;
5553 break;
5554 }
5555
5556 switch (pkt_id) {
5557 case PACKET_LIN_DMA:
5558 rc = gaudi_patch_dma_packet(hdev, parser,
5559 (struct packet_lin_dma *) user_pkt,
5560 (struct packet_lin_dma *) kernel_pkt,
5561 &new_pkt_size);
5562 cb_patched_cur_length += new_pkt_size;
5563 break;
5564
5565 case PACKET_MSG_PROT:
5566 dev_err(hdev->dev,
5567 "User not allowed to use MSG_PROT\n");
5568 rc = -EPERM;
5569 break;
5570
5571 case PACKET_CP_DMA:
5572 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5573 rc = -EPERM;
5574 break;
5575
5576 case PACKET_STOP:
5577 dev_err(hdev->dev, "User not allowed to use STOP\n");
5578 rc = -EPERM;
5579 break;
5580
5581 case PACKET_WREG_32:
5582 case PACKET_WREG_BULK:
5583 case PACKET_MSG_LONG:
5584 case PACKET_MSG_SHORT:
5585 case PACKET_REPEAT:
5586 case PACKET_FENCE:
5587 case PACKET_NOP:
5588 case PACKET_ARB_POINT:
5589 case PACKET_LOAD_AND_EXE:
5590 memcpy(kernel_pkt, user_pkt, pkt_size);
5591 cb_patched_cur_length += pkt_size;
5592 break;
5593
5594 default:
5595 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5596 pkt_id);
5597 rc = -EINVAL;
5598 break;
5599 }
5600
5601 if (rc)
5602 break;
5603 }
5604
5605 return rc;
5606 }
5607
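/*
 * CS parsing when the MMU is enabled: user addresses are translated by the
 * device MMU, so the CB is copied verbatim into a kernel-owned patched CB
 * and only validated; no per-packet address patching is required.
 */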
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
5610 {
5611 u64 patched_cb_handle;
5612 u32 patched_cb_size;
5613 struct hl_cb *user_cb;
5614 int rc;
5615
5616 /*
5617 * The new CB should have space at the end for two MSG_PROT pkt:
5618 * 1. A packet that will act as a completion packet
5619 * 2. A packet that will generate MSI interrupt
5620 */
5621 if (parser->completion)
5622 parser->patched_cb_size = parser->user_cb_size +
5623 sizeof(struct packet_msg_prot) * 2;
5624 else
5625 parser->patched_cb_size = parser->user_cb_size;
5626
5627 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5628 parser->patched_cb_size, false, false,
5629 &patched_cb_handle);
5630
5631 if (rc) {
5632 dev_err(hdev->dev,
5633 "Failed to allocate patched CB for DMA CS %d\n",
5634 rc);
5635 return rc;
5636 }
5637
5638 patched_cb_handle >>= PAGE_SHIFT;
5639 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5640 (u32) patched_cb_handle);
5641 /* hl_cb_get should never fail */
5642 if (!parser->patched_cb) {
5643 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5644 (u32) patched_cb_handle);
5645 rc = -EFAULT;
5646 goto out;
5647 }
5648
5649 /*
5650 * The check that parser->user_cb_size <= parser->user_cb->size was done
5651 * in validate_queue_index().
5652 */
5653 memcpy(parser->patched_cb->kernel_address,
5654 parser->user_cb->kernel_address,
5655 parser->user_cb_size);
5656
5657 patched_cb_size = parser->patched_cb_size;
5658
5659 /* Validate patched CB instead of user CB */
5660 user_cb = parser->user_cb;
5661 parser->user_cb = parser->patched_cb;
5662 rc = gaudi_validate_cb(hdev, parser, true);
5663 parser->user_cb = user_cb;
5664
5665 if (rc) {
5666 hl_cb_put(parser->patched_cb);
5667 goto out;
5668 }
5669
5670 if (patched_cb_size != parser->patched_cb_size) {
5671 dev_err(hdev->dev, "user CB size mismatch\n");
5672 hl_cb_put(parser->patched_cb);
5673 rc = -EINVAL;
5674 goto out;
5675 }
5676
5677 out:
	/*
	 * Always call cb destroy here because we still hold one reference
	 * to it from the earlier cb_get. After the job completes, cb_put
	 * will release it, but here we want to remove it from the idr.
	 */
5684 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5685 patched_cb_handle << PAGE_SHIFT);
5686
5687 return rc;
5688 }
5689
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser)
5692 {
5693 u64 patched_cb_handle;
5694 int rc;
5695
5696 rc = gaudi_validate_cb(hdev, parser, false);
5697
5698 if (rc)
5699 goto free_userptr;
5700
5701 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5702 parser->patched_cb_size, false, false,
5703 &patched_cb_handle);
5704 if (rc) {
5705 dev_err(hdev->dev,
5706 "Failed to allocate patched CB for DMA CS %d\n", rc);
5707 goto free_userptr;
5708 }
5709
5710 patched_cb_handle >>= PAGE_SHIFT;
5711 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5712 (u32) patched_cb_handle);
5713 /* hl_cb_get should never fail here */
5714 if (!parser->patched_cb) {
5715 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5716 (u32) patched_cb_handle);
5717 rc = -EFAULT;
5718 goto out;
5719 }
5720
5721 rc = gaudi_patch_cb(hdev, parser);
5722
5723 if (rc)
5724 hl_cb_put(parser->patched_cb);
5725
5726 out:
	/*
	 * Always call cb destroy here because we still hold one reference
	 * to it from the earlier cb_get. After the job completes, cb_put
	 * will release it, but here we want to remove it from the idr.
	 */
5733 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5734 patched_cb_handle << PAGE_SHIFT);
5735
5736 free_userptr:
5737 if (rc)
5738 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5739 return rc;
5740 }
5741
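/*
 * Jobs for internal queues execute the CB directly from device-accessible
 * memory, so only verify that the CB address range falls inside SRAM, DRAM
 * or the PMMU virtual range, and that the target NIC queue is enabled.
 */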
static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
5744 {
5745 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5746 struct gaudi_device *gaudi = hdev->asic_specific;
5747 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5748 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5749
5750 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5751 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5752 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5753 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5754 parser->hw_queue_id);
5755 return -EINVAL;
5756 }
5757
5758 /* For internal queue jobs just check if CB address is valid */
5759 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5760 parser->user_cb_size,
5761 asic_prop->sram_user_base_address,
5762 asic_prop->sram_end_address))
5763 return 0;
5764
5765 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5766 parser->user_cb_size,
5767 asic_prop->dram_user_base_address,
5768 asic_prop->dram_end_address))
5769 return 0;
5770
5771 /* PMMU and HPMMU addresses are equal, check only one of them */
5772 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5773 parser->user_cb_size,
5774 asic_prop->pmmu.start_addr,
5775 asic_prop->pmmu.end_addr))
5776 return 0;
5777
5778 dev_err(hdev->dev,
5779 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5780 parser->user_cb, parser->user_cb_size);
5781
5782 return -EFAULT;
5783 }
5784
static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5786 {
5787 struct gaudi_device *gaudi = hdev->asic_specific;
5788
5789 if (parser->queue_type == QUEUE_TYPE_INT)
5790 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5791
5792 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5793 return gaudi_parse_cb_mmu(hdev, parser);
5794 else
5795 return gaudi_parse_cb_no_mmu(hdev, parser);
5796 }
5797
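/*
 * Append the two trailing MSG_PROT packets to a patched CB: the first
 * writes the completion value to the CQ and the second triggers the MSI
 * (or MSI-X, in multi-MSI mode) interrupt.
 */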
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
					void *kernel_address, u32 len,
					u64 cq_addr, u32 cq_val, u32 msi_vec,
					bool eb)
5802 {
5803 struct gaudi_device *gaudi = hdev->asic_specific;
5804 struct packet_msg_prot *cq_pkt;
5805 u64 msi_addr;
5806 u32 tmp;
5807
5808 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5809
5810 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5811 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5812
5813 if (eb)
5814 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5815
5816 cq_pkt->ctl = cpu_to_le32(tmp);
5817 cq_pkt->value = cpu_to_le32(cq_val);
5818 cq_pkt->addr = cpu_to_le64(cq_addr);
5819
5820 cq_pkt++;
5821
5822 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5823 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5824 cq_pkt->ctl = cpu_to_le32(tmp);
5825 cq_pkt->value = cpu_to_le32(1);
5826
5827 if (gaudi->multi_msi_mode)
5828 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5829 else
5830 msi_addr = mmPCIE_CORE_MSI_REQ;
5831
5832 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5833 }
5834
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5836 {
5837 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5838 }
5839
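/*
 * Fill a device memory range with a 64-bit value by building a single
 * LIN_DMA packet in memset mode inside a kernel CB and sending it on DMA
 * channel 0 through QMAN0. The DMA0 error cause register is checked before
 * and after the transfer.
 */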
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
5842 {
5843 struct packet_lin_dma *lin_dma_pkt;
5844 struct hl_cs_job *job;
5845 u32 cb_size, ctl, err_cause;
5846 struct hl_cb *cb;
5847 u64 id;
5848 int rc;
5849
5850 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5851 if (!cb)
5852 return -EFAULT;
5853
5854 lin_dma_pkt = cb->kernel_address;
5855 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5856 cb_size = sizeof(*lin_dma_pkt);
5857
5858 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5859 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5860 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5861 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5862 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5863
5864 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5865 lin_dma_pkt->src_addr = cpu_to_le64(val);
5866 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5867 lin_dma_pkt->tsize = cpu_to_le32(size);
5868
5869 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5870 if (!job) {
5871 dev_err(hdev->dev, "Failed to allocate a new job\n");
5872 rc = -ENOMEM;
5873 goto release_cb;
5874 }
5875
5876 /* Verify DMA is OK */
5877 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5878 if (err_cause && !hdev->init_done) {
5879 dev_dbg(hdev->dev,
5880 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5881 err_cause);
5882 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5883 }
5884
5885 job->id = 0;
5886 job->user_cb = cb;
5887 atomic_inc(&job->user_cb->cs_cnt);
5888 job->user_cb_size = cb_size;
5889 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5890 job->patched_cb = job->user_cb;
5891 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5892
5893 hl_debugfs_add_job(hdev, job);
5894
5895 rc = gaudi_send_job_on_qman0(hdev, job);
5896 hl_debugfs_remove_job(hdev, job);
5897 kfree(job);
5898 atomic_dec(&cb->cs_cnt);
5899
5900 /* Verify DMA is OK */
5901 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5902 if (err_cause) {
5903 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5904 rc = -EIO;
5905 if (!hdev->init_done) {
5906 dev_dbg(hdev->dev,
5907 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5908 err_cause);
5909 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5910 }
5911 }
5912
5913 release_cb:
5914 id = cb->id;
5915 hl_cb_put(cb);
5916 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5917
5918 return rc;
5919 }
5920
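/*
 * Write the same value to a consecutive block of registers by filling a
 * kernel CB with one MSG_LONG packet per register and sending it through
 * QMAN0. The CB must stay below 2MB.
 */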
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
5923 {
5924 struct packet_msg_long *pkt;
5925 struct hl_cs_job *job;
5926 u32 cb_size, ctl;
5927 struct hl_cb *cb;
5928 int i, rc;
5929
5930 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5931
5932 if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %uMB\n",
			SZ_2M / SZ_1M);
5934 return -ENOMEM;
5935 }
5936
5937 cb = hl_cb_kernel_create(hdev, cb_size, false);
5938 if (!cb)
5939 return -EFAULT;
5940
5941 pkt = cb->kernel_address;
5942
5943 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5944 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5945 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5946 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5947 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5948
5949 for (i = 0; i < num_regs ; i++, pkt++) {
5950 pkt->ctl = cpu_to_le32(ctl);
5951 pkt->value = cpu_to_le32(val);
5952 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5953 }
5954
5955 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5956 if (!job) {
5957 dev_err(hdev->dev, "Failed to allocate a new job\n");
5958 rc = -ENOMEM;
5959 goto release_cb;
5960 }
5961
5962 job->id = 0;
5963 job->user_cb = cb;
5964 atomic_inc(&job->user_cb->cs_cnt);
5965 job->user_cb_size = cb_size;
5966 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5967 job->patched_cb = job->user_cb;
5968 job->job_cb_size = cb_size;
5969
5970 hl_debugfs_add_job(hdev, job);
5971
5972 rc = gaudi_send_job_on_qman0(hdev, job);
5973 hl_debugfs_remove_job(hdev, job);
5974 kfree(job);
5975 atomic_dec(&cb->cs_cnt);
5976
5977 release_cb:
5978 hl_cb_put(cb);
5979 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5980
5981 return rc;
5982 }
5983
static int gaudi_restore_sm_registers(struct hl_device *hdev)
5985 {
5986 u64 base_addr;
5987 u32 num_regs;
5988 int rc;
5989
5990 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5991 num_regs = NUM_OF_SOB_IN_BLOCK;
5992 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993 if (rc) {
5994 dev_err(hdev->dev, "failed resetting SM registers");
5995 return -ENOMEM;
5996 }
5997
5998 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5999 num_regs = NUM_OF_SOB_IN_BLOCK;
6000 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001 if (rc) {
6002 dev_err(hdev->dev, "failed resetting SM registers");
6003 return -ENOMEM;
6004 }
6005
6006 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6007 num_regs = NUM_OF_SOB_IN_BLOCK;
6008 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6009 if (rc) {
6010 dev_err(hdev->dev, "failed resetting SM registers");
6011 return -ENOMEM;
6012 }
6013
6014 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6015 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6016 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6017 if (rc) {
6018 dev_err(hdev->dev, "failed resetting SM registers");
6019 return -ENOMEM;
6020 }
6021
6022 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6023 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6024 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6025 if (rc) {
6026 dev_err(hdev->dev, "failed resetting SM registers");
6027 return -ENOMEM;
6028 }
6029
6030 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6031 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6032 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6033 if (rc) {
6034 dev_err(hdev->dev, "failed resetting SM registers");
6035 return -ENOMEM;
6036 }
6037
6038 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6039 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6040 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6041 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6042 if (rc) {
6043 dev_err(hdev->dev, "failed resetting SM registers");
6044 return -ENOMEM;
6045 }
6046
6047 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6048 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6049 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6050 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6051 if (rc) {
6052 dev_err(hdev->dev, "failed resetting SM registers");
6053 return -ENOMEM;
6054 }
6055
6056 return 0;
6057 }
6058
static void gaudi_restore_dma_registers(struct hl_device *hdev)
6060 {
6061 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6062 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6063 int i;
6064
6065 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6066 u64 sob_addr = CFG_BASE +
6067 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6068 (i * sob_delta);
6069 u32 dma_offset = i * DMA_CORE_OFFSET;
6070
6071 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6072 lower_32_bits(sob_addr));
6073 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6074 upper_32_bits(sob_addr));
6075 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6076
6077 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6078 * modified by the user for SRAM reduction
6079 */
6080 if (i > 1)
6081 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6082 0x00000001);
6083 }
6084 }
6085
static void gaudi_restore_qm_registers(struct hl_device *hdev)
6087 {
6088 u32 qman_offset;
6089 int i;
6090
6091 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6092 qman_offset = i * DMA_QMAN_OFFSET;
6093 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6094 }
6095
6096 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6097 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6098 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6099 }
6100
6101 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6102 qman_offset = i * TPC_QMAN_OFFSET;
6103 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6104 }
6105
6106 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6107 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6108 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6109 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6110 }
6111 }
6112
static int gaudi_restore_user_registers(struct hl_device *hdev)
6114 {
6115 int rc;
6116
6117 rc = gaudi_restore_sm_registers(hdev);
6118 if (rc)
6119 return rc;
6120
6121 gaudi_restore_dma_registers(hdev);
6122 gaudi_restore_qm_registers(hdev);
6123
6124 return 0;
6125 }
6126
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6128 {
6129 return 0;
6130 }
6131
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6133 {
6134 struct asic_fixed_properties *prop = &hdev->asic_prop;
6135 struct gaudi_device *gaudi = hdev->asic_specific;
6136 u64 addr = prop->mmu_pgt_addr;
6137 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6138
6139 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6140 return 0;
6141
6142 return gaudi_memset_device_memory(hdev, addr, size, 0);
6143 }
6144
static void gaudi_restore_phase_topology(struct hl_device *hdev)
6146 {
6147
6148 }
6149
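/*
 * debugfs access helpers route the address to the proper window: CFG space
 * through register accessors (unless clock gating blocks it), SRAM through
 * its PCI BAR, HBM by temporarily moving the HBM BAR, and host physical
 * memory by direct mapping when no IOMMU is present.
 */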
static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 *val)
6152 {
6153 struct asic_fixed_properties *prop = &hdev->asic_prop;
6154 struct gaudi_device *gaudi = hdev->asic_specific;
6155 u64 hbm_bar_addr, host_phys_end;
6156 int rc = 0;
6157
6158 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6159
6160 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6161
6162 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6163 (hdev->clock_gating_mask &
6164 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6165
6166 dev_err_ratelimited(hdev->dev,
6167 "Can't read register - clock gating is enabled!\n");
6168 rc = -EFAULT;
6169 } else {
6170 *val = RREG32(addr - CFG_BASE);
6171 }
6172
6173 } else if ((addr >= SRAM_BASE_ADDR) &&
6174 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6175 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6176 (addr - SRAM_BASE_ADDR));
6177 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6178 u64 bar_base_addr = DRAM_PHYS_BASE +
6179 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6180
6181 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6182 if (hbm_bar_addr != U64_MAX) {
6183 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6184 (addr - bar_base_addr));
6185
6186 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6187 hbm_bar_addr);
6188 }
6189 if (hbm_bar_addr == U64_MAX)
6190 rc = -EIO;
6191 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6192 user_address && !iommu_present(&pci_bus_type)) {
6193 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6194 } else {
6195 rc = -EFAULT;
6196 }
6197
6198 return rc;
6199 }
6200
static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 val)
6203 {
6204 struct asic_fixed_properties *prop = &hdev->asic_prop;
6205 struct gaudi_device *gaudi = hdev->asic_specific;
6206 u64 hbm_bar_addr, host_phys_end;
6207 int rc = 0;
6208
6209 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6210
6211 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6212
6213 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6214 (hdev->clock_gating_mask &
6215 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6216
6217 dev_err_ratelimited(hdev->dev,
6218 "Can't write register - clock gating is enabled!\n");
6219 rc = -EFAULT;
6220 } else {
6221 WREG32(addr - CFG_BASE, val);
6222 }
6223
6224 } else if ((addr >= SRAM_BASE_ADDR) &&
6225 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6226 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6227 (addr - SRAM_BASE_ADDR));
6228 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6229 u64 bar_base_addr = DRAM_PHYS_BASE +
6230 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6231
6232 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6233 if (hbm_bar_addr != U64_MAX) {
6234 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6235 (addr - bar_base_addr));
6236
6237 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6238 hbm_bar_addr);
6239 }
6240 if (hbm_bar_addr == U64_MAX)
6241 rc = -EIO;
6242 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6243 user_address && !iommu_present(&pci_bus_type)) {
6244 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6245 } else {
6246 rc = -EFAULT;
6247 }
6248
6249 return rc;
6250 }
6251
static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 *val)
6254 {
6255 struct asic_fixed_properties *prop = &hdev->asic_prop;
6256 struct gaudi_device *gaudi = hdev->asic_specific;
6257 u64 hbm_bar_addr, host_phys_end;
6258 int rc = 0;
6259
6260 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6261
6262 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6263
6264 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6265 (hdev->clock_gating_mask &
6266 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6267
6268 dev_err_ratelimited(hdev->dev,
6269 "Can't read register - clock gating is enabled!\n");
6270 rc = -EFAULT;
6271 } else {
6272 u32 val_l = RREG32(addr - CFG_BASE);
6273 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6274
6275 *val = (((u64) val_h) << 32) | val_l;
6276 }
6277
6278 } else if ((addr >= SRAM_BASE_ADDR) &&
6279 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6280 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6281 (addr - SRAM_BASE_ADDR));
6282 } else if (addr <=
6283 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6284 u64 bar_base_addr = DRAM_PHYS_BASE +
6285 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6286
6287 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6288 if (hbm_bar_addr != U64_MAX) {
6289 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6290 (addr - bar_base_addr));
6291
6292 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6293 hbm_bar_addr);
6294 }
6295 if (hbm_bar_addr == U64_MAX)
6296 rc = -EIO;
6297 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6298 user_address && !iommu_present(&pci_bus_type)) {
6299 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6300 } else {
6301 rc = -EFAULT;
6302 }
6303
6304 return rc;
6305 }
6306
static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 val)
6309 {
6310 struct asic_fixed_properties *prop = &hdev->asic_prop;
6311 struct gaudi_device *gaudi = hdev->asic_specific;
6312 u64 hbm_bar_addr, host_phys_end;
6313 int rc = 0;
6314
6315 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6316
6317 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6318
6319 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6320 (hdev->clock_gating_mask &
6321 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6322
6323 dev_err_ratelimited(hdev->dev,
6324 "Can't write register - clock gating is enabled!\n");
6325 rc = -EFAULT;
6326 } else {
6327 WREG32(addr - CFG_BASE, lower_32_bits(val));
6328 WREG32(addr + sizeof(u32) - CFG_BASE,
6329 upper_32_bits(val));
6330 }
6331
6332 } else if ((addr >= SRAM_BASE_ADDR) &&
6333 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6334 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6335 (addr - SRAM_BASE_ADDR));
6336 } else if (addr <=
6337 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6338 u64 bar_base_addr = DRAM_PHYS_BASE +
6339 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6340
6341 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6342 if (hbm_bar_addr != U64_MAX) {
6343 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6344 (addr - bar_base_addr));
6345
6346 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6347 hbm_bar_addr);
6348 }
6349 if (hbm_bar_addr == U64_MAX)
6350 rc = -EIO;
6351 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6352 user_address && !iommu_present(&pci_bus_type)) {
6353 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6354 } else {
6355 rc = -EFAULT;
6356 }
6357
6358 return rc;
6359 }
6360
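/*
 * Program a single DMA core for a linear device-to-host transfer and poll
 * its status register until the engine goes idle, then check the
 * error-cause register.
 */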
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
6363 {
6364 u32 err_cause, val;
6365 u64 dma_offset;
6366 int rc;
6367
6368 dma_offset = dma_id * DMA_CORE_OFFSET;
6369
6370 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6371 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6372 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6373 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6374 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6375 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6376 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6377
6378 rc = hl_poll_timeout(
6379 hdev,
6380 mmDMA0_CORE_STS0 + dma_offset,
6381 val,
6382 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6383 0,
6384 1000000);
6385
6386 if (rc) {
6387 dev_err(hdev->dev,
6388 "DMA %d timed-out during reading of 0x%llx\n",
6389 dma_id, addr);
6390 return -EIO;
6391 }
6392
6393 /* Verify DMA is OK */
6394 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6395 if (err_cause) {
6396 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6397 dev_dbg(hdev->dev,
6398 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6399 err_cause);
6400 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6401
6402 return -EIO;
6403 }
6404
6405 return 0;
6406 }
6407
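/*
 * Read an arbitrary device memory range for debugfs by borrowing an idle
 * PCI DMA engine: stall its QMAN, raise the core protection bit so it can
 * write to the host bounce buffer, and copy the data out in 2MB chunks.
 */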
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
6410 {
6411 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6412 struct gaudi_device *gaudi = hdev->asic_specific;
6413 u64 dma_offset, qm_offset;
6414 dma_addr_t dma_addr;
6415 void *kernel_addr;
6416 bool is_eng_idle;
6417 int rc = 0, dma_id;
6418
6419 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6420 hdev, SZ_2M,
6421 &dma_addr,
6422 GFP_KERNEL | __GFP_ZERO);
6423
6424 if (!kernel_addr)
6425 return -ENOMEM;
6426
6427 mutex_lock(&gaudi->clk_gate_mutex);
6428
6429 hdev->asic_funcs->disable_clock_gating(hdev);
6430
6431 hdev->asic_funcs->hw_queues_lock(hdev);
6432
6433 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6434 dma_offset = dma_id * DMA_CORE_OFFSET;
6435 qm_offset = dma_id * DMA_QMAN_OFFSET;
6436 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6437 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6438
6439 if (!is_eng_idle) {
6440 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6441 dma_offset = dma_id * DMA_CORE_OFFSET;
6442 qm_offset = dma_id * DMA_QMAN_OFFSET;
6443 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6444 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6445
6446 if (!is_eng_idle) {
6447 dev_err_ratelimited(hdev->dev,
6448 "Can't read via DMA because it is BUSY\n");
6449 rc = -EAGAIN;
6450 goto out;
6451 }
6452 }
6453
6454 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6455 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6456 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6457
6458 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6459 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6460 * ASID
6461 */
6462 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6463
6464 /* Verify DMA is OK */
6465 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466 if (err_cause) {
6467 dev_dbg(hdev->dev,
6468 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6469 err_cause);
6470 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6471 }
6472
6473 pos = 0;
6474 size_left = size;
6475 size_to_dma = SZ_2M;
6476
6477 while (size_left > 0) {
6478
6479 if (size_left < SZ_2M)
6480 size_to_dma = size_left;
6481
6482 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6483 dma_addr);
6484 if (rc)
6485 break;
6486
6487 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6488
6489 if (size_left <= SZ_2M)
6490 break;
6491
6492 pos += SZ_2M;
6493 addr += SZ_2M;
6494 size_left -= SZ_2M;
6495 }
6496
6497 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6498 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6499 * ASID
6500 */
6501 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6502 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6503
6504 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6505
6506 out:
6507 hdev->asic_funcs->hw_queues_unlock(hdev);
6508
6509 hdev->asic_funcs->set_clock_gating(hdev);
6510
6511 mutex_unlock(&gaudi->clk_gate_mutex);
6512
6513 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6514 dma_addr);
6515
6516 return rc;
6517 }
6518
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6520 {
6521 struct gaudi_device *gaudi = hdev->asic_specific;
6522
6523 if (hdev->hard_reset_pending)
6524 return U64_MAX;
6525
6526 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6527 (addr - gaudi->hbm_bar_cur_addr));
6528 }
6529
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6531 {
6532 struct gaudi_device *gaudi = hdev->asic_specific;
6533
6534 if (hdev->hard_reset_pending)
6535 return;
6536
6537 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6538 (addr - gaudi->hbm_bar_cur_addr));
6539 }
6540
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6542 {
6543 /* mask to zero the MMBP and ASID bits */
6544 WREG32_AND(reg, ~0x7FF);
6545 WREG32_OR(reg, asid);
6546 }
6547
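/*
 * Bind every engine's transactions to the given ASID by programming the
 * QMAN non-secure properties and the engine ARUSER/AWUSER registers, with
 * clock gating disabled for the duration of the update.
 */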
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6549 {
6550 struct gaudi_device *gaudi = hdev->asic_specific;
6551
6552 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6553 return;
6554
6555 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6556 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6557 return;
6558 }
6559
6560 mutex_lock(&gaudi->clk_gate_mutex);
6561
6562 hdev->asic_funcs->disable_clock_gating(hdev);
6563
6564 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569
6570 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6571 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6573 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6574 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575
6576 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6579 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6580 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581
6582 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6584 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6586 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587
6588 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593
6594 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6595 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6599
6600 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605
6606 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6608 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6609 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6610 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6611
6612 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6613 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6617 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6619 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6620
6621 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6622 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6623 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6624 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6625 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6626 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6627 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6628
6629 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6630 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6631 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6632 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6633 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6634 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6635 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6636
6637 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6638 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6639 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6641 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6642 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6643 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6644
6645 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6646 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6647 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6648 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6649 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6650 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6651 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6652
6653 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6654 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6655 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6656 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6657 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6658 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6659 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6660
6661 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6662 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6663 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6664 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6665 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6667 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6668
6669 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6670 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6671 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6673 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6674 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6675 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6676
6677 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6678 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6679 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6680 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6682 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6683 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6684
6685 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6686 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6687 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6688 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6689 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6690 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6691 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6692 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6693 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6694 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6695
6696 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6697 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6698 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6699 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6700 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6701 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6702 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6703 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6704 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6705 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6706 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6707 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6708
6709 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6710 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6711 asid);
6712 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6713 asid);
6714 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6715 asid);
6716 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6717 asid);
6718 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6719 asid);
6720 }
6721
6722 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6723 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6724 asid);
6725 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6726 asid);
6727 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6728 asid);
6729 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6730 asid);
6731 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6732 asid);
6733 }
6734
6735 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6736 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6737 asid);
6738 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6739 asid);
6740 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6741 asid);
6742 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6743 asid);
6744 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6745 asid);
6746 }
6747
6748 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6749 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6750 asid);
6751 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6752 asid);
6753 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6754 asid);
6755 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6756 asid);
6757 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6758 asid);
6759 }
6760
6761 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6762 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6763 asid);
6764 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6765 asid);
6766 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6767 asid);
6768 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6769 asid);
6770 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6771 asid);
6772 }
6773
6774 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6775 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6776 asid);
6777 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6778 asid);
6779 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6780 asid);
6781 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6782 asid);
6783 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6784 asid);
6785 }
6786
6787 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6788 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6789 asid);
6790 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6791 asid);
6792 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6793 asid);
6794 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6795 asid);
6796 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6797 asid);
6798 }
6799
6800 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6801 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6802 asid);
6803 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6804 asid);
6805 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6806 asid);
6807 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6808 asid);
6809 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6810 asid);
6811 }
6812
6813 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6814 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6815 asid);
6816 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6817 asid);
6818 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6819 asid);
6820 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6821 asid);
6822 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6823 asid);
6824 }
6825
6826 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6827 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6828 asid);
6829 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6830 asid);
6831 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6832 asid);
6833 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6834 asid);
6835 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6836 asid);
6837 }
6838
6839 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6840 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6841
6842 hdev->asic_funcs->set_clock_gating(hdev);
6843
6844 mutex_unlock(&gaudi->clk_gate_mutex);
6845 }
6846
6847 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6848 struct hl_cs_job *job)
6849 {
6850 struct packet_msg_prot *fence_pkt;
6851 u32 *fence_ptr;
6852 dma_addr_t fence_dma_addr;
6853 struct hl_cb *cb;
6854 u32 tmp, timeout, dma_offset;
6855 int rc;
6856
6857 if (hdev->pldm)
6858 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6859 else
6860 timeout = HL_DEVICE_TIMEOUT_USEC;
6861
6862 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6863 dev_err_ratelimited(hdev->dev,
6864 "Can't send driver job on QMAN0 because the device is not idle\n");
6865 return -EBUSY;
6866 }
6867
6868 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6869 &fence_dma_addr);
6870 if (!fence_ptr) {
6871 dev_err(hdev->dev,
6872 "Failed to allocate fence memory for QMAN0\n");
6873 return -ENOMEM;
6874 }
6875
6876 cb = job->patched_cb;
6877
6878 fence_pkt = cb->kernel_address +
6879 job->job_cb_size - sizeof(struct packet_msg_prot);
6880
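/*
 * The last packet slot of the patched CB is overwritten with a MSG_PROT
 * packet. When QMAN0 executes it, GAUDI_QMAN0_FENCE_VAL is written to
 * fence_dma_addr, which is what the poll further below waits for in
 * order to detect job completion.
 */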
6881 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6882 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6883 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6884
6885 fence_pkt->ctl = cpu_to_le32(tmp);
6886 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6887 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6888
6889 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6890
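/*
 * Temporarily open the PCI DMA core protection (VAL + ERR_VAL bits) so
 * the driver-issued job can run on QMAN0; after the fence poll, only
 * ERR_VAL is written back below.
 */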
6891 WREG32(mmDMA0_CORE_PROT + dma_offset,
6892 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6893
6894 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6895 job->job_cb_size, cb->bus_address);
6896 if (rc) {
6897 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6898 goto free_fence_ptr;
6899 }
6900
6901 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6902 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6903 timeout, true);
6904
6905 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6906
6907 if (rc == -ETIMEDOUT) {
6908 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6909 goto free_fence_ptr;
6910 }
6911
6912 free_fence_ptr:
6913 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6914
6915 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6916 fence_dma_addr);
6917 return rc;
6918 }
6919
6920 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6921 {
6922 if (event_type >= GAUDI_EVENT_SIZE)
6923 goto event_not_supported;
6924
6925 if (!gaudi_irq_map_table[event_type].valid)
6926 goto event_not_supported;
6927
6928 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6929
6930 return;
6931
6932 event_not_supported:
6933 snprintf(desc, size, "N/A");
6934 }
6935
6936 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6937 u32 x_y, bool is_write)
6938 {
6939 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6940
6941 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6942 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6943
6944 switch (x_y) {
6945 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6946 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6947 dma_id[0] = 0;
6948 dma_id[1] = 2;
6949 break;
6950 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6951 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6952 dma_id[0] = 1;
6953 dma_id[1] = 3;
6954 break;
6955 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6956 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6957 dma_id[0] = 4;
6958 dma_id[1] = 6;
6959 break;
6960 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6961 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6962 dma_id[0] = 5;
6963 dma_id[1] = 7;
6964 break;
6965 default:
6966 goto unknown_initiator;
6967 }
6968
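/*
 * Each DMA_IF location is served by two DMA cores (the dma_id pairs
 * above), so read both cores' ERR_CAUSE registers to determine which
 * one actually triggered the RAZWI. If the indication is ambiguous, a
 * combined "X or Y" string is returned below.
 */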
6969 for (i = 0 ; i < 2 ; i++) {
6970 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6971 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6972 }
6973
6974 switch (x_y) {
6975 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6976 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6977 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6978 return "DMA0";
6979 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6980 return "DMA2";
6981 else
6982 return "DMA0 or DMA2";
6983 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6984 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6985 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6986 return "DMA1";
6987 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6988 return "DMA3";
6989 else
6990 return "DMA1 or DMA3";
6991 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6992 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6993 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6994 return "DMA4";
6995 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6996 return "DMA6";
6997 else
6998 return "DMA4 or DMA6";
6999 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7000 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7001 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7002 return "DMA5";
7003 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7004 return "DMA7";
7005 else
7006 return "DMA5 or DMA7";
7007 }
7008
7009 unknown_initiator:
7010 return "unknown initiator";
7011 }
7012
7013 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7014 bool is_write)
7015 {
7016 u32 val, x_y, axi_id;
7017
7018 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7019 RREG32(mmMMU_UP_RAZWI_READ_ID);
7020 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7021 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7022 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7023 RAZWI_INITIATOR_AXI_ID_SHIFT);
7024
7025 switch (x_y) {
7026 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7027 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7028 return "TPC0";
7029 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7030 return "NIC0";
7031 break;
7032 case RAZWI_INITIATOR_ID_X_Y_TPC1:
7033 return "TPC1";
7034 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7035 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7036 return "MME0";
7037 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7038 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7039 return "MME1";
7040 case RAZWI_INITIATOR_ID_X_Y_TPC2:
7041 return "TPC2";
7042 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7043 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7044 return "TPC3";
7045 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7046 return "PCI";
7047 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7048 return "CPU";
7049 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7050 return "PSOC";
7051 break;
7052 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7053 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7056 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7057 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7058 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7059 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7060 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7061 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7062 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7063 return "TPC4";
7064 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7065 return "NIC1";
7066 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7067 return "NIC2";
7068 break;
7069 case RAZWI_INITIATOR_ID_X_Y_TPC5:
7070 return "TPC5";
7071 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7072 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7073 return "MME2";
7074 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7075 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7076 return "MME3";
7077 case RAZWI_INITIATOR_ID_X_Y_TPC6:
7078 return "TPC6";
7079 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7080 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7081 return "TPC7";
7082 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7083 return "NIC4";
7084 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7085 return "NIC5";
7086 break;
7087 default:
7088 break;
7089 }
7090
7091 dev_err(hdev->dev,
7092 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7093 val,
7094 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7095 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7096 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7097 RAZWI_INITIATOR_AXI_ID_MASK);
7098
7099 return "unknown initiator";
7100 }
7101
7102 static void gaudi_print_razwi_info(struct hl_device *hdev)
7103 {
7104 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7105 dev_err_ratelimited(hdev->dev,
7106 "RAZWI event caused by illegal write of %s\n",
7107 gaudi_get_razwi_initiator_name(hdev, true));
7108 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7109 }
7110
7111 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7112 dev_err_ratelimited(hdev->dev,
7113 "RAZWI event caused by illegal read of %s\n",
7114 gaudi_get_razwi_initiator_name(hdev, false));
7115 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7116 }
7117 }
7118
7119 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7120 {
7121 struct gaudi_device *gaudi = hdev->asic_specific;
7122 u64 addr;
7123 u32 val;
7124
7125 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7126 return;
7127
7128 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7129 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7130 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7131 addr <<= 32;
7132 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7133
7134 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7135 addr);
7136
7137 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7138 }
7139
7140 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7141 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7142 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7143 addr <<= 32;
7144 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7145
7146 dev_err_ratelimited(hdev->dev,
7147 "MMU access error on va 0x%llx\n", addr);
7148
7149 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7150 }
7151 }
7152
7153 /*
7154 * +-------------------+------------------------------------------------------+
7155 * | Configuration Reg | Description |
7156 * | Address | |
7157 * +-------------------+------------------------------------------------------+
7158 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7159 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7160 * | |0xF34 memory wrappers 63:32 |
7161 * | |0xF38 memory wrappers 95:64 |
7162 * | |0xF3C memory wrappers 127:96 |
7163 * +-------------------+------------------------------------------------------+
7164 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7165 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7166 * | |0xF44 memory wrappers 63:32 |
7167 * | |0xF48 memory wrappers 95:64 |
7168 * | |0xF4C memory wrappers 127:96 |
7169 * +-------------------+------------------------------------------------------+
7170 */
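/*
 * A short worked example of the mapping implemented below (register
 * layout as described in the table above): for a block with 90
 * memories, num_mem_regs is 3; the wrapper index is recovered as
 * 32 * word_index + __ffs(word_value), so a single error bit 8 in the
 * second indication word corresponds to memory wrapper 40.
 */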
7171 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7172 struct ecc_info_extract_params *params, u64 *ecc_address,
7173 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7174 {
7175 struct gaudi_device *gaudi = hdev->asic_specific;
7176 u32 i, num_mem_regs, reg, err_bit;
7177 u64 err_addr, err_word = 0;
7178 int rc = 0;
7179
7180 num_mem_regs = params->num_memories / 32 +
7181 ((params->num_memories % 32) ? 1 : 0);
7182
7183 if (params->block_address >= CFG_BASE)
7184 params->block_address -= CFG_BASE;
7185
7186 if (params->derr)
7187 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7188 else
7189 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7190
7191 if (params->disable_clock_gating) {
7192 mutex_lock(&gaudi->clk_gate_mutex);
7193 hdev->asic_funcs->disable_clock_gating(hdev);
7194 }
7195
7196 /* Set invalid wrapper index */
7197 *memory_wrapper_idx = 0xFF;
7198
7199 /* Iterate through memory wrappers, a single bit must be set */
7200 for (i = 0 ; i < num_mem_regs ; i++) {
7201 err_addr += i * 4;
7202 err_word = RREG32(err_addr);
7203 if (err_word) {
7204 err_bit = __ffs(err_word);
7205 *memory_wrapper_idx = err_bit + (32 * i);
7206 break;
7207 }
7208 }
7209
7210 if (*memory_wrapper_idx == 0xFF) {
7211 dev_err(hdev->dev, "ECC error information cannot be found\n");
7212 rc = -EINVAL;
7213 goto enable_clk_gate;
7214 }
7215
7216 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7217 *memory_wrapper_idx);
7218
7219 *ecc_address =
7220 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7221 *ecc_syndrom =
7222 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7223
7224 /* Clear error indication */
7225 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7226 if (params->derr)
7227 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7228 else
7229 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7230
7231 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7232
7233 enable_clk_gate:
7234 if (params->disable_clock_gating) {
7235 hdev->asic_funcs->set_clock_gating(hdev);
7236
7237 mutex_unlock(&gaudi->clk_gate_mutex);
7238 }
7239
7240 return rc;
7241 }
7242
7243 /*
7244 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7245 *
7246 * @idx: the current pi/ci value
7247 * @q_len: the queue length (power of 2)
7248 *
7249 * @return the cyclically decremented index
7250 */
7251 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7252 {
7253 u32 mask = q_len - 1;
7254
7255 /*
7256 * modular decrement is equivalent to adding (q_len - 1);
7257 * masking with the LSBs then keeps the value in the
7258 * range [0, q_len - 1]
7259 */
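/* e.g. with q_len = 8: idx 5 -> 4, and idx 0 wraps to 7 */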
7260 return (idx + q_len - 1) & mask;
7261 }
7262
7263 /**
7264 * gaudi_print_sw_config_stream_data - print SW config stream data
7265 *
7266 * @hdev: pointer to the habanalabs device structure
7267 * @stream: the QMAN's stream
7268 * @qman_base: base address of QMAN registers block
7269 */
7270 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7271 u64 qman_base)
7272 {
7273 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7274 u32 cq_ptr_lo_off, size;
7275
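/*
 * The per-stream register stride is taken from the TPC0 QM block; all
 * QMANs are assumed to share this layout, with qman_base selecting the
 * actual engine.
 */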
7276 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7277
7278 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7279 stream * cq_ptr_lo_off;
7280 cq_ptr_hi = cq_ptr_lo +
7281 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7282 cq_tsize = cq_ptr_lo +
7283 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7284
7285 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7286 size = RREG32(cq_tsize);
7287 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7288 stream, cq_ptr, size);
7289 }
7290
7291 /**
7292 * gaudi_print_last_pqes_on_err - print last PQEs on error
7293 *
7294 * @hdev: pointer to the habanalabs device structure
7295 * @qid_base: first QID of the QMAN (out of 4 streams)
7296 * @stream: the QMAN's stream
7297 * @qman_base: base address of QMAN registers block
7298 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7299 */
7300 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7301 u32 stream, u64 qman_base,
7302 bool pr_sw_conf)
7303 {
7304 u32 ci, qm_ci_stream_off, queue_len;
7305 struct hl_hw_queue *q;
7306 u64 pq_ci;
7307 int i;
7308
7309 q = &hdev->kernel_queues[qid_base + stream];
7310
7311 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7312 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7313 stream * qm_ci_stream_off;
7314
7315 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7316 q->int_queue_len : HL_QUEUE_LENGTH;
7317
7318 hdev->asic_funcs->hw_queues_lock(hdev);
7319
7320 if (pr_sw_conf)
7321 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7322
7323 ci = RREG32(pq_ci);
7324
7325 /* we should start printing from ci - 1 */
7326 ci = gaudi_queue_idx_dec(ci, queue_len);
7327
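/*
 * Walk backwards over up to PQ_FETCHER_CACHE_SIZE descriptors, starting
 * from the decremented consumer index, and stop at the first
 * zero-length (uninitialized) entry.
 */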
7328 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7329 struct hl_bd *bd;
7330 u64 addr;
7331 u32 len;
7332
7333 bd = q->kernel_address;
7334 bd += ci;
7335
7336 len = le32_to_cpu(bd->len);
7337 /* len 0 means uninitialized entry - break */
7338 if (!len)
7339 break;
7340
7341 addr = le64_to_cpu(bd->ptr);
7342
7343 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7344 stream, ci, addr, len);
7345
7346 /* get previous ci, wrap if needed */
7347 ci = gaudi_queue_idx_dec(ci, queue_len);
7348 }
7349
7350 hdev->asic_funcs->hw_queues_unlock(hdev);
7351 }
7352
7353 /**
7354 * print_qman_data_on_err - extract QMAN data on error
7355 *
7356 * @hdev: pointer to the habanalabs device structure
7357 * @qid_base: first QID of the QMAN (out of 4 streams)
7358 * @stream: the QMAN's stream
7359 * @qman_base: base address of QMAN registers block
7360 *
7361 * This function attempts to extract as much data as possible on a QMAN error.
7362 * For an upper CP, print the SW config stream data and the last 8 PQEs.
7363 * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7364 */
7365 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7366 u32 stream, u64 qman_base)
7367 {
7368 u32 i;
7369
7370 if (stream != QMAN_STREAMS) {
7371 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7372 true);
7373 return;
7374 }
7375
7376 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7377
7378 for (i = 0; i < QMAN_STREAMS; i++)
7379 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7380 false);
7381 }
7382
7383 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7384 const char *qm_name,
7385 u64 qman_base,
7386 u32 qid_base)
7387 {
7388 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7389 u64 glbl_sts_addr, arb_err_addr;
7390 char reg_desc[32];
7391
7392 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7393 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7394
7395 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7396 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7397 glbl_sts_clr_val = 0;
7398 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7399
7400 if (!glbl_sts_val)
7401 continue;
7402
7403 if (i == QMAN_STREAMS)
7404 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7405 else
7406 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7407
7408 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7409 if (glbl_sts_val & BIT(j)) {
7410 dev_err_ratelimited(hdev->dev,
7411 "%s %s. err cause: %s\n",
7412 qm_name, reg_desc,
7413 gaudi_qman_error_cause[j]);
7414 glbl_sts_clr_val |= BIT(j);
7415 }
7416 }
7417
7418 /* Write 1 to clear errors, unless stop-on-error is set */
7419 if (!hdev->stop_on_err)
7420 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7421 else
7422 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7423 }
7424
7425 arb_err_val = RREG32(arb_err_addr);
7426
7427 if (!arb_err_val)
7428 return;
7429
7430 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7431 if (arb_err_val & BIT(j)) {
7432 dev_err_ratelimited(hdev->dev,
7433 "%s ARB_ERR. err cause: %s\n",
7434 qm_name,
7435 gaudi_qman_arb_error_cause[j]);
7436 }
7437 }
7438 }
7439
7440 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7441 struct hl_eq_sm_sei_data *sei_data)
7442 {
7443 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7444
7445 /* Flip the bits as the enum is ordered in the opposite way */
7446 index = (index ^ 0x3) & 0x3;
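/* e.g. index 0 (DMA_IF_SEI_0) selects entry 3, index 3 selects entry 0 */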
7447
7448 switch (sei_data->sei_cause) {
7449 case SM_SEI_SO_OVERFLOW:
7450 dev_err_ratelimited(hdev->dev,
7451 "%s SEI Error: SOB Group %u overflow/underflow",
7452 gaudi_sync_manager_names[index],
7453 le32_to_cpu(sei_data->sei_log));
7454 break;
7455 case SM_SEI_LBW_4B_UNALIGNED:
7456 dev_err_ratelimited(hdev->dev,
7457 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7458 gaudi_sync_manager_names[index],
7459 le32_to_cpu(sei_data->sei_log));
7460 break;
7461 case SM_SEI_AXI_RESPONSE_ERR:
7462 dev_err_ratelimited(hdev->dev,
7463 "%s SEI Error: AXI ID %u response error",
7464 gaudi_sync_manager_names[index],
7465 le32_to_cpu(sei_data->sei_log));
7466 break;
7467 default:
7468 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7469 le32_to_cpu(sei_data->sei_log));
7470 break;
7471 }
7472 }
7473
7474 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7475 struct hl_eq_ecc_data *ecc_data)
7476 {
7477 struct ecc_info_extract_params params;
7478 u64 ecc_address = 0, ecc_syndrom = 0;
7479 u8 index, memory_wrapper_idx = 0;
7480 bool extract_info_from_fw;
7481 int rc;
7482
7483 if (hdev->asic_prop.fw_security_enabled) {
7484 extract_info_from_fw = true;
7485 goto extract_ecc_info;
7486 }
7487
7488 switch (event_type) {
7489 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7490 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7491 extract_info_from_fw = true;
7492 break;
7493 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7494 index = event_type - GAUDI_EVENT_TPC0_SERR;
7495 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7496 params.num_memories = 90;
7497 params.derr = false;
7498 params.disable_clock_gating = true;
7499 extract_info_from_fw = false;
7500 break;
7501 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7502 index = event_type - GAUDI_EVENT_TPC0_DERR;
7503 params.block_address =
7504 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7505 params.num_memories = 90;
7506 params.derr = true;
7507 params.disable_clock_gating = true;
7508 extract_info_from_fw = false;
7509 break;
7510 case GAUDI_EVENT_MME0_ACC_SERR:
7511 case GAUDI_EVENT_MME1_ACC_SERR:
7512 case GAUDI_EVENT_MME2_ACC_SERR:
7513 case GAUDI_EVENT_MME3_ACC_SERR:
7514 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7515 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7516 params.num_memories = 128;
7517 params.derr = false;
7518 params.disable_clock_gating = true;
7519 extract_info_from_fw = false;
7520 break;
7521 case GAUDI_EVENT_MME0_ACC_DERR:
7522 case GAUDI_EVENT_MME1_ACC_DERR:
7523 case GAUDI_EVENT_MME2_ACC_DERR:
7524 case GAUDI_EVENT_MME3_ACC_DERR:
7525 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7526 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7527 params.num_memories = 128;
7528 params.derr = true;
7529 params.disable_clock_gating = true;
7530 extract_info_from_fw = false;
7531 break;
7532 case GAUDI_EVENT_MME0_SBAB_SERR:
7533 case GAUDI_EVENT_MME1_SBAB_SERR:
7534 case GAUDI_EVENT_MME2_SBAB_SERR:
7535 case GAUDI_EVENT_MME3_SBAB_SERR:
7536 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7537 params.block_address =
7538 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7539 params.num_memories = 33;
7540 params.derr = false;
7541 params.disable_clock_gating = true;
7542 extract_info_from_fw = false;
7543 break;
7544 case GAUDI_EVENT_MME0_SBAB_DERR:
7545 case GAUDI_EVENT_MME1_SBAB_DERR:
7546 case GAUDI_EVENT_MME2_SBAB_DERR:
7547 case GAUDI_EVENT_MME3_SBAB_DERR:
7548 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7549 params.block_address =
7550 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7551 params.num_memories = 33;
7552 params.derr = true;
7553 params.disable_clock_gating = true;
7554 extract_info_from_fw = false;
7555 break;
7556 default:
7557 return;
7558 }
7559
7560 extract_ecc_info:
7561 if (extract_info_from_fw) {
7562 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7563 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7564 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7565 } else {
7566 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7567 &ecc_syndrom, &memory_wrapper_idx);
7568 if (rc)
7569 return;
7570 }
7571
7572 dev_err(hdev->dev,
7573 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7574 ecc_address, ecc_syndrom, memory_wrapper_idx);
7575 }
7576
7577 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7578 {
7579 u64 qman_base;
7580 char desc[32];
7581 u32 qid_base;
7582 u8 index;
7583
7584 switch (event_type) {
7585 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7586 index = event_type - GAUDI_EVENT_TPC0_QM;
7587 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7588 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7589 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7590 break;
7591 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7592 index = event_type - GAUDI_EVENT_MME0_QM;
7593 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7594 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7595 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7596 break;
7597 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7598 index = event_type - GAUDI_EVENT_DMA0_QM;
7599 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7600 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7601 if (index > 1)
7602 qid_base++;
7603 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7604 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7605 break;
7606 case GAUDI_EVENT_NIC0_QM0:
7607 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7608 qman_base = mmNIC0_QM0_BASE;
7609 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7610 break;
7611 case GAUDI_EVENT_NIC0_QM1:
7612 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7613 qman_base = mmNIC0_QM1_BASE;
7614 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7615 break;
7616 case GAUDI_EVENT_NIC1_QM0:
7617 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7618 qman_base = mmNIC1_QM0_BASE;
7619 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7620 break;
7621 case GAUDI_EVENT_NIC1_QM1:
7622 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7623 qman_base = mmNIC1_QM1_BASE;
7624 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7625 break;
7626 case GAUDI_EVENT_NIC2_QM0:
7627 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7628 qman_base = mmNIC2_QM0_BASE;
7629 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7630 break;
7631 case GAUDI_EVENT_NIC2_QM1:
7632 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7633 qman_base = mmNIC2_QM1_BASE;
7634 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7635 break;
7636 case GAUDI_EVENT_NIC3_QM0:
7637 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7638 qman_base = mmNIC3_QM0_BASE;
7639 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7640 break;
7641 case GAUDI_EVENT_NIC3_QM1:
7642 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7643 qman_base = mmNIC3_QM1_BASE;
7644 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7645 break;
7646 case GAUDI_EVENT_NIC4_QM0:
7647 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7648 qman_base = mmNIC4_QM0_BASE;
7649 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7650 break;
7651 case GAUDI_EVENT_NIC4_QM1:
7652 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7653 qman_base = mmNIC4_QM1_BASE;
7654 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7655 break;
7656 default:
7657 return;
7658 }
7659
7660 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7661 }
7662
7663 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7664 bool razwi)
7665 {
7666 char desc[64] = "";
7667
7668 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7669 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7670 event_type, desc);
7671
7672 if (razwi) {
7673 gaudi_print_razwi_info(hdev);
7674 gaudi_print_mmu_error_info(hdev);
7675 }
7676 }
7677
7678 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7679 struct cpucp_pkt_sync_err *sync_err)
7680 {
7681 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7682
7683 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7684 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7685 }
7686
7687 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7688 struct hl_eq_fw_alive *fw_alive)
7689 {
7690 dev_err(hdev->dev,
7691 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7692 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7693 "Minor" : "Critical", fw_alive->process_id,
7694 fw_alive->thread_id, fw_alive->uptime_seconds);
7695 }
7696
7697 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7698 {
7699 struct gaudi_device *gaudi = hdev->asic_specific;
7700
7701 /* Unmask all IRQs since some could have been received
7702 * during the soft reset
7703 */
7704 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7705 }
7706
7707 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7708 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7709 {
7710 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7711 int rc = 0;
7712
7713 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7714 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7715 if (!hbm_ecc_data) {
7716 dev_err(hdev->dev, "No FW ECC data");
7717 return 0;
7718 }
7719
7720 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7721 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7722 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7723 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7724 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7725 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7726 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7727 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7728 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7729 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7730 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7731 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7732 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7733 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7734
7735 dev_err(hdev->dev,
7736 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7737 device, ch, wr_par, rd_par, ca_par, serr, derr);
7738 dev_err(hdev->dev,
7739 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7740 device, ch, hbm_ecc_data->first_addr, type,
7741 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7742 hbm_ecc_data->dec_cnt);
7743 return 0;
7744 }
7745
7746 if (hdev->asic_prop.fw_security_enabled) {
7747 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7748 return 0;
7749 }
7750
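/*
 * No FW-provided ECC data and no security restriction, so read the HBM
 * MC registers directly. The per-channel offsets below (0x06C/0x07C for
 * the interrupt bits, 0x060/0x070 and 0x064/0x074 for the ECC info) and
 * the 0x8F30/0x8F40 MC SRAM indications are described only by the log
 * strings; their exact semantics are inferred from this code.
 */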
7751 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7752 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7753 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7754 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7755 if (val) {
7756 rc = -EIO;
7757 dev_err(hdev->dev,
7758 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7759 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7760 (val >> 2) & 0x1, (val >> 3) & 0x1,
7761 (val >> 4) & 0x1);
7762
7763 val2 = RREG32(base + ch * 0x1000 + 0x060);
7764 dev_err(hdev->dev,
7765 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7766 device, ch * 2,
7767 RREG32(base + ch * 0x1000 + 0x064),
7768 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7769 (val2 & 0xFF0000) >> 16,
7770 (val2 & 0xFF000000) >> 24);
7771 }
7772
7773 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7774 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7775 if (val) {
7776 rc = -EIO;
7777 dev_err(hdev->dev,
7778 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7779 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7780 (val >> 2) & 0x1, (val >> 3) & 0x1,
7781 (val >> 4) & 0x1);
7782
7783 val2 = RREG32(base + ch * 0x1000 + 0x070);
7784 dev_err(hdev->dev,
7785 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7786 device, ch * 2 + 1,
7787 RREG32(base + ch * 0x1000 + 0x074),
7788 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7789 (val2 & 0xFF0000) >> 16,
7790 (val2 & 0xFF000000) >> 24);
7791 }
7792
7793 /* Clear interrupts */
7794 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7795 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7796 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7797 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7798 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7799 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7800 }
7801
7802 val = RREG32(base + 0x8F30);
7803 val2 = RREG32(base + 0x8F34);
7804 if (val | val2) {
7805 rc = -EIO;
7806 dev_err(hdev->dev,
7807 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7808 device, val, val2);
7809 }
7810 val = RREG32(base + 0x8F40);
7811 val2 = RREG32(base + 0x8F44);
7812 if (val | val2) {
7813 rc = -EIO;
7814 dev_err(hdev->dev,
7815 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7816 device, val, val2);
7817 }
7818
7819 return rc;
7820 }
7821
7822 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7823 {
7824 switch (hbm_event_type) {
7825 case GAUDI_EVENT_HBM0_SPI_0:
7826 case GAUDI_EVENT_HBM0_SPI_1:
7827 return 0;
7828 case GAUDI_EVENT_HBM1_SPI_0:
7829 case GAUDI_EVENT_HBM1_SPI_1:
7830 return 1;
7831 case GAUDI_EVENT_HBM2_SPI_0:
7832 case GAUDI_EVENT_HBM2_SPI_1:
7833 return 2;
7834 case GAUDI_EVENT_HBM3_SPI_0:
7835 case GAUDI_EVENT_HBM3_SPI_1:
7836 return 3;
7837 default:
7838 break;
7839 }
7840
7841 /* Should never happen */
7842 return 0;
7843 }
7844
7845 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7846 char *interrupt_name)
7847 {
7848 struct gaudi_device *gaudi = hdev->asic_specific;
7849 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7850 bool soft_reset_required = false;
7851
7852 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7853 * gating, which cannot be done by the CPU-CP, so the driver does it
7854 * here instead.
7855 */
7856
7857 mutex_lock(&gaudi->clk_gate_mutex);
7858
7859 hdev->asic_funcs->disable_clock_gating(hdev);
7860
7861 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7862 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7863
7864 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7865 if (tpc_interrupts_cause & BIT(i)) {
7866 dev_err_ratelimited(hdev->dev,
7867 "TPC%d_%s interrupt cause: %s\n",
7868 tpc_id, interrupt_name,
7869 gaudi_tpc_interrupts_cause[i]);
7870 /* If this is a QM error, we need to soft-reset */
7871 if (i == 15)
7872 soft_reset_required = true;
7873 }
7874
7875 /* Clear interrupts */
7876 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7877
7878 hdev->asic_funcs->set_clock_gating(hdev);
7879
7880 mutex_unlock(&gaudi->clk_gate_mutex);
7881
7882 return soft_reset_required;
7883 }
7884
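/*
 * Map a TPC event to its TPC index. Judging by the arithmetic below,
 * the DEC events appear to be spaced 2 per TPC and the KRN_ERR events
 * 6 per TPC in the event table.
 */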
7885 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7886 {
7887 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7888 }
7889
7890 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7891 {
7892 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7893 }
7894
7895 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7896 u16 event_type)
7897 {
7898 switch (event_type) {
7899 case GAUDI_EVENT_FIX_POWER_ENV_S:
7900 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7901 dev_info_ratelimited(hdev->dev,
7902 "Clock throttling due to power consumption\n");
7903 break;
7904
7905 case GAUDI_EVENT_FIX_POWER_ENV_E:
7906 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7907 dev_info_ratelimited(hdev->dev,
7908 "Power envelop is safe, back to optimal clock\n");
7909 break;
7910
7911 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7912 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7913 dev_info_ratelimited(hdev->dev,
7914 "Clock throttling due to overheating\n");
7915 break;
7916
7917 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7918 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7919 dev_info_ratelimited(hdev->dev,
7920 "Thermal envelop is safe, back to optimal clock\n");
7921 break;
7922
7923 default:
7924 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7925 event_type);
7926 break;
7927 }
7928 }
7929
7930 static void gaudi_handle_eqe(struct hl_device *hdev,
7931 struct hl_eq_entry *eq_entry)
7932 {
7933 struct gaudi_device *gaudi = hdev->asic_specific;
7934 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7935 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7936 >> EQ_CTL_EVENT_TYPE_SHIFT);
7937 bool reset_required;
7938 u8 cause;
7939 int rc;
7940
7941 if (event_type >= GAUDI_EVENT_SIZE) {
7942 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7943 event_type, GAUDI_EVENT_SIZE - 1);
7944 return;
7945 }
7946
7947 gaudi->events_stat[event_type]++;
7948 gaudi->events_stat_aggregate[event_type]++;
7949
7950 switch (event_type) {
7951 case GAUDI_EVENT_PCIE_CORE_DERR:
7952 case GAUDI_EVENT_PCIE_IF_DERR:
7953 case GAUDI_EVENT_PCIE_PHY_DERR:
7954 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7955 case GAUDI_EVENT_MME0_ACC_DERR:
7956 case GAUDI_EVENT_MME0_SBAB_DERR:
7957 case GAUDI_EVENT_MME1_ACC_DERR:
7958 case GAUDI_EVENT_MME1_SBAB_DERR:
7959 case GAUDI_EVENT_MME2_ACC_DERR:
7960 case GAUDI_EVENT_MME2_SBAB_DERR:
7961 case GAUDI_EVENT_MME3_ACC_DERR:
7962 case GAUDI_EVENT_MME3_SBAB_DERR:
7963 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7964 fallthrough;
7965 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7966 case GAUDI_EVENT_PSOC_MEM_DERR:
7967 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7968 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7969 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7970 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7971 case GAUDI_EVENT_MMU_DERR:
7972 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7973 gaudi_print_irq_info(hdev, event_type, true);
7974 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7975 goto reset_device;
7976
7977 case GAUDI_EVENT_GIC500:
7978 case GAUDI_EVENT_AXI_ECC:
7979 case GAUDI_EVENT_L2_RAM_ECC:
7980 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7981 gaudi_print_irq_info(hdev, event_type, false);
7982 goto reset_device;
7983
7984 case GAUDI_EVENT_HBM0_SPI_0:
7985 case GAUDI_EVENT_HBM1_SPI_0:
7986 case GAUDI_EVENT_HBM2_SPI_0:
7987 case GAUDI_EVENT_HBM3_SPI_0:
7988 gaudi_print_irq_info(hdev, event_type, false);
7989 gaudi_hbm_read_interrupts(hdev,
7990 gaudi_hbm_event_to_dev(event_type),
7991 &eq_entry->hbm_ecc_data);
7992 goto reset_device;
7993
7994 case GAUDI_EVENT_HBM0_SPI_1:
7995 case GAUDI_EVENT_HBM1_SPI_1:
7996 case GAUDI_EVENT_HBM2_SPI_1:
7997 case GAUDI_EVENT_HBM3_SPI_1:
7998 gaudi_print_irq_info(hdev, event_type, false);
7999 gaudi_hbm_read_interrupts(hdev,
8000 gaudi_hbm_event_to_dev(event_type),
8001 &eq_entry->hbm_ecc_data);
8002 hl_fw_unmask_irq(hdev, event_type);
8003 break;
8004
8005 case GAUDI_EVENT_TPC0_DEC:
8006 case GAUDI_EVENT_TPC1_DEC:
8007 case GAUDI_EVENT_TPC2_DEC:
8008 case GAUDI_EVENT_TPC3_DEC:
8009 case GAUDI_EVENT_TPC4_DEC:
8010 case GAUDI_EVENT_TPC5_DEC:
8011 case GAUDI_EVENT_TPC6_DEC:
8012 case GAUDI_EVENT_TPC7_DEC:
8013 gaudi_print_irq_info(hdev, event_type, true);
8014 reset_required = gaudi_tpc_read_interrupts(hdev,
8015 tpc_dec_event_to_tpc_id(event_type),
8016 "AXI_SLV_DEC_Error");
8017 if (reset_required) {
8018 dev_err(hdev->dev, "reset required due to %s\n",
8019 gaudi_irq_map_table[event_type].name);
8020
8021 hl_device_reset(hdev, 0);
8022 } else {
8023 hl_fw_unmask_irq(hdev, event_type);
8024 }
8025 break;
8026
8027 case GAUDI_EVENT_TPC0_KRN_ERR:
8028 case GAUDI_EVENT_TPC1_KRN_ERR:
8029 case GAUDI_EVENT_TPC2_KRN_ERR:
8030 case GAUDI_EVENT_TPC3_KRN_ERR:
8031 case GAUDI_EVENT_TPC4_KRN_ERR:
8032 case GAUDI_EVENT_TPC5_KRN_ERR:
8033 case GAUDI_EVENT_TPC6_KRN_ERR:
8034 case GAUDI_EVENT_TPC7_KRN_ERR:
8035 gaudi_print_irq_info(hdev, event_type, true);
8036 reset_required = gaudi_tpc_read_interrupts(hdev,
8037 tpc_krn_event_to_tpc_id(event_type),
8038 "KRN_ERR");
8039 if (reset_required) {
8040 dev_err(hdev->dev, "reset required due to %s\n",
8041 gaudi_irq_map_table[event_type].name);
8042
8043 hl_device_reset(hdev, 0);
8044 } else {
8045 hl_fw_unmask_irq(hdev, event_type);
8046 }
8047 break;
8048
8049 case GAUDI_EVENT_PCIE_CORE_SERR:
8050 case GAUDI_EVENT_PCIE_IF_SERR:
8051 case GAUDI_EVENT_PCIE_PHY_SERR:
8052 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8053 case GAUDI_EVENT_MME0_ACC_SERR:
8054 case GAUDI_EVENT_MME0_SBAB_SERR:
8055 case GAUDI_EVENT_MME1_ACC_SERR:
8056 case GAUDI_EVENT_MME1_SBAB_SERR:
8057 case GAUDI_EVENT_MME2_ACC_SERR:
8058 case GAUDI_EVENT_MME2_SBAB_SERR:
8059 case GAUDI_EVENT_MME3_ACC_SERR:
8060 case GAUDI_EVENT_MME3_SBAB_SERR:
8061 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8062 case GAUDI_EVENT_CPU_IF_ECC_SERR:
8063 case GAUDI_EVENT_PSOC_MEM_SERR:
8064 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8065 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8066 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8067 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8068 fallthrough;
8069 case GAUDI_EVENT_MMU_SERR:
8070 gaudi_print_irq_info(hdev, event_type, true);
8071 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8072 hl_fw_unmask_irq(hdev, event_type);
8073 break;
8074
8075 case GAUDI_EVENT_PCIE_DEC:
8076 case GAUDI_EVENT_MME0_WBC_RSP:
8077 case GAUDI_EVENT_MME0_SBAB0_RSP:
8078 case GAUDI_EVENT_MME1_WBC_RSP:
8079 case GAUDI_EVENT_MME1_SBAB0_RSP:
8080 case GAUDI_EVENT_MME2_WBC_RSP:
8081 case GAUDI_EVENT_MME2_SBAB0_RSP:
8082 case GAUDI_EVENT_MME3_WBC_RSP:
8083 case GAUDI_EVENT_MME3_SBAB0_RSP:
8084 case GAUDI_EVENT_CPU_AXI_SPLITTER:
8085 case GAUDI_EVENT_PSOC_AXI_DEC:
8086 case GAUDI_EVENT_PSOC_PRSTN_FALL:
8087 case GAUDI_EVENT_MMU_PAGE_FAULT:
8088 case GAUDI_EVENT_MMU_WR_PERM:
8089 case GAUDI_EVENT_RAZWI_OR_ADC:
8090 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8091 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8092 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8093 fallthrough;
8094 case GAUDI_EVENT_NIC0_QM0:
8095 case GAUDI_EVENT_NIC0_QM1:
8096 case GAUDI_EVENT_NIC1_QM0:
8097 case GAUDI_EVENT_NIC1_QM1:
8098 case GAUDI_EVENT_NIC2_QM0:
8099 case GAUDI_EVENT_NIC2_QM1:
8100 case GAUDI_EVENT_NIC3_QM0:
8101 case GAUDI_EVENT_NIC3_QM1:
8102 case GAUDI_EVENT_NIC4_QM0:
8103 case GAUDI_EVENT_NIC4_QM1:
8104 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8105 gaudi_print_irq_info(hdev, event_type, true);
8106 gaudi_handle_qman_err(hdev, event_type);
8107 hl_fw_unmask_irq(hdev, event_type);
8108 break;
8109
8110 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8111 gaudi_print_irq_info(hdev, event_type, true);
8112 goto reset_device;
8113
8114 case GAUDI_EVENT_TPC0_BMON_SPMU:
8115 case GAUDI_EVENT_TPC1_BMON_SPMU:
8116 case GAUDI_EVENT_TPC2_BMON_SPMU:
8117 case GAUDI_EVENT_TPC3_BMON_SPMU:
8118 case GAUDI_EVENT_TPC4_BMON_SPMU:
8119 case GAUDI_EVENT_TPC5_BMON_SPMU:
8120 case GAUDI_EVENT_TPC6_BMON_SPMU:
8121 case GAUDI_EVENT_TPC7_BMON_SPMU:
8122 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8123 gaudi_print_irq_info(hdev, event_type, false);
8124 hl_fw_unmask_irq(hdev, event_type);
8125 break;
8126
8127 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8128 gaudi_print_irq_info(hdev, event_type, false);
8129 gaudi_print_sm_sei_info(hdev, event_type,
8130 &eq_entry->sm_sei_data);
8131 rc = hl_state_dump(hdev);
8132 if (rc)
8133 dev_err(hdev->dev,
8134 "Error during system state dump %d\n", rc);
8135 hl_fw_unmask_irq(hdev, event_type);
8136 break;
8137
8138 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8139 gaudi_print_clk_change_info(hdev, event_type);
8140 hl_fw_unmask_irq(hdev, event_type);
8141 break;
8142
8143 case GAUDI_EVENT_PSOC_GPIO_U16_0:
8144 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8145 dev_err(hdev->dev,
8146 "Received high temp H/W interrupt %d (cause %d)\n",
8147 event_type, cause);
8148 break;
8149
8150 case GAUDI_EVENT_DEV_RESET_REQ:
8151 gaudi_print_irq_info(hdev, event_type, false);
8152 goto reset_device;
8153
8154 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8155 gaudi_print_irq_info(hdev, event_type, false);
8156 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8157 goto reset_device;
8158
8159 case GAUDI_EVENT_FW_ALIVE_S:
8160 gaudi_print_irq_info(hdev, event_type, false);
8161 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8162 goto reset_device;
8163
8164 default:
8165 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8166 event_type);
8167 break;
8168 }
8169
8170 return;
8171
8172 reset_device:
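/*
 * When FW security is enabled the reset must be requested from the
 * firmware; otherwise a hard reset is issued only if the driver is
 * configured to hard-reset on FW events, and the event is merely
 * unmasked if not.
 */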
8173 if (hdev->asic_prop.fw_security_enabled)
8174 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
8175 else if (hdev->hard_reset_on_fw_events)
8176 hl_device_reset(hdev, HL_RESET_HARD);
8177 else
8178 hl_fw_unmask_irq(hdev, event_type);
8179 }
8180
8181 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8182 u32 *size)
8183 {
8184 struct gaudi_device *gaudi = hdev->asic_specific;
8185
8186 if (aggregate) {
8187 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8188 return gaudi->events_stat_aggregate;
8189 }
8190
8191 *size = (u32) sizeof(gaudi->events_stat);
8192 return gaudi->events_stat;
8193 }
8194
8195 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8196 u32 flags)
8197 {
8198 struct gaudi_device *gaudi = hdev->asic_specific;
8199 u32 status, timeout_usec;
8200 int rc;
8201
8202 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8203 hdev->hard_reset_pending)
8204 return 0;
8205
8206 if (hdev->pldm)
8207 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8208 else
8209 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8210
8211 /* L0 & L1 invalidation */
8212 WREG32(mmSTLB_INV_PS, 3);
8213 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8214 WREG32(mmSTLB_INV_PS, 2);
8215
8216 rc = hl_poll_timeout(
8217 hdev,
8218 mmSTLB_INV_PS,
8219 status,
8220 !status,
8221 1000,
8222 timeout_usec);
8223
8224 WREG32(mmSTLB_INV_SET, 0);
8225
8226 if (rc) {
8227 dev_err_ratelimited(hdev->dev,
8228 "MMU cache invalidation timeout\n");
8229 hl_device_reset(hdev, HL_RESET_HARD);
8230 }
8231
8232 return rc;
8233 }
8234
8235 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8236 bool is_hard, u32 flags,
8237 u32 asid, u64 va, u64 size)
8238 {
8239 /* Treat as invalidate all because there is no range invalidation
8240 * in Gaudi
8241 */
8242 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8243 }
8244
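/*
 * Programming a hop0 page-table address for an ASID: the physical address is
 * split between MMU_HOP0_PA43_12 and MMU_HOP0_PA49_44, the ASID is latched
 * via MMU_ASID and the update is kicked by setting bit 31 of MMU_BUSY. The
 * driver then polls MMU_BUSY until the hardware clears that bit, or fails
 * with a timeout.
 */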
8245 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8246 u32 asid, u64 phys_addr)
8247 {
8248 u32 status, timeout_usec;
8249 int rc;
8250
8251 if (hdev->pldm)
8252 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8253 else
8254 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8255
8256 WREG32(MMU_ASID, asid);
8257 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8258 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8259 WREG32(MMU_BUSY, 0x80000000);
8260
8261 rc = hl_poll_timeout(
8262 hdev,
8263 MMU_BUSY,
8264 status,
8265 !(status & 0x80000000),
8266 1000,
8267 timeout_usec);
8268
8269 if (rc) {
8270 dev_err(hdev->dev,
8271 "Timeout during MMU hop0 config of asid %d\n", asid);
8272 return rc;
8273 }
8274
8275 return 0;
8276 }
8277
8278 static int gaudi_send_heartbeat(struct hl_device *hdev)
8279 {
8280 struct gaudi_device *gaudi = hdev->asic_specific;
8281
8282 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8283 return 0;
8284
8285 return hl_fw_send_heartbeat(hdev);
8286 }
8287
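/*
 * CPUCP info retrieval: performs the firmware handshake over the boot
 * status/error registers, falls back to the default card name if the
 * firmware did not report one, and derives the card type and default power
 * values from the returned cpucp_info.
 */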
8288 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8289 {
8290 struct gaudi_device *gaudi = hdev->asic_specific;
8291 struct asic_fixed_properties *prop = &hdev->asic_prop;
8292 int rc;
8293
8294 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8295 return 0;
8296
8297 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8298 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8299 mmCPU_BOOT_ERR1);
8300 if (rc)
8301 return rc;
8302
8303 if (!strlen(prop->cpucp_info.card_name))
8304 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8305 CARD_NAME_MAX_LEN);
8306
8307 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8308
8309 set_default_power_values(hdev);
8310
8311 hdev->max_power = prop->max_power_default;
8312
8313 return 0;
8314 }
8315
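/*
 * Idle check: with clock gating disabled, the QMAN and engine status
 * registers of every DMA, TPC, MME and NIC engine are sampled. A busy engine
 * sets its bit in the caller-supplied mask (if any), and when a seq_file is
 * provided the per-engine status is also printed as a debugfs table. MME 1
 * and 3 are slaves, so only their ARCH_STATUS is checked.
 */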
8316 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8317 u8 mask_len, struct seq_file *s)
8318 {
8319 struct gaudi_device *gaudi = hdev->asic_specific;
8320 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8321 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8322 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8323 unsigned long *mask = (unsigned long *)mask_arr;
8324 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8325 bool is_idle = true, is_eng_idle, is_slave;
8326 u64 offset;
8327 int i, dma_id, port;
8328
8329 mutex_lock(&gaudi->clk_gate_mutex);
8330
8331 hdev->asic_funcs->disable_clock_gating(hdev);
8332
8333 if (s)
8334 seq_puts(s,
8335 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8336 "--- ------- ------------ ---------- -------------\n");
8337
8338 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8339 dma_id = gaudi_dma_assignment[i];
8340 offset = dma_id * DMA_QMAN_OFFSET;
8341
8342 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8343 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8344 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8345 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8346 IS_DMA_IDLE(dma_core_sts0);
8347 is_idle &= is_eng_idle;
8348
8349 if (mask && !is_eng_idle)
8350 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8351 if (s)
8352 seq_printf(s, fmt, dma_id,
8353 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8354 qm_cgm_sts, dma_core_sts0);
8355 }
8356
8357 if (s)
8358 seq_puts(s,
8359 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8360 "--- ------- ------------ ---------- ----------\n");
8361
8362 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8363 offset = i * TPC_QMAN_OFFSET;
8364 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8365 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8366 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8367 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8368 IS_TPC_IDLE(tpc_cfg_sts);
8369 is_idle &= is_eng_idle;
8370
8371 if (mask && !is_eng_idle)
8372 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8373 if (s)
8374 seq_printf(s, fmt, i,
8375 is_eng_idle ? "Y" : "N",
8376 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8377 }
8378
8379 if (s)
8380 seq_puts(s,
8381 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8382 "--- ------- ------------ ---------- -----------\n");
8383
8384 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8385 offset = i * MME_QMAN_OFFSET;
8386 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8387 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8388
8389 /* MME 1 & 3 are slaves, no need to check their QMANs */
8390 is_slave = i % 2;
8391 if (!is_slave) {
8392 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8393 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8394 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8395 }
8396
8397 is_idle &= is_eng_idle;
8398
8399 if (mask && !is_eng_idle)
8400 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8401 if (s) {
8402 if (!is_slave)
8403 seq_printf(s, fmt, i,
8404 is_eng_idle ? "Y" : "N",
8405 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8406 else
8407 seq_printf(s, mme_slave_fmt, i,
8408 is_eng_idle ? "Y" : "N", "-",
8409 "-", mme_arch_sts);
8410 }
8411 }
8412
8413 if (s)
8414 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8415 "--- ------- ------------ ----------\n");
8416
8417 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8418 offset = i * NIC_MACRO_QMAN_OFFSET;
8419 port = 2 * i;
8420 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8421 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8422 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8423 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8424 is_idle &= is_eng_idle;
8425
8426 if (mask && !is_eng_idle)
8427 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8428 if (s)
8429 seq_printf(s, nic_fmt, port,
8430 is_eng_idle ? "Y" : "N",
8431 qm_glbl_sts0, qm_cgm_sts);
8432 }
8433
8434 port = 2 * i + 1;
8435 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8436 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8437 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8438 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8439 is_idle &= is_eng_idle;
8440
8441 if (mask && !is_eng_idle)
8442 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8443 if (s)
8444 seq_printf(s, nic_fmt, port,
8445 is_eng_idle ? "Y" : "N",
8446 qm_glbl_sts0, qm_cgm_sts);
8447 }
8448 }
8449
8450 if (s)
8451 seq_puts(s, "\n");
8452
8453 hdev->asic_funcs->set_clock_gating(hdev);
8454
8455 mutex_unlock(&gaudi->clk_gate_mutex);
8456
8457 return is_idle;
8458 }
8459
8460 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8461 __acquires(&gaudi->hw_queues_lock)
8462 {
8463 struct gaudi_device *gaudi = hdev->asic_specific;
8464
8465 spin_lock(&gaudi->hw_queues_lock);
8466 }
8467
8468 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8469 __releases(&gaudi->hw_queues_lock)
8470 {
8471 struct gaudi_device *gaudi = hdev->asic_specific;
8472
8473 spin_unlock(&gaudi->hw_queues_lock);
8474 }
8475
8476 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8477 {
8478 return hdev->pdev->device;
8479 }
8480
8481 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8482 size_t max_size)
8483 {
8484 struct gaudi_device *gaudi = hdev->asic_specific;
8485
8486 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8487 return 0;
8488
8489 return hl_fw_get_eeprom_data(hdev, data, max_size);
8490 }
8491
8492 /*
8493 * this function should be used only during initialization and/or after reset,
8494 * when there are no active users.
8495 */
8496 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8497 u32 tpc_id)
8498 {
8499 struct gaudi_device *gaudi = hdev->asic_specific;
8500 u64 kernel_timeout;
8501 u32 status, offset;
8502 int rc;
8503
8504 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8505
8506 if (hdev->pldm)
8507 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8508 else
8509 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8510
8511 mutex_lock(&gaudi->clk_gate_mutex);
8512
8513 hdev->asic_funcs->disable_clock_gating(hdev);
8514
8515 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8516 lower_32_bits(tpc_kernel));
8517 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8518 upper_32_bits(tpc_kernel));
8519
8520 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8521 lower_32_bits(tpc_kernel));
8522 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8523 upper_32_bits(tpc_kernel));
8524 /* set a valid LUT pointer, content is of no significance */
8525 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8526 lower_32_bits(tpc_kernel));
8527 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8528 upper_32_bits(tpc_kernel));
8529
8530 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8531 lower_32_bits(CFG_BASE +
8532 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8533
8534 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8535 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8536 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8537 /* wait a bit for the engine to start executing */
8538 usleep_range(1000, 1500);
8539
8540 /* wait until engine has finished executing */
8541 rc = hl_poll_timeout(
8542 hdev,
8543 mmTPC0_CFG_STATUS + offset,
8544 status,
8545 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8546 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8547 1000,
8548 kernel_timeout);
8549
8550 if (rc) {
8551 dev_err(hdev->dev,
8552 "Timeout while waiting for TPC%d icache prefetch\n",
8553 tpc_id);
8554 hdev->asic_funcs->set_clock_gating(hdev);
8555 mutex_unlock(&gaudi->clk_gate_mutex);
8556 return -EIO;
8557 }
8558
8559 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8560 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8561
8562 /* wait a bit for the engine to start executing */
8563 usleep_range(1000, 1500);
8564
8565 /* wait until engine has finished executing */
8566 rc = hl_poll_timeout(
8567 hdev,
8568 mmTPC0_CFG_STATUS + offset,
8569 status,
8570 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8571 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8572 1000,
8573 kernel_timeout);
8574
8575 if (rc) {
8576 dev_err(hdev->dev,
8577 "Timeout while waiting for TPC%d vector pipe\n",
8578 tpc_id);
8579 hdev->asic_funcs->set_clock_gating(hdev);
8580 mutex_unlock(&gaudi->clk_gate_mutex);
8581 return -EIO;
8582 }
8583
8584 rc = hl_poll_timeout(
8585 hdev,
8586 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8587 status,
8588 (status == 0),
8589 1000,
8590 kernel_timeout);
8591
8592 hdev->asic_funcs->set_clock_gating(hdev);
8593 mutex_unlock(&gaudi->clk_gate_mutex);
8594
8595 if (rc) {
8596 dev_err(hdev->dev,
8597 "Timeout while waiting for TPC%d kernel to execute\n",
8598 tpc_id);
8599 return -EIO;
8600 }
8601
8602 return 0;
8603 }
8604
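/*
 * Internal CB pool setup (per user context): a coherent host buffer of
 * HOST_SPACE_INTERNAL_CB_SZ is allocated and handed to a gen_pool whose
 * minimum allocation order fits one collective CB (five MSG_SHORT packets
 * plus a FENCE). A host VA block is then reserved and the buffer is mapped
 * contiguously through the MMU, followed by an MMU cache invalidation.
 */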
8605 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8606 struct hl_ctx *ctx)
8607 {
8608 struct gaudi_device *gaudi = hdev->asic_specific;
8609 int min_alloc_order, rc, collective_cb_size;
8610
8611 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8612 return 0;
8613
8614 hdev->internal_cb_pool_virt_addr =
8615 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8616 HOST_SPACE_INTERNAL_CB_SZ,
8617 &hdev->internal_cb_pool_dma_addr,
8618 GFP_KERNEL | __GFP_ZERO);
8619
8620 if (!hdev->internal_cb_pool_virt_addr)
8621 return -ENOMEM;
8622
8623 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8624 sizeof(struct packet_fence);
8625 min_alloc_order = ilog2(collective_cb_size);
8626
8627 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8628 if (!hdev->internal_cb_pool) {
8629 dev_err(hdev->dev,
8630 "Failed to create internal CB pool\n");
8631 rc = -ENOMEM;
8632 goto free_internal_cb_pool;
8633 }
8634
8635 rc = gen_pool_add(hdev->internal_cb_pool,
8636 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8637 HOST_SPACE_INTERNAL_CB_SZ, -1);
8638 if (rc) {
8639 dev_err(hdev->dev,
8640 "Failed to add memory to internal CB pool\n");
8641 rc = -EFAULT;
8642 goto destroy_internal_cb_pool;
8643 }
8644
8645 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8646 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8647 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8648
8649 if (!hdev->internal_cb_va_base) {
8650 rc = -ENOMEM;
8651 goto destroy_internal_cb_pool;
8652 }
8653
8654 mutex_lock(&ctx->mmu_lock);
8655 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8656 hdev->internal_cb_pool_dma_addr,
8657 HOST_SPACE_INTERNAL_CB_SZ);
8658
8659 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8660 mutex_unlock(&ctx->mmu_lock);
8661
8662 if (rc)
8663 goto unreserve_internal_cb_pool;
8664
8665 return 0;
8666
8667 unreserve_internal_cb_pool:
8668 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8669 HOST_SPACE_INTERNAL_CB_SZ);
8670 destroy_internal_cb_pool:
8671 gen_pool_destroy(hdev->internal_cb_pool);
8672 free_internal_cb_pool:
8673 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8674 HOST_SPACE_INTERNAL_CB_SZ,
8675 hdev->internal_cb_pool_virt_addr,
8676 hdev->internal_cb_pool_dma_addr);
8677
8678 return rc;
8679 }
8680
8681 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8682 struct hl_ctx *ctx)
8683 {
8684 struct gaudi_device *gaudi = hdev->asic_specific;
8685
8686 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8687 return;
8688
8689 mutex_lock(&ctx->mmu_lock);
8690 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8691 HOST_SPACE_INTERNAL_CB_SZ);
8692 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8693 HOST_SPACE_INTERNAL_CB_SZ);
8694 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8695 mutex_unlock(&ctx->mmu_lock);
8696
8697 gen_pool_destroy(hdev->internal_cb_pool);
8698
8699 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8700 HOST_SPACE_INTERNAL_CB_SZ,
8701 hdev->internal_cb_pool_virt_addr,
8702 hdev->internal_cb_pool_dma_addr);
8703 }
8704
8705 static int gaudi_ctx_init(struct hl_ctx *ctx)
8706 {
8707 int rc;
8708
8709 if (ctx->asid == HL_KERNEL_ASID_ID)
8710 return 0;
8711
8712 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8713 if (rc)
8714 return rc;
8715
8716 rc = gaudi_restore_user_registers(ctx->hdev);
8717 if (rc)
8718 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8719
8720 return rc;
8721 }
8722
8723 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8724 {
8725 if (ctx->asid == HL_KERNEL_ASID_ID)
8726 return;
8727
8728 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8729 }
8730
8731 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8732 {
8733 return gaudi_cq_assignment[cq_idx];
8734 }
8735
8736 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8737 {
8738 return sizeof(struct packet_msg_short) +
8739 sizeof(struct packet_msg_prot) * 2;
8740 }
8741
8742 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8743 {
8744 return sizeof(struct packet_msg_short) * 4 +
8745 sizeof(struct packet_fence) +
8746 sizeof(struct packet_msg_prot) * 2;
8747 }
8748
8749 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8750 {
8751 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8752 }
8753
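/*
 * A signal CB is a single MSG_SHORT packet that adds 1 to the given sync
 * object (SOB "ADD" mode) against the W_S SOB base, with the engine-barrier
 * bit taken from the caller. The MSG_PROT packets accounted for in
 * gaudi_get_signal_cb_size() are presumably appended later by the common
 * queue code, not here.
 */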
8754 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8755 u32 size, bool eb)
8756 {
8757 struct hl_cb *cb = (struct hl_cb *) data;
8758 struct packet_msg_short *pkt;
8759 u32 value, ctl, pkt_size = sizeof(*pkt);
8760
8761 pkt = cb->kernel_address + size;
8762 memset(pkt, 0, pkt_size);
8763
8764 /* Inc by 1, Mode ADD */
8765 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8766 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8767
8768 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8769 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8770 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8771 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8772 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8773 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8774 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8775
8776 pkt->value = cpu_to_le32(value);
8777 pkt->ctl = cpu_to_le32(ctl);
8778
8779 return size + pkt_size;
8780 }
8781
8782 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8783 u16 addr)
8784 {
8785 u32 ctl, pkt_size = sizeof(*pkt);
8786
8787 memset(pkt, 0, pkt_size);
8788
8789 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8790 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8791 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8792 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8793 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8794 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8795
8796 pkt->value = cpu_to_le32(value);
8797 pkt->ctl = cpu_to_le32(ctl);
8798
8799 return pkt_size;
8800 }
8801
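/*
 * ARM-monitor packet: binds monitor mon_id to a sync-object group
 * (sob_base / 8) with the given mask and target value, using the
 * "greater or equal" compare mode. The MSG_SHORT address field is the offset
 * of the monitor's ARM register from the W_S monitor payload base, which is
 * assumed to be programmed as MSG_SHORT base 2.
 */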
8802 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8803 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8804 u16 sob_val, u16 mon_id)
8805 {
8806 u64 monitor_base;
8807 u32 ctl, value, pkt_size = sizeof(*pkt);
8808 u16 msg_addr_offset;
8809 u8 mask;
8810
8811 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8812 dev_err(hdev->dev,
8813 "sob_base %u (mask %#x) is not valid\n",
8814 sob_base, sob_mask);
8815 return 0;
8816 }
8817
8818 /*
8819 * monitor_base should be the content of the base0 address registers,
8820 * so it will be added to the msg short offsets
8821 */
8822 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8823
8824 msg_addr_offset =
8825 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8826 monitor_base;
8827
8828 memset(pkt, 0, pkt_size);
8829
8830 /* Monitor config packet: bind the monitor to a sync object */
8831 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8832 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8833 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8834 			0); /* GREATER OR EQUAL */
8835 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8836
8837 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8838 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8839 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8840 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8841 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8842 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8843 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8844
8845 pkt->value = cpu_to_le32(value);
8846 pkt->ctl = cpu_to_le32(ctl);
8847
8848 return pkt_size;
8849 }
8850
8851 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8852 {
8853 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8854
8855 memset(pkt, 0, pkt_size);
8856
8857 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8858 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8859 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8860
8861 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8862 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8863 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8864 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8865
8866 pkt->cfg = cpu_to_le32(cfg);
8867 pkt->ctl = cpu_to_le32(ctl);
8868
8869 return pkt_size;
8870 }
8871
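/*
 * Fence address lookup for wait CBs: each supported queue maps to its QMAN's
 * CP_FENCE2_RDATA_<stream> register. For NIC queues the index is decoded as
 * nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_<stream>) >> 2, where the
 * upper bit selects the NIC macro and the lower bit selects the QMAN inside
 * the macro. Unsupported queues return -EINVAL.
 */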
8872 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8873 {
8874 u32 offset, nic_index;
8875
8876 switch (queue_id) {
8877 case GAUDI_QUEUE_ID_DMA_0_0:
8878 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8879 break;
8880 case GAUDI_QUEUE_ID_DMA_0_1:
8881 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8882 break;
8883 case GAUDI_QUEUE_ID_DMA_0_2:
8884 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8885 break;
8886 case GAUDI_QUEUE_ID_DMA_0_3:
8887 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8888 break;
8889 case GAUDI_QUEUE_ID_DMA_1_0:
8890 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8891 break;
8892 case GAUDI_QUEUE_ID_DMA_1_1:
8893 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8894 break;
8895 case GAUDI_QUEUE_ID_DMA_1_2:
8896 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8897 break;
8898 case GAUDI_QUEUE_ID_DMA_1_3:
8899 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8900 break;
8901 case GAUDI_QUEUE_ID_DMA_5_0:
8902 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8903 break;
8904 case GAUDI_QUEUE_ID_DMA_5_1:
8905 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8906 break;
8907 case GAUDI_QUEUE_ID_DMA_5_2:
8908 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8909 break;
8910 case GAUDI_QUEUE_ID_DMA_5_3:
8911 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8912 break;
8913 case GAUDI_QUEUE_ID_TPC_7_0:
8914 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8915 break;
8916 case GAUDI_QUEUE_ID_TPC_7_1:
8917 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8918 break;
8919 case GAUDI_QUEUE_ID_TPC_7_2:
8920 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8921 break;
8922 case GAUDI_QUEUE_ID_TPC_7_3:
8923 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8924 break;
8925 case GAUDI_QUEUE_ID_NIC_0_0:
8926 case GAUDI_QUEUE_ID_NIC_1_0:
8927 case GAUDI_QUEUE_ID_NIC_2_0:
8928 case GAUDI_QUEUE_ID_NIC_3_0:
8929 case GAUDI_QUEUE_ID_NIC_4_0:
8930 case GAUDI_QUEUE_ID_NIC_5_0:
8931 case GAUDI_QUEUE_ID_NIC_6_0:
8932 case GAUDI_QUEUE_ID_NIC_7_0:
8933 case GAUDI_QUEUE_ID_NIC_8_0:
8934 case GAUDI_QUEUE_ID_NIC_9_0:
8935 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8936 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8937 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8938 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8939 break;
8940 case GAUDI_QUEUE_ID_NIC_0_1:
8941 case GAUDI_QUEUE_ID_NIC_1_1:
8942 case GAUDI_QUEUE_ID_NIC_2_1:
8943 case GAUDI_QUEUE_ID_NIC_3_1:
8944 case GAUDI_QUEUE_ID_NIC_4_1:
8945 case GAUDI_QUEUE_ID_NIC_5_1:
8946 case GAUDI_QUEUE_ID_NIC_6_1:
8947 case GAUDI_QUEUE_ID_NIC_7_1:
8948 case GAUDI_QUEUE_ID_NIC_8_1:
8949 case GAUDI_QUEUE_ID_NIC_9_1:
8950 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8951 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8952 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8953 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8954 break;
8955 case GAUDI_QUEUE_ID_NIC_0_2:
8956 case GAUDI_QUEUE_ID_NIC_1_2:
8957 case GAUDI_QUEUE_ID_NIC_2_2:
8958 case GAUDI_QUEUE_ID_NIC_3_2:
8959 case GAUDI_QUEUE_ID_NIC_4_2:
8960 case GAUDI_QUEUE_ID_NIC_5_2:
8961 case GAUDI_QUEUE_ID_NIC_6_2:
8962 case GAUDI_QUEUE_ID_NIC_7_2:
8963 case GAUDI_QUEUE_ID_NIC_8_2:
8964 case GAUDI_QUEUE_ID_NIC_9_2:
8965 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8966 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8967 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8968 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8969 break;
8970 case GAUDI_QUEUE_ID_NIC_0_3:
8971 case GAUDI_QUEUE_ID_NIC_1_3:
8972 case GAUDI_QUEUE_ID_NIC_2_3:
8973 case GAUDI_QUEUE_ID_NIC_3_3:
8974 case GAUDI_QUEUE_ID_NIC_4_3:
8975 case GAUDI_QUEUE_ID_NIC_5_3:
8976 case GAUDI_QUEUE_ID_NIC_6_3:
8977 case GAUDI_QUEUE_ID_NIC_7_3:
8978 case GAUDI_QUEUE_ID_NIC_8_3:
8979 case GAUDI_QUEUE_ID_NIC_9_3:
8980 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8981 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8982 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8983 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8984 break;
8985 default:
8986 return -EINVAL;
8987 }
8988
8989 *addr = CFG_BASE + offset;
8990
8991 return 0;
8992 }
8993
8994 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8995 {
8996 u64 monitor_base;
8997 u32 size = 0;
8998 u16 msg_addr_offset;
8999
9000 /*
9001 * monitor_base should be the content of the base0 address registers,
9002 * so it will be added to the msg short offsets
9003 */
9004 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9005
9006 /* First monitor config packet: low address of the sync */
9007 msg_addr_offset =
9008 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9009 monitor_base;
9010
9011 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9012 msg_addr_offset);
9013
9014 /* Second monitor config packet: high address of the sync */
9015 msg_addr_offset =
9016 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9017 monitor_base;
9018
9019 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9020 msg_addr_offset);
9021
9022 /*
9023 * Third monitor config packet: the payload, i.e. what to write when the
9024 * sync triggers
9025 */
9026 msg_addr_offset =
9027 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9028 monitor_base;
9029
9030 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9031
9032 return size;
9033 }
9034
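/*
 * A wait CB is built as: three MSG_SHORT packets that program the monitor
 * payload (low/high address of the fence register and payload data 1), one
 * ARM-monitor MSG_SHORT and one FENCE packet that waits for that payload.
 * This matches the four MSG_SHORT + FENCE accounted for in
 * gaudi_get_wait_cb_size(); the MSG_PROT packets in that size are presumably
 * added by the common completion path.
 */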
9035 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9036 struct hl_gen_wait_properties *prop)
9037 {
9038 struct hl_cb *cb = (struct hl_cb *) prop->data;
9039 void *buf = cb->kernel_address;
9040 u64 fence_addr = 0;
9041 u32 size = prop->size;
9042
9043 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9044 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9045 prop->q_idx);
9046 return 0;
9047 }
9048
9049 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9050 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9051 prop->sob_mask, prop->sob_val, prop->mon_id);
9052 size += gaudi_add_fence_pkt(buf + size);
9053
9054 return size;
9055 }
9056
9057 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9058 {
9059 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9060
9061 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9062 hw_sob->sob_id);
9063
9064 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9065 hw_sob->sob_id * 4, 0);
9066
9067 kref_init(&hw_sob->kref);
9068 }
9069
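/*
 * DMA mask selection: the boot firmware leaves HL_POWER9_HOST_MAGIC in a
 * non-reset scratchpad register when the host supports 64-bit DMA (POWER9
 * platforms); otherwise the driver falls back to a 48-bit DMA mask.
 */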
9070 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9071 {
9072 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9073 HL_POWER9_HOST_MAGIC) {
9074 hdev->power9_64bit_dma_enable = 1;
9075 hdev->dma_mask = 64;
9076 } else {
9077 hdev->power9_64bit_dma_enable = 0;
9078 hdev->dma_mask = 48;
9079 }
9080 }
9081
9082 static u64 gaudi_get_device_time(struct hl_device *hdev)
9083 {
9084 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9085
9086 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9087 }
9088
9089 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9090 u32 *block_size, u32 *block_id)
9091 {
9092 return -EPERM;
9093 }
9094
9095 static int gaudi_block_mmap(struct hl_device *hdev,
9096 struct vm_area_struct *vma,
9097 u32 block_id, u32 block_size)
9098 {
9099 return -EPERM;
9100 }
9101
9102 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9103 {
9104 struct cpu_dyn_regs *dyn_regs =
9105 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9106 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9107 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9108 le32_to_cpu(dyn_regs->gic_host_ints_irq);
9109
9110 WREG32(irq_handler_offset,
9111 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9112 }
9113
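/*
 * Translates the generic HL_GAUDI_*_PLL indices used by the common/uapi
 * layer into the firmware's PLL enumeration; unknown indices are rejected
 * with -EINVAL.
 */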
9114 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9115 {
9116 switch (pll_idx) {
9117 case HL_GAUDI_CPU_PLL: return CPU_PLL;
9118 case HL_GAUDI_PCI_PLL: return PCI_PLL;
9119 case HL_GAUDI_NIC_PLL: return NIC_PLL;
9120 case HL_GAUDI_DMA_PLL: return DMA_PLL;
9121 case HL_GAUDI_MESH_PLL: return MESH_PLL;
9122 case HL_GAUDI_MME_PLL: return MME_PLL;
9123 case HL_GAUDI_TPC_PLL: return TPC_PLL;
9124 case HL_GAUDI_IF_PLL: return IF_PLL;
9125 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9126 case HL_GAUDI_HBM_PLL: return HBM_PLL;
9127 default: return -EINVAL;
9128 }
9129 }
9130
9131 static int gaudi_add_sync_to_engine_map_entry(
9132 struct hl_sync_to_engine_map *map, u32 reg_value,
9133 enum hl_sync_engine_type engine_type, u32 engine_id)
9134 {
9135 struct hl_sync_to_engine_map_entry *entry;
9136
9137 	/* The register value is a partial address of a sync object and is
9138 	 * used as a unique identifier. For this we need to strip the
9139 	 * CFG base bits from the value.
9140 */
9141 if (reg_value == 0 || reg_value == 0xffffffff)
9142 return 0;
9143 reg_value -= (u32)CFG_BASE;
9144
9145 /* create a new hash entry */
9146 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9147 if (!entry)
9148 return -ENOMEM;
9149 entry->engine_type = engine_type;
9150 entry->engine_id = engine_id;
9151 entry->sync_id = reg_value;
9152 hash_add(map->tb, &entry->node, reg_value);
9153
9154 return 0;
9155 }
9156
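/*
 * Builds the state-dump sync-to-engine map: for every TPC, MME sub-engine
 * and DMA engine, the sync-object configuration register is sampled (with
 * clock gating disabled around the TPC/MME accesses) and hashed so that a
 * sync id can later be resolved back to the engine that owns it.
 */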
9157 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9158 struct hl_sync_to_engine_map *map)
9159 {
9160 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9161 struct gaudi_device *gaudi = hdev->asic_specific;
9162 int i, j, rc;
9163 u32 reg_value;
9164
9165 /* Iterate over TPC engines */
9166 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9167 		/* TPC registers must be accessed with clock gating disabled */
9168 mutex_lock(&gaudi->clk_gate_mutex);
9169 hdev->asic_funcs->disable_clock_gating(hdev);
9170
9171 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9172 sds->props[SP_NEXT_TPC] * i);
9173
9174 		/* We can re-enable clock gating */
9175 hdev->asic_funcs->set_clock_gating(hdev);
9176 mutex_unlock(&gaudi->clk_gate_mutex);
9177
9178 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9179 ENGINE_TPC, i);
9180 if (rc)
9181 goto free_sync_to_engine_map;
9182 }
9183
9184 /* Iterate over MME engines */
9185 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9186 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9187 			/* MME registers must be accessed with clock gating
9188 * disabled
9189 */
9190 mutex_lock(&gaudi->clk_gate_mutex);
9191 hdev->asic_funcs->disable_clock_gating(hdev);
9192
9193 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9194 sds->props[SP_NEXT_MME] * i +
9195 j * sizeof(u32));
9196
9197 			/* We can re-enable clock gating */
9198 hdev->asic_funcs->set_clock_gating(hdev);
9199 mutex_unlock(&gaudi->clk_gate_mutex);
9200
9201 rc = gaudi_add_sync_to_engine_map_entry(
9202 map, reg_value, ENGINE_MME,
9203 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9204 if (rc)
9205 goto free_sync_to_engine_map;
9206 }
9207 }
9208
9209 /* Iterate over DMA engines */
9210 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9211 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9212 sds->props[SP_DMA_QUEUES_OFFSET] * i);
9213 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9214 ENGINE_DMA, i);
9215 if (rc)
9216 goto free_sync_to_engine_map;
9217 }
9218
9219 return 0;
9220
9221 free_sync_to_engine_map:
9222 hl_state_dump_free_sync_to_engine_map(map);
9223
9224 return rc;
9225 }
9226
9227 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9228 {
9229 return FIELD_GET(
9230 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9231 mon->status);
9232 }
9233
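/*
 * Builds a human-readable list of the sync objects a monitor is armed on:
 * every cleared bit i in the monitor mask contributes SOB
 * (group_id * MONITOR_MAX_SOBS + i). Writes are bounded by max_write bytes
 * per entry so the output cannot overflow MONITOR_SOB_STRING_SIZE.
 */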
9234 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9235 {
9236 const size_t max_write = 10;
9237 u32 gid, mask, sob;
9238 int i, offset;
9239
9240 /* Sync object ID is calculated as follows:
9241 * (8 * group_id + cleared bits in mask)
9242 */
9243 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9244 mon->arm_data);
9245 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9246 mon->arm_data);
9247
9248 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9249 max_write; mask >>= 1, i++) {
9250 if (!(mask & 1)) {
9251 sob = gid * MONITOR_MAX_SOBS + i;
9252
9253 if (offset > 0)
9254 offset += snprintf(sobs + offset, max_write,
9255 ", ");
9256
9257 offset += snprintf(sobs + offset, max_write, "%u", sob);
9258 }
9259 }
9260 }
9261
9262 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9263 struct hl_device *hdev,
9264 struct hl_mon_state_dump *mon)
9265 {
9266 const char *name;
9267 char scratch_buf1[BIN_REG_STRING_SIZE],
9268 scratch_buf2[BIN_REG_STRING_SIZE];
9269 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9270
9271 name = hl_state_dump_get_monitor_name(hdev, mon);
9272 if (!name)
9273 name = "";
9274
9275 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9276
9277 return hl_snprintf_resize(
9278 buf, size, offset,
9279 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9280 mon->id, name,
9281 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9282 mon->arm_data),
9283 hl_format_as_binary(
9284 scratch_buf1, sizeof(scratch_buf1),
9285 FIELD_GET(
9286 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9287 mon->arm_data)),
9288 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9289 mon->arm_data),
9290 mon->wr_data,
9291 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9292 hl_format_as_binary(
9293 scratch_buf2, sizeof(scratch_buf2),
9294 FIELD_GET(
9295 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9296 mon->status)),
9297 monitored_sobs);
9298 }
9299
9300
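/*
 * Per-engine fence dump: the CP status of every stream is sampled first and,
 * for streams with a fence in progress, the corresponding CP_FENCE<id>_CNT /
 * CP_FENCE<id>_RDATA addresses and the latched fence value are formatted
 * into the resizable state-dump buffer.
 */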
9301 static int gaudi_print_fences_single_engine(
9302 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9303 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9304 size_t *size, size_t *offset)
9305 {
9306 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9307 int rc = -ENOMEM, i;
9308 u32 *statuses, *fences;
9309
9310 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9311 sizeof(*statuses), GFP_KERNEL);
9312 if (!statuses)
9313 goto out;
9314
9315 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9316 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9317 sizeof(*fences), GFP_KERNEL);
9318 if (!fences)
9319 goto free_status;
9320
9321 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9322 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9323
9324 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9325 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9326 fences[i] = RREG32(base_offset + i * sizeof(u32));
9327
9328 /* The actual print */
9329 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9330 u32 fence_id;
9331 u64 fence_cnt, fence_rdata;
9332 const char *engine_name;
9333
9334 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9335 statuses[i]))
9336 continue;
9337
9338 fence_id =
9339 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9340 fence_cnt = base_offset + CFG_BASE +
9341 sizeof(u32) *
9342 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9343 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9344 sds->props[SP_FENCE0_RDATA_OFFSET];
9345 engine_name = hl_sync_engine_to_string(engine_type);
9346
9347 rc = hl_snprintf_resize(
9348 buf, size, offset,
9349 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9350 engine_name, engine_id,
9351 i, fence_id,
9352 fence_cnt, engine_name, engine_id, fence_id, i,
9353 fence_rdata, engine_name, engine_id, fence_id, i,
9354 fences[fence_id],
9355 statuses[i]);
9356 if (rc)
9357 goto free_fences;
9358 }
9359
9360 rc = 0;
9361
9362 free_fences:
9363 kfree(fences);
9364 free_status:
9365 kfree(statuses);
9366 out:
9367 return rc;
9368 }
9369
9370
9371 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9372 .monitor_valid = gaudi_monitor_valid,
9373 .print_single_monitor = gaudi_print_single_monitor,
9374 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9375 .print_fences_single_engine = gaudi_print_fences_single_engine,
9376 };
9377
9378 static void gaudi_state_dump_init(struct hl_device *hdev)
9379 {
9380 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9381 int i;
9382
9383 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9384 hash_add(sds->so_id_to_str_tb,
9385 &gaudi_so_id_to_str[i].node,
9386 gaudi_so_id_to_str[i].id);
9387
9388 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9389 hash_add(sds->monitor_id_to_str_tb,
9390 &gaudi_monitor_id_to_str[i].node,
9391 gaudi_monitor_id_to_str[i].id);
9392
9393 sds->props = gaudi_state_dump_specs_props;
9394
9395 sds->sync_namager_names = gaudi_sync_manager_names;
9396
9397 sds->funcs = gaudi_state_dump_funcs;
9398 }
9399
9400 static u32 *gaudi_get_stream_master_qid_arr(void)
9401 {
9402 return gaudi_stream_master;
9403 }
9404
9405 static const struct hl_asic_funcs gaudi_funcs = {
9406 .early_init = gaudi_early_init,
9407 .early_fini = gaudi_early_fini,
9408 .late_init = gaudi_late_init,
9409 .late_fini = gaudi_late_fini,
9410 .sw_init = gaudi_sw_init,
9411 .sw_fini = gaudi_sw_fini,
9412 .hw_init = gaudi_hw_init,
9413 .hw_fini = gaudi_hw_fini,
9414 .halt_engines = gaudi_halt_engines,
9415 .suspend = gaudi_suspend,
9416 .resume = gaudi_resume,
9417 .mmap = gaudi_mmap,
9418 .ring_doorbell = gaudi_ring_doorbell,
9419 .pqe_write = gaudi_pqe_write,
9420 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9421 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9422 .scrub_device_mem = gaudi_scrub_device_mem,
9423 .get_int_queue_base = gaudi_get_int_queue_base,
9424 .test_queues = gaudi_test_queues,
9425 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9426 .asic_dma_pool_free = gaudi_dma_pool_free,
9427 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9428 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9429 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9430 .cs_parser = gaudi_cs_parser,
9431 .asic_dma_map_sg = gaudi_dma_map_sg,
9432 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9433 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9434 .update_eq_ci = gaudi_update_eq_ci,
9435 .context_switch = gaudi_context_switch,
9436 .restore_phase_topology = gaudi_restore_phase_topology,
9437 .debugfs_read32 = gaudi_debugfs_read32,
9438 .debugfs_write32 = gaudi_debugfs_write32,
9439 .debugfs_read64 = gaudi_debugfs_read64,
9440 .debugfs_write64 = gaudi_debugfs_write64,
9441 .debugfs_read_dma = gaudi_debugfs_read_dma,
9442 .add_device_attr = gaudi_add_device_attr,
9443 .handle_eqe = gaudi_handle_eqe,
9444 .set_pll_profile = gaudi_set_pll_profile,
9445 .get_events_stat = gaudi_get_events_stat,
9446 .read_pte = gaudi_read_pte,
9447 .write_pte = gaudi_write_pte,
9448 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9449 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9450 .send_heartbeat = gaudi_send_heartbeat,
9451 .set_clock_gating = gaudi_set_clock_gating,
9452 .disable_clock_gating = gaudi_disable_clock_gating,
9453 .debug_coresight = gaudi_debug_coresight,
9454 .is_device_idle = gaudi_is_device_idle,
9455 .soft_reset_late_init = gaudi_soft_reset_late_init,
9456 .hw_queues_lock = gaudi_hw_queues_lock,
9457 .hw_queues_unlock = gaudi_hw_queues_unlock,
9458 .get_pci_id = gaudi_get_pci_id,
9459 .get_eeprom_data = gaudi_get_eeprom_data,
9460 .send_cpu_message = gaudi_send_cpu_message,
9461 .pci_bars_map = gaudi_pci_bars_map,
9462 .init_iatu = gaudi_init_iatu,
9463 .rreg = hl_rreg,
9464 .wreg = hl_wreg,
9465 .halt_coresight = gaudi_halt_coresight,
9466 .ctx_init = gaudi_ctx_init,
9467 .ctx_fini = gaudi_ctx_fini,
9468 .get_clk_rate = gaudi_get_clk_rate,
9469 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9470 .load_firmware_to_device = gaudi_load_firmware_to_device,
9471 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9472 .get_signal_cb_size = gaudi_get_signal_cb_size,
9473 .get_wait_cb_size = gaudi_get_wait_cb_size,
9474 .gen_signal_cb = gaudi_gen_signal_cb,
9475 .gen_wait_cb = gaudi_gen_wait_cb,
9476 .reset_sob = gaudi_reset_sob,
9477 .reset_sob_group = gaudi_reset_sob_group,
9478 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9479 .get_device_time = gaudi_get_device_time,
9480 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9481 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9482 .scramble_addr = hl_mmu_scramble_addr,
9483 .descramble_addr = hl_mmu_descramble_addr,
9484 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9485 .get_hw_block_id = gaudi_get_hw_block_id,
9486 .hw_block_mmap = gaudi_block_mmap,
9487 .enable_events_from_fw = gaudi_enable_events_from_fw,
9488 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9489 .init_firmware_loader = gaudi_init_firmware_loader,
9490 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9491 .state_dump_init = gaudi_state_dump_init,
9492 .get_sob_addr = gaudi_get_sob_addr,
9493 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9494 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9495 };
9496
9497 /**
9498 * gaudi_set_asic_funcs - set GAUDI function pointers
9499 *
9500 * @hdev: pointer to hl_device structure
9501 *
9502 */
9503 void gaudi_set_asic_funcs(struct hl_device *hdev)
9504 {
9505 hdev->asic_funcs = &gaudi_funcs;
9506 }
9507