1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is never
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN 20
86
87 #define GAUDI_CB_POOL_CB_CNT 512
88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
97
98 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
99
100 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
101
102 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
103
104 #define MONITOR_SOB_STRING_SIZE 256
105
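/* Stream master queue IDs - the four streams of each PCI DMA QMAN (DMA 0 and DMA 1) */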
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 GAUDI_QUEUE_ID_DMA_0_0,
108 GAUDI_QUEUE_ID_DMA_0_1,
109 GAUDI_QUEUE_ID_DMA_0_2,
110 GAUDI_QUEUE_ID_DMA_0_3,
111 GAUDI_QUEUE_ID_DMA_1_0,
112 GAUDI_QUEUE_ID_DMA_1_1,
113 GAUDI_QUEUE_ID_DMA_1_2,
114 GAUDI_QUEUE_ID_DMA_1_3
115 };
116
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 "gaudi cpu eq"
122 };
123
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 [0] = GAUDI_QUEUE_ID_DMA_0_0,
137 [1] = GAUDI_QUEUE_ID_DMA_0_1,
138 [2] = GAUDI_QUEUE_ID_DMA_0_2,
139 [3] = GAUDI_QUEUE_ID_DMA_0_3,
140 [4] = GAUDI_QUEUE_ID_DMA_1_0,
141 [5] = GAUDI_QUEUE_ID_DMA_1_1,
142 [6] = GAUDI_QUEUE_ID_DMA_1_2,
143 [7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
148 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
149 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
150 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
151 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
152 [PACKET_REPEAT] = sizeof(struct packet_repeat),
153 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
154 [PACKET_FENCE] = sizeof(struct packet_fence),
155 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
156 [PACKET_NOP] = sizeof(struct packet_nop),
157 [PACKET_STOP] = sizeof(struct packet_stop),
158 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
159 [PACKET_WAIT] = sizeof(struct packet_wait),
160 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
161 };
162
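/* Return true only for packet IDs that the driver knows how to handle */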
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 switch (id) {
166 case PACKET_WREG_32:
167 case PACKET_WREG_BULK:
168 case PACKET_MSG_LONG:
169 case PACKET_MSG_SHORT:
170 case PACKET_CP_DMA:
171 case PACKET_REPEAT:
172 case PACKET_MSG_PROT:
173 case PACKET_FENCE:
174 case PACKET_LIN_DMA:
175 case PACKET_NOP:
176 case PACKET_STOP:
177 case PACKET_ARB_POINT:
178 case PACKET_WAIT:
179 case PACKET_LOAD_AND_EXE:
180 return true;
181 default:
182 return false;
183 }
184 }
185
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 "tpc_address_exceed_slm",
189 "tpc_div_by_0",
190 "tpc_spu_mac_overflow",
191 "tpc_spu_addsub_overflow",
192 "tpc_spu_abs_overflow",
193 "tpc_spu_fp_dst_nan_inf",
194 "tpc_spu_fp_dst_denorm",
195 "tpc_vpu_mac_overflow",
196 "tpc_vpu_addsub_overflow",
197 "tpc_vpu_abs_overflow",
198 "tpc_vpu_fp_dst_nan_inf",
199 "tpc_vpu_fp_dst_denorm",
200 "tpc_assertions",
201 "tpc_illegal_instruction",
202 "tpc_pc_wrap_around",
203 "tpc_qm_sw_err",
204 "tpc_hbw_rresp_err",
205 "tpc_hbw_bresp_err",
206 "tpc_lbw_rresp_err",
207 "tpc_lbw_bresp_err"
208 };
209
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 "PQ AXI HBW error",
213 "CQ AXI HBW error",
214 "CP AXI HBW error",
215 "CP error due to undefined OPCODE",
216 "CP encountered STOP OPCODE",
217 "CP AXI LBW error",
218 "CP WRREG32 or WRBULK returned error",
219 "N/A",
220 "FENCE 0 inc over max value and clipped",
221 "FENCE 1 inc over max value and clipped",
222 "FENCE 2 inc over max value and clipped",
223 "FENCE 3 inc over max value and clipped",
224 "FENCE 0 dec under min value and clipped",
225 "FENCE 1 dec under min value and clipped",
226 "FENCE 2 dec under min value and clipped",
227 "FENCE 3 dec under min value and clipped"
228 };
229
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 "Choice push while full error",
233 "Choice Q watchdog error",
234 "MSG AXI LBW returned with error"
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 [SP_MON_OBJ_WR_ADDR_LOW] =
402 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 [SP_MON_OBJ_WR_ADDR_HIGH] =
404 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 [SP_FENCE0_CNT_OFFSET] =
426 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 [SP_FENCE0_RDATA_OFFSET] =
428 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 [SP_NUM_CORES] = 1,
431 };
432
433 static const int gaudi_queue_id_to_engine_id[] = {
434 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464
465 /* The order here is opposite to the order of the indexing in the h/w.
466 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467 */
468 static const char * const gaudi_sync_manager_names[] = {
469 "SYNC_MGR_E_N",
470 "SYNC_MGR_W_N",
471 "SYNC_MGR_E_S",
472 "SYNC_MGR_W_S",
473 NULL
474 };
475
476 struct ecc_info_extract_params {
477 u64 block_address;
478 u32 num_memories;
479 bool derr;
480 };
481
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 struct hl_gen_wait_properties *prop);
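
/*
 * External queues act as collective masters; the DMA5, TPC7 and NIC queues
 * act as collective slaves. All other queues do not take part in collective
 * operations.
 */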
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 return HL_COLLECTIVE_MASTER;
505
506 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 return HL_COLLECTIVE_SLAVE;
509
510 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 return HL_COLLECTIVE_SLAVE;
513
514 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 return HL_COLLECTIVE_SLAVE;
517
518 return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520
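/* Choose max/DC power defaults according to the card type (PMC vs. PCI) and FW security state */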
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 struct asic_fixed_properties *prop = &hdev->asic_prop;
524
525 if (hdev->card_type == cpucp_card_type_pmc) {
526 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527
528 if (prop->fw_security_enabled)
529 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 else
531 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 } else {
533 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 }
536 }
537
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 struct asic_fixed_properties *prop = &hdev->asic_prop;
541 u32 num_sync_stream_queues = 0;
542 int i;
543
544 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 prop->hw_queues_props = kcalloc(prop->max_queues,
546 sizeof(struct hw_queue_properties),
547 GFP_KERNEL);
548
549 if (!prop->hw_queues_props)
550 return -ENOMEM;
551
552 for (i = 0 ; i < prop->max_queues ; i++) {
553 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 prop->hw_queues_props[i].driver_only = 0;
556 prop->hw_queues_props[i].supports_sync_stream = 1;
557 prop->hw_queues_props[i].cb_alloc_flags =
558 CB_ALLOC_KERNEL;
559 num_sync_stream_queues++;
560 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 prop->hw_queues_props[i].driver_only = 1;
563 prop->hw_queues_props[i].supports_sync_stream = 0;
564 prop->hw_queues_props[i].cb_alloc_flags =
565 CB_ALLOC_KERNEL;
566 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 prop->hw_queues_props[i].driver_only = 0;
569 prop->hw_queues_props[i].supports_sync_stream = 0;
570 prop->hw_queues_props[i].cb_alloc_flags =
571 CB_ALLOC_USER;
572
573 }
574 prop->hw_queues_props[i].collective_mode =
575 get_collective_mode(hdev, i);
576 }
577
578 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 prop->cfg_base_address = CFG_BASE;
580 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 prop->host_base_address = HOST_PHYS_BASE;
582 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 prop->collective_first_sob = 0;
586 prop->collective_first_mon = 0;
587
588 /* 2 SOBs per internal queue stream are reserved for collective */
589 prop->sync_stream_first_sob =
590 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 * QMAN_STREAMS * HL_RSVD_SOBS;
592
593 /* 1 monitor per internal queue stream is reserved for collective
594 * 2 monitors per external queue stream are reserved for collective
595 */
596 prop->sync_stream_first_mon =
597 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 (NUMBER_OF_EXT_HW_QUEUES * 2);
599
600 prop->dram_base_address = DRAM_PHYS_BASE;
601 prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604
605 prop->sram_base_address = SRAM_BASE_ADDR;
606 prop->sram_size = SRAM_SIZE;
607 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 prop->sram_user_base_address =
609 prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610
611 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613
614 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 if (hdev->pldm)
616 prop->mmu_pgt_size = 0x800000; /* 8MB */
617 else
618 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 prop->mmu_pte_size = HL_PTE_SIZE;
620 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 prop->dram_page_size = PAGE_SIZE_2MB;
623 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 prop->dram_supports_virtual_memory = false;
625
626 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 prop->pmmu.end_addr =
638 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 prop->pmmu.page_size = PAGE_SIZE_4KB;
640 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 prop->pmmu.last_mask = LAST_MASK;
642 /* TODO: will be duplicated until implementing per-MMU props */
643 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645
646 /* PMMU and HPMMU are the same except for the page size */
647 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649
650 /* shifts and masks are the same in PMMU and DMMU */
651 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 prop->dmmu.page_size = PAGE_SIZE_2MB;
655
656 prop->cfg_size = CFG_SIZE;
657 prop->max_asid = MAX_ASID;
658 prop->num_of_events = GAUDI_EVENT_SIZE;
659 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660
661 set_default_power_values(hdev);
662
663 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665
666 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668
669 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670 CARD_NAME_MAX_LEN);
671
672 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673
674 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675 prop->sync_stream_first_sob +
676 (num_sync_stream_queues * HL_RSVD_SOBS);
677 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678 prop->sync_stream_first_mon +
679 (num_sync_stream_queues * HL_RSVD_MONS);
680
681 prop->first_available_user_interrupt = USHRT_MAX;
682
683 for (i = 0 ; i < HL_MAX_DCORES ; i++)
684 prop->first_available_cq[i] = USHRT_MAX;
685
686 prop->fw_cpu_boot_dev_sts0_valid = false;
687 prop->fw_cpu_boot_dev_sts1_valid = false;
688 prop->hard_reset_done_by_fw = false;
689 prop->gic_interrupts_enable = true;
690
691 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692
693 prop->clk_pll_index = HL_GAUDI_MME_PLL;
694 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695
696 prop->use_get_power_for_reset_history = true;
697
698 prop->configurable_stop_on_err = true;
699
700 prop->set_max_power_on_device_init = true;
701
702 prop->dma_mask = 48;
703
704 return 0;
705 }
706
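/* Map the SRAM, CFG and HBM BARs; only the HBM BAR is mapped write-combined */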
707 static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709 static const char * const name[] = {"SRAM", "CFG", "HBM"};
710 bool is_wc[3] = {false, false, true};
711 int rc;
712
713 rc = hl_pci_bars_map(hdev, name, is_wc);
714 if (rc)
715 return rc;
716
717 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718 (CFG_BASE - SPI_FLASH_BASE_ADDR);
719
720 return 0;
721 }
722
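/*
 * Move the HBM BAR (inbound iATU region 2) to point at @addr.
 * Returns the previous BAR base address, or U64_MAX if the region could not
 * be set or the F/W has already configured the iATU.
 */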
723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725 struct gaudi_device *gaudi = hdev->asic_specific;
726 struct hl_inbound_pci_region pci_region;
727 u64 old_addr = addr;
728 int rc;
729
730 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731 return old_addr;
732
733 if (hdev->asic_prop.iatu_done_by_fw)
734 return U64_MAX;
735
736 /* Inbound Region 2 - Bar 4 - Point to HBM */
737 pci_region.mode = PCI_BAR_MATCH_MODE;
738 pci_region.bar = HBM_BAR_ID;
739 pci_region.addr = addr;
740 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741 if (rc)
742 return U64_MAX;
743
744 if (gaudi) {
745 old_addr = gaudi->hbm_bar_cur_addr;
746 gaudi->hbm_bar_cur_addr = addr;
747 }
748
749 return old_addr;
750 }
751
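/*
 * Configure the PCIe iATU: inbound regions for SRAM+CFG, SPI flash and HBM,
 * and a single outbound region towards host memory. Skipped entirely when the
 * F/W has already done the configuration.
 */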
752 static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754 struct hl_inbound_pci_region inbound_region;
755 struct hl_outbound_pci_region outbound_region;
756 int rc;
757
758 if (hdev->asic_prop.iatu_done_by_fw)
759 return 0;
760
761 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762 inbound_region.mode = PCI_BAR_MATCH_MODE;
763 inbound_region.bar = SRAM_BAR_ID;
764 inbound_region.addr = SRAM_BASE_ADDR;
765 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766 if (rc)
767 goto done;
768
769 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770 inbound_region.mode = PCI_BAR_MATCH_MODE;
771 inbound_region.bar = CFG_BAR_ID;
772 inbound_region.addr = SPI_FLASH_BASE_ADDR;
773 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774 if (rc)
775 goto done;
776
777 /* Inbound Region 2 - Bar 4 - Point to HBM */
778 inbound_region.mode = PCI_BAR_MATCH_MODE;
779 inbound_region.bar = HBM_BAR_ID;
780 inbound_region.addr = DRAM_PHYS_BASE;
781 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782 if (rc)
783 goto done;
784
785 /* Outbound Region 0 - Point to Host */
786 outbound_region.addr = HOST_PHYS_BASE;
787 outbound_region.size = HOST_PHYS_SIZE;
788 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789
790 done:
791 return rc;
792 }
793
794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796 return RREG32(mmHW_STATE);
797 }
798
799 static int gaudi_early_init(struct hl_device *hdev)
800 {
801 struct asic_fixed_properties *prop = &hdev->asic_prop;
802 struct pci_dev *pdev = hdev->pdev;
803 resource_size_t pci_bar_size;
804 u32 fw_boot_status;
805 int rc;
806
807 rc = gaudi_set_fixed_properties(hdev);
808 if (rc) {
809 dev_err(hdev->dev, "Failed setting fixed properties\n");
810 return rc;
811 }
812
813 /* Check BAR sizes */
814 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815
816 if (pci_bar_size != SRAM_BAR_SIZE) {
817 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819 rc = -ENODEV;
820 goto free_queue_props;
821 }
822
823 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824
825 if (pci_bar_size != CFG_BAR_SIZE) {
826 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828 rc = -ENODEV;
829 goto free_queue_props;
830 }
831
832 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834
835 /* If FW security is enabled at this point it means no access to ELBI */
836 if (hdev->asic_prop.fw_security_enabled) {
837 hdev->asic_prop.iatu_done_by_fw = true;
838
839 /*
840 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
841 * the decision can only be taken based on PCI ID security.
842 */
843 hdev->asic_prop.gic_interrupts_enable = false;
844 goto pci_init;
845 }
846
847 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848 &fw_boot_status);
849 if (rc)
850 goto free_queue_props;
851
852 /* Check whether FW is configuring iATU */
853 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855 hdev->asic_prop.iatu_done_by_fw = true;
856
857 pci_init:
858 rc = hl_pci_init(hdev);
859 if (rc)
860 goto free_queue_props;
861
862 /* Before continuing in the initialization, we need to read the preboot
863 * version to determine whether we run with a security-enabled firmware
864 */
865 rc = hl_fw_read_preboot_status(hdev);
866 if (rc) {
867 if (hdev->reset_on_preboot_fail)
868 hdev->asic_funcs->hw_fini(hdev, true, false);
869 goto pci_fini;
870 }
871
872 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
873 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
874 hdev->asic_funcs->hw_fini(hdev, true, false);
875 }
876
877 return 0;
878
879 pci_fini:
880 hl_pci_fini(hdev);
881 free_queue_props:
882 kfree(hdev->asic_prop.hw_queues_props);
883 return rc;
884 }
885
886 static int gaudi_early_fini(struct hl_device *hdev)
887 {
888 kfree(hdev->asic_prop.hw_queues_props);
889 hl_pci_fini(hdev);
890
891 return 0;
892 }
893
894 /**
895 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
896 *
897 * @hdev: pointer to hl_device structure
898 * Return: 0 on success, negative value for error.
899 */
900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
901 {
902 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
903 struct asic_fixed_properties *prop = &hdev->asic_prop;
904 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
905 int rc;
906
907 if ((hdev->fw_components & FW_TYPE_LINUX) &&
908 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
909 struct gaudi_device *gaudi = hdev->asic_specific;
910
911 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
912 return 0;
913
914 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
915
916 if (rc)
917 return rc;
918
919 freq = pll_freq_arr[2];
920 } else {
921 /* Backward compatibility */
922 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
923 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
924 nr = RREG32(mmPSOC_CPU_PLL_NR);
925 nf = RREG32(mmPSOC_CPU_PLL_NF);
926 od = RREG32(mmPSOC_CPU_PLL_OD);
927
928 if (div_sel == DIV_SEL_REF_CLK ||
929 div_sel == DIV_SEL_DIVIDED_REF) {
930 if (div_sel == DIV_SEL_REF_CLK)
931 freq = PLL_REF_CLK;
932 else
933 freq = PLL_REF_CLK / (div_fctr + 1);
934 } else if (div_sel == DIV_SEL_PLL_CLK ||
935 div_sel == DIV_SEL_DIVIDED_PLL) {
936 pll_clk = PLL_REF_CLK * (nf + 1) /
937 ((nr + 1) * (od + 1));
938 if (div_sel == DIV_SEL_PLL_CLK)
939 freq = pll_clk;
940 else
941 freq = pll_clk / (div_fctr + 1);
942 } else {
943 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
944 freq = 0;
945 }
946 }
947
948 prop->psoc_timestamp_frequency = freq;
949 prop->psoc_pci_pll_nr = nr;
950 prop->psoc_pci_pll_nf = nf;
951 prop->psoc_pci_pll_od = od;
952 prop->psoc_pci_pll_div_factor = div_fctr;
953
954 return 0;
955 }
956
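/*
 * Build a LIN_DMA packet that copies the TPC kernel from host memory to the
 * user SRAM area (8KB aligned), send it on QMAN0 and then run the kernel on
 * every TPC engine.
 */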
957 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
958 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
959 {
960 struct asic_fixed_properties *prop = &hdev->asic_prop;
961 struct packet_lin_dma *init_tpc_mem_pkt;
962 struct hl_cs_job *job;
963 struct hl_cb *cb;
964 u64 dst_addr;
965 u32 cb_size, ctl;
966 u8 tpc_id;
967 int rc;
968
969 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
970 if (!cb)
971 return -EFAULT;
972
973 init_tpc_mem_pkt = cb->kernel_address;
974 cb_size = sizeof(*init_tpc_mem_pkt);
975 memset(init_tpc_mem_pkt, 0, cb_size);
976
977 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
978
979 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
980 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
981 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
982 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
983
984 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
985
986 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
987
988 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
989 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
990 round_up(prop->sram_user_base_address, SZ_8K));
991 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
992
993 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
994 if (!job) {
995 dev_err(hdev->dev, "Failed to allocate a new job\n");
996 rc = -ENOMEM;
997 goto release_cb;
998 }
999
1000 job->id = 0;
1001 job->user_cb = cb;
1002 atomic_inc(&job->user_cb->cs_cnt);
1003 job->user_cb_size = cb_size;
1004 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1005 job->patched_cb = job->user_cb;
1006 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1007
1008 hl_debugfs_add_job(hdev, job);
1009
1010 rc = gaudi_send_job_on_qman0(hdev, job);
1011
1012 if (rc)
1013 goto free_job;
1014
1015 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1016 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1017 if (rc)
1018 break;
1019 }
1020
1021 free_job:
1022 hl_userptr_delete_list(hdev, &job->userptr_list);
1023 hl_debugfs_remove_job(hdev, job);
1024 kfree(job);
1025 atomic_dec(&cb->cs_cnt);
1026
1027 release_cb:
1028 hl_cb_put(cb);
1029 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1030
1031 return rc;
1032 }
1033
1034 /*
1035 * gaudi_init_tpc_mem() - Initialize TPC memories.
1036 * @hdev: Pointer to hl_device structure.
1037 *
1038 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1039 *
1040 * Return: 0 for success, negative value for error.
1041 */
1042 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1043 {
1044 const struct firmware *fw;
1045 size_t fw_size;
1046 void *cpu_addr;
1047 dma_addr_t dma_handle;
1048 int rc, count = 5;
1049
1050 again:
1051 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1052 if (rc == -EINTR && count-- > 0) {
1053 msleep(50);
1054 goto again;
1055 }
1056
1057 if (rc) {
1058 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1059 GAUDI_TPC_FW_FILE);
1060 goto out;
1061 }
1062
1063 fw_size = fw->size;
1064 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1065 if (!cpu_addr) {
1066 dev_err(hdev->dev,
1067 "Failed to allocate %zu of dma memory for TPC kernel\n",
1068 fw_size);
1069 rc = -ENOMEM;
1070 goto out;
1071 }
1072
1073 memcpy(cpu_addr, fw->data, fw_size);
1074
1075 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1076
1077 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1078
1079 out:
1080 release_firmware(fw);
1081 return rc;
1082 }
1083
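/*
 * Assign a SOB from the stream's current SOB group to each NIC slave queue,
 * and one additional SOB that is shared by the DMA5/TPC7 reduction queue.
 */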
1084 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1085 {
1086 struct gaudi_device *gaudi = hdev->asic_specific;
1087 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1088 struct hl_hw_queue *q;
1089 u32 i, sob_id, sob_group_id, queue_id;
1090
1091 /* Iterate through SOB groups and assign a SOB for each slave queue */
1092 sob_group_id =
1093 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1094 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1095
1096 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1097 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1098 q = &hdev->kernel_queues[queue_id + (4 * i)];
1099 q->sync_stream_prop.collective_sob_id = sob_id + i;
1100 }
1101
1102 /* Both DMA5 and TPC7 use the same resources since only a single
1103 * engine needs to participate in the reduction process
1104 */
1105 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1106 q = &hdev->kernel_queues[queue_id];
1107 q->sync_stream_prop.collective_sob_id =
1108 sob_id + NIC_NUMBER_OF_ENGINES;
1109
1110 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1111 q = &hdev->kernel_queues[queue_id];
1112 q->sync_stream_prop.collective_sob_id =
1113 sob_id + NIC_NUMBER_OF_ENGINES;
1114 }
1115
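/* kref release callback - clear all SOBs in the group and re-arm the refcount */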
1116 static void gaudi_sob_group_hw_reset(struct kref *ref)
1117 {
1118 struct gaudi_hw_sob_group *hw_sob_group =
1119 container_of(ref, struct gaudi_hw_sob_group, kref);
1120 struct hl_device *hdev = hw_sob_group->hdev;
1121 int i;
1122
1123 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1124 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1125 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1126
1127 kref_init(&hw_sob_group->kref);
1128 }
1129
1130 static void gaudi_sob_group_reset_error(struct kref *ref)
1131 {
1132 struct gaudi_hw_sob_group *hw_sob_group =
1133 container_of(ref, struct gaudi_hw_sob_group, kref);
1134 struct hl_device *hdev = hw_sob_group->hdev;
1135
1136 dev_crit(hdev->dev,
1137 "SOB release shouldn't be called here, base_sob_id: %d\n",
1138 hw_sob_group->base_sob_id);
1139 }
1140
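/*
 * Build the master monitor SOB mask: one bit per initialized NIC engine plus
 * one bit for the collective (reduction) engine.
 */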
1141 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1142 {
1143 struct gaudi_collective_properties *prop;
1144 int i;
1145
1146 prop = &gaudi->collective_props;
1147
1148 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1149
1150 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1151 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1152 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1153 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1154 /* Set collective engine bit */
1155 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1156 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1157 }
1158
1159 static int gaudi_collective_init(struct hl_device *hdev)
1160 {
1161 u32 i, sob_id, reserved_sobs_per_group;
1162 struct gaudi_collective_properties *prop;
1163 struct gaudi_device *gaudi;
1164
1165 gaudi = hdev->asic_specific;
1166 prop = &gaudi->collective_props;
1167 sob_id = hdev->asic_prop.collective_first_sob;
1168
1169 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1170 reserved_sobs_per_group =
1171 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1172
1173 /* Init SOB groups */
1174 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1175 prop->hw_sob_group[i].hdev = hdev;
1176 prop->hw_sob_group[i].base_sob_id = sob_id;
1177 sob_id += reserved_sobs_per_group;
1178 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1179 }
1180
1181 for (i = 0 ; i < QMAN_STREAMS; i++) {
1182 prop->next_sob_group_val[i] = 1;
1183 prop->curr_sob_group_idx[i] = 0;
1184 gaudi_collective_map_sobs(hdev, i);
1185 }
1186
1187 gaudi_collective_mstr_sob_mask_set(gaudi);
1188
1189 return 0;
1190 }
1191
1192 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1193 {
1194 struct gaudi_device *gaudi = hdev->asic_specific;
1195 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1196
1197 kref_put(&cprop->hw_sob_group[sob_group].kref,
1198 gaudi_sob_group_hw_reset);
1199 }
1200
1201 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1202 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1203 {
1204 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1205 struct gaudi_collective_properties *cprop;
1206 struct hl_gen_wait_properties wait_prop;
1207 struct hl_sync_stream_properties *prop;
1208 struct gaudi_device *gaudi;
1209
1210 gaudi = hdev->asic_specific;
1211 cprop = &gaudi->collective_props;
1212 queue_id = job->hw_queue_id;
1213 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1214
1215 master_sob_base =
1216 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1217 master_monitor = prop->collective_mstr_mon_id[0];
1218
1219 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1220
1221 dev_dbg(hdev->dev,
1222 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1223 master_sob_base, cprop->mstr_sob_mask[0],
1224 cprop->next_sob_group_val[stream],
1225 master_monitor, queue_id);
1226
1227 wait_prop.data = (void *) job->patched_cb;
1228 wait_prop.sob_base = master_sob_base;
1229 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1230 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1231 wait_prop.mon_id = master_monitor;
1232 wait_prop.q_idx = queue_id;
1233 wait_prop.size = cb_size;
1234 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1235
1236 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1237 master_monitor = prop->collective_mstr_mon_id[1];
1238
1239 dev_dbg(hdev->dev,
1240 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1241 master_sob_base, cprop->mstr_sob_mask[1],
1242 cprop->next_sob_group_val[stream],
1243 master_monitor, queue_id);
1244
1245 wait_prop.sob_base = master_sob_base;
1246 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1247 wait_prop.mon_id = master_monitor;
1248 wait_prop.size = cb_size;
1249 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1250 }
1251
1252 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1253 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1254 {
1255 struct hl_gen_wait_properties wait_prop;
1256 struct hl_sync_stream_properties *prop;
1257 u32 queue_id, cb_size = 0;
1258
1259 queue_id = job->hw_queue_id;
1260 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1261
1262 if (job->cs->encaps_signals) {
1263 /* use the encaps signal handle stored earlier in the flow
1264 * and set the SOB information from the encaps
1265 * signals handle
1266 */
1267 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1268 cs_cmpl);
1269
1270 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1271 job->cs->sequence,
1272 cs_cmpl->hw_sob->sob_id,
1273 cs_cmpl->sob_val);
1274 }
1275
1276 /* Add to wait CBs using slave monitor */
1277 wait_prop.data = (void *) job->user_cb;
1278 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1279 wait_prop.sob_mask = 0x1;
1280 wait_prop.sob_val = cs_cmpl->sob_val;
1281 wait_prop.mon_id = prop->collective_slave_mon_id;
1282 wait_prop.q_idx = queue_id;
1283 wait_prop.size = cb_size;
1284
1285 dev_dbg(hdev->dev,
1286 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1287 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1288 prop->collective_slave_mon_id, queue_id);
1289
1290 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1291
1292 dev_dbg(hdev->dev,
1293 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1294 prop->collective_sob_id, queue_id);
1295
1296 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1297 prop->collective_sob_id, cb_size, false);
1298 }
1299
1300 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1301 {
1302 struct hl_cs_compl *signal_cs_cmpl =
1303 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1304 struct hl_cs_compl *cs_cmpl =
1305 container_of(cs->fence, struct hl_cs_compl, base_fence);
1306 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1307 struct gaudi_collective_properties *cprop;
1308 u32 stream, queue_id, sob_group_offset;
1309 struct gaudi_device *gaudi;
1310 struct hl_device *hdev;
1311 struct hl_cs_job *job;
1312 struct hl_ctx *ctx;
1313
1314 ctx = cs->ctx;
1315 hdev = ctx->hdev;
1316 gaudi = hdev->asic_specific;
1317 cprop = &gaudi->collective_props;
1318
1319 if (cs->encaps_signals) {
1320 cs_cmpl->hw_sob = handle->hw_sob;
1321 /* at this checkpoint we only need the hw_sob pointer
1322 * for the completion check before starting to go over the jobs
1323 * of the master/slaves; the sob_value will be taken later on
1324 * in gaudi_collective_slave_init_job, depending on each
1325 * job's wait offset value.
1326 */
1327 cs_cmpl->sob_val = 0;
1328 } else {
1329 /* copy the SOB id and value of the signal CS */
1330 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1331 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1332 }
1333
1334 /* check again if the signal cs already completed.
1335 * if yes then don't send any wait cs since the hw_sob
1336 * could be in reset already. if signal is not completed
1337 * then get refcount to hw_sob to prevent resetting the sob
1338 * while wait cs is not submitted.
1339 * note that this check is protected by two locks,
1340 * hw queue lock and completion object lock,
1341 * and the same completion object lock also protects
1342 * the hw_sob reset handler function.
1343 * The hw_queue lock prevents the hw_sob refcount value,
1344 * changed by the signal/wait flows, from going out of sync.
1345 */
1346 spin_lock(&signal_cs_cmpl->lock);
1347
1348 if (completion_done(&cs->signal_fence->completion)) {
1349 spin_unlock(&signal_cs_cmpl->lock);
1350 return -EINVAL;
1351 }
1352 /* Increment kref since all slave queues are now waiting on it */
1353 kref_get(&cs_cmpl->hw_sob->kref);
1354
1355 spin_unlock(&signal_cs_cmpl->lock);
1356
1357 /* Calculate the stream from collective master queue (1st job) */
1358 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1359 stream = job->hw_queue_id % 4;
1360 sob_group_offset =
1361 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1362
1363 list_for_each_entry(job, &cs->job_list, cs_node) {
1364 queue_id = job->hw_queue_id;
1365
1366 if (hdev->kernel_queues[queue_id].collective_mode ==
1367 HL_COLLECTIVE_MASTER)
1368 gaudi_collective_master_init_job(hdev, job, stream,
1369 sob_group_offset);
1370 else
1371 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1372 }
1373
1374 cs_cmpl->sob_group = sob_group_offset;
1375
1376 /* Handle sob group kref and wraparound */
1377 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1378 cprop->next_sob_group_val[stream]++;
1379
1380 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1381 /*
1382 * Decrement as we reached the max value.
1383 * The release function won't be called here as we've
1384 * just incremented the refcount.
1385 */
1386 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1387 gaudi_sob_group_reset_error);
1388 cprop->next_sob_group_val[stream] = 1;
1389 /* only two SOBs are currently in use */
1390 cprop->curr_sob_group_idx[stream] =
1391 (cprop->curr_sob_group_idx[stream] + 1) &
1392 (HL_RSVD_SOBS - 1);
1393
1394 gaudi_collective_map_sobs(hdev, stream);
1395
1396 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1397 cprop->curr_sob_group_idx[stream], stream);
1398 }
1399
1400 mb();
1401 hl_fence_put(cs->signal_fence);
1402 cs->signal_fence = NULL;
1403
1404 return 0;
1405 }
1406
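/*
 * Return the number of bytes that must be appended to a user CB for the two
 * MSG_PROT completion packets. If they do not fit in the CB's last cache
 * line, pad up to the next cache-line boundary first.
 */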
1407 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1408 {
1409 u32 cacheline_end, additional_commands;
1410
1411 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1412 additional_commands = sizeof(struct packet_msg_prot) * 2;
1413
1414 if (user_cb_size + additional_commands > cacheline_end)
1415 return cacheline_end - user_cb_size + additional_commands;
1416 else
1417 return additional_commands;
1418 }
1419
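/*
 * Allocate a job and a kernel CB for a single master/slave queue that takes
 * part in a collective wait CS.
 */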
1420 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1421 struct hl_ctx *ctx, struct hl_cs *cs,
1422 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1423 u32 encaps_signal_offset)
1424 {
1425 struct hw_queue_properties *hw_queue_prop;
1426 struct hl_cs_counters_atomic *cntr;
1427 struct hl_cs_job *job;
1428 struct hl_cb *cb;
1429 u32 cb_size;
1430 bool patched_cb;
1431
1432 cntr = &hdev->aggregated_cs_counters;
1433
1434 if (mode == HL_COLLECTIVE_MASTER) {
1435 /* CB size of collective master queue contains
1436 * 4 msg short packets for monitor 1 configuration
1437 * 1 fence packet
1438 * 4 msg short packets for monitor 2 configuration
1439 * 1 fence packet
1440 * 2 msg prot packets for completion and MSI
1441 */
1442 cb_size = sizeof(struct packet_msg_short) * 8 +
1443 sizeof(struct packet_fence) * 2 +
1444 sizeof(struct packet_msg_prot) * 2;
1445 patched_cb = true;
1446 } else {
1447 /* CB size of collective slave queues contains
1448 * 4 msg short packets for monitor configuration
1449 * 1 fence packet
1450 * 1 additional msg short packet for sob signal
1451 */
1452 cb_size = sizeof(struct packet_msg_short) * 5 +
1453 sizeof(struct packet_fence);
1454 patched_cb = false;
1455 }
1456
1457 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1458 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1459 if (!job) {
1460 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1461 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1462 dev_err(hdev->dev, "Failed to allocate a new job\n");
1463 return -ENOMEM;
1464 }
1465
1466 /* Allocate internal mapped CB for non patched CBs */
1467 cb = hl_cb_kernel_create(hdev, cb_size,
1468 hdev->mmu_enable && !patched_cb);
1469 if (!cb) {
1470 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1471 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1472 kfree(job);
1473 return -EFAULT;
1474 }
1475
1476 job->id = 0;
1477 job->cs = cs;
1478 job->user_cb = cb;
1479 atomic_inc(&job->user_cb->cs_cnt);
1480 job->user_cb_size = cb_size;
1481 job->hw_queue_id = queue_id;
1482
1483 /* since it's guaranteed to have only one chunk in the collective wait
1484 * cs, we can use this chunk to set the encapsulated signal offset
1485 * in the jobs.
1486 */
1487 if (cs->encaps_signals)
1488 job->encaps_sig_wait_offset = encaps_signal_offset;
1489
1490 /*
1491 * No need for parsing, the user CB is the patched CB.
1492 * We call hl_cb_destroy() for two reasons - we don't need
1493 * the CB in the CB idr anymore and we want to decrement its refcount as
1494 * it was incremented inside hl_cb_kernel_create().
1495 */
1496 if (patched_cb)
1497 job->patched_cb = job->user_cb;
1498 else
1499 job->patched_cb = NULL;
1500
1501 job->job_cb_size = job->user_cb_size;
1502 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1503
1504 /* increment refcount as for external queues we get completion */
1505 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1506 cs_get(cs);
1507
1508 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1509
1510 list_add_tail(&job->cs_node, &cs->job_list);
1511
1512 hl_debugfs_add_job(hdev, job);
1513
1514 return 0;
1515 }
1516
1517 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1518 struct hl_ctx *ctx, struct hl_cs *cs,
1519 u32 wait_queue_id, u32 collective_engine_id,
1520 u32 encaps_signal_offset)
1521 {
1522 struct gaudi_device *gaudi = hdev->asic_specific;
1523 struct hw_queue_properties *hw_queue_prop;
1524 u32 queue_id, collective_queue, num_jobs;
1525 u32 stream, nic_queue, nic_idx = 0;
1526 bool skip;
1527 int i, rc = 0;
1528
1529 /* Verify wait queue id is configured as master */
1530 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1531 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1532 dev_err(hdev->dev,
1533 "Queue %d is not configured as collective master\n",
1534 wait_queue_id);
1535 return -EINVAL;
1536 }
1537
1538 /* Verify engine id is supported */
1539 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1540 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1541 dev_err(hdev->dev,
1542 "Collective wait does not support engine %u\n",
1543 collective_engine_id);
1544 return -EINVAL;
1545 }
1546
1547 stream = wait_queue_id % 4;
1548
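/*
 * Queue IDs are grouped four per engine (one queue per stream), so the
 * reduction engine's collective queue below is taken on the same
 * stream as the master wait queue.
 */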
1549 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1550 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1551 else
1552 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1553
1554 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1555 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1556
1557 /* The first job goes to the collective master queue; it will wait for
1558 * the collective slave queues to finish execution.
1559 * The synchronization is done using two monitors:
1560 * the first monitor for NICs 0-7, the second monitor for NICs 8-9 and
1561 * the reduction engine (DMA5/TPC7).
1562 *
1563 * The rest of the jobs go to the collective slave queues, which will
1564 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1565 */
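/*
 * Note: num_jobs is NUMBER_OF_SOBS_IN_GRP + 1, i.e. one job per
 * collective slave queue (presumably one SOB per slave: the NIC
 * streams plus the reduction engine stream) and one job for the
 * collective master queue.
 */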
1566 for (i = 0 ; i < num_jobs ; i++) {
1567 if (i == 0) {
1568 queue_id = wait_queue_id;
1569 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1570 HL_COLLECTIVE_MASTER, queue_id,
1571 wait_queue_id, encaps_signal_offset);
1572 } else {
1573 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1574 if (gaudi->hw_cap_initialized &
1575 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1576 skip = false;
1577 else
1578 skip = true;
1579
1580 queue_id = nic_queue;
1581 nic_queue += 4;
1582 nic_idx++;
1583
1584 if (skip)
1585 continue;
1586 } else {
1587 queue_id = collective_queue;
1588 }
1589
1590 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1591 HL_COLLECTIVE_SLAVE, queue_id,
1592 wait_queue_id, encaps_signal_offset);
1593 }
1594
1595 if (rc)
1596 return rc;
1597 }
1598
1599 return rc;
1600 }
1601
1602 static int gaudi_late_init(struct hl_device *hdev)
1603 {
1604 struct gaudi_device *gaudi = hdev->asic_specific;
1605 int rc;
1606
1607 rc = gaudi->cpucp_info_get(hdev);
1608 if (rc) {
1609 dev_err(hdev->dev, "Failed to get cpucp info\n");
1610 return rc;
1611 }
1612
1613 if ((hdev->card_type == cpucp_card_type_pci) &&
1614 (hdev->nic_ports_mask & 0x3)) {
1615 dev_info(hdev->dev,
1616 "PCI card detected, only 8 ports are enabled\n");
1617 hdev->nic_ports_mask &= ~0x3;
1618
1619 /* Stop and disable unused NIC QMANs */
1620 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1621 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1622 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1623
1624 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1625 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1626 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1627
1628 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1629 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1630
1631 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1632 }
1633
1634 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1635 if (rc) {
1636 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1637 return rc;
1638 }
1639
1640 /* Scrub both SRAM and DRAM */
1641 rc = hdev->asic_funcs->scrub_device_mem(hdev);
1642 if (rc)
1643 goto disable_pci_access;
1644
1645 rc = gaudi_fetch_psoc_frequency(hdev);
1646 if (rc) {
1647 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1648 goto disable_pci_access;
1649 }
1650
1651 rc = gaudi_mmu_clear_pgt_range(hdev);
1652 if (rc) {
1653 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1654 goto disable_pci_access;
1655 }
1656
1657 rc = gaudi_init_tpc_mem(hdev);
1658 if (rc) {
1659 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1660 goto disable_pci_access;
1661 }
1662
1663 rc = gaudi_collective_init(hdev);
1664 if (rc) {
1665 dev_err(hdev->dev, "Failed to init collective\n");
1666 goto disable_pci_access;
1667 }
1668
1669 /* We only support a single ASID for the user, so for the sake of optimization, just
1670 * initialize the ASID one time during device initialization with the fixed value of 1
1671 */
1672 gaudi_mmu_prepare(hdev, 1);
1673
1674 hl_fw_set_pll_profile(hdev);
1675
1676 return 0;
1677
1678 disable_pci_access:
1679 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1680
1681 return rc;
1682 }
1683
1684 static void gaudi_late_fini(struct hl_device *hdev)
1685 {
1686 hl_hwmon_release_resources(hdev);
1687 }
1688
1689 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1690 {
1691 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1692 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1693 int i, j, rc = 0;
1694
1695 /*
1696 * The device CPU works with 40-bit addresses, and bit 39 must be set
1697 * to '1' when accessing the host.
1698 * Bits 49:39 of the full host address are saved for a later
1699 * configuration of the HW that performs the extension to 50 bits.
1700 * Because there is a single HW register that holds the extension bits,
1701 * these bits must be identical across the entire allocated range.
1702 */
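/*
 * In other words, since bits 49:39 select a 2^39 byte (512GB) window,
 * the allocation must not cross a 512GB boundary; the retry loop below
 * simply allocates again until it gets a range that fits in a single
 * window.
 */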
1703
1704 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1705 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1706 &dma_addr_arr[i],
1707 GFP_KERNEL | __GFP_ZERO);
1708 if (!virt_addr_arr[i]) {
1709 rc = -ENOMEM;
1710 goto free_dma_mem_arr;
1711 }
1712
1713 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1714 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1715 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1716 break;
1717 }
1718
1719 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1720 dev_err(hdev->dev,
1721 "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1722 rc = -EFAULT;
1723 goto free_dma_mem_arr;
1724 }
1725
1726 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1727 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1728 hdev->cpu_pci_msb_addr =
1729 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1730
1731 if (!hdev->asic_prop.fw_security_enabled)
1732 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1733
1734 free_dma_mem_arr:
1735 for (j = 0 ; j < i ; j++)
1736 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1737 dma_addr_arr[j]);
1738
1739 return rc;
1740 }
1741
1742 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1743 {
1744 struct gaudi_device *gaudi = hdev->asic_specific;
1745 struct gaudi_internal_qman_info *q;
1746 u32 i;
1747
1748 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1749 q = &gaudi->internal_qmans[i];
1750 if (!q->pq_kernel_addr)
1751 continue;
1752 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1753 }
1754 }
1755
1756 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1757 {
1758 struct gaudi_device *gaudi = hdev->asic_specific;
1759 struct gaudi_internal_qman_info *q;
1760 int rc, i;
1761
1762 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1763 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1764 continue;
1765
1766 q = &gaudi->internal_qmans[i];
1767
1768 switch (i) {
1769 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1770 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1771 break;
1772 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1773 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1774 break;
1775 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1776 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1777 break;
1778 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1779 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1780 break;
1781 default:
1782 dev_err(hdev->dev, "Bad internal queue index %d", i);
1783 rc = -EINVAL;
1784 goto free_internal_qmans_pq_mem;
1785 }
1786
1787 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1788 GFP_KERNEL | __GFP_ZERO);
1789 if (!q->pq_kernel_addr) {
1790 rc = -ENOMEM;
1791 goto free_internal_qmans_pq_mem;
1792 }
1793 }
1794
1795 return 0;
1796
1797 free_internal_qmans_pq_mem:
1798 gaudi_free_internal_qmans_pq_mem(hdev);
1799 return rc;
1800 }
1801
1802 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1803 {
1804 struct asic_fixed_properties *prop = &hdev->asic_prop;
1805 struct pci_mem_region *region;
1806
1807 /* CFG */
1808 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1809 region->region_base = CFG_BASE;
1810 region->region_size = CFG_SIZE;
1811 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1812 region->bar_size = CFG_BAR_SIZE;
1813 region->bar_id = CFG_BAR_ID;
1814 region->used = 1;
1815
1816 /* SRAM */
1817 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1818 region->region_base = SRAM_BASE_ADDR;
1819 region->region_size = SRAM_SIZE;
1820 region->offset_in_bar = 0;
1821 region->bar_size = SRAM_BAR_SIZE;
1822 region->bar_id = SRAM_BAR_ID;
1823 region->used = 1;
1824
1825 /* DRAM */
1826 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1827 region->region_base = DRAM_PHYS_BASE;
1828 region->region_size = hdev->asic_prop.dram_size;
1829 region->offset_in_bar = 0;
1830 region->bar_size = prop->dram_pci_bar_size;
1831 region->bar_id = HBM_BAR_ID;
1832 region->used = 1;
1833
1834 /* SP SRAM */
1835 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1836 region->region_base = PSOC_SCRATCHPAD_ADDR;
1837 region->region_size = PSOC_SCRATCHPAD_SIZE;
1838 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1839 region->bar_size = CFG_BAR_SIZE;
1840 region->bar_id = CFG_BAR_ID;
1841 region->used = 1;
1842 }
1843
1844 static int gaudi_sw_init(struct hl_device *hdev)
1845 {
1846 struct gaudi_device *gaudi;
1847 u32 i, event_id = 0;
1848 int rc;
1849
1850 /* Allocate device structure */
1851 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1852 if (!gaudi)
1853 return -ENOMEM;
1854
1855 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1856 if (gaudi_irq_map_table[i].valid) {
1857 if (event_id == GAUDI_EVENT_SIZE) {
1858 dev_err(hdev->dev,
1859 "Event array exceeds the limit of %u events\n",
1860 GAUDI_EVENT_SIZE);
1861 rc = -EINVAL;
1862 goto free_gaudi_device;
1863 }
1864
1865 gaudi->events[event_id++] =
1866 gaudi_irq_map_table[i].fc_id;
1867 }
1868 }
1869
1870 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1871
1872 hdev->asic_specific = gaudi;
1873
1874 /* Create DMA pool for small allocations */
1875 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1876 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1877 if (!hdev->dma_pool) {
1878 dev_err(hdev->dev, "failed to create DMA pool\n");
1879 rc = -ENOMEM;
1880 goto free_gaudi_device;
1881 }
1882
1883 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1884 if (rc)
1885 goto free_dma_pool;
1886
1887 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1888 if (!hdev->cpu_accessible_dma_pool) {
1889 dev_err(hdev->dev,
1890 "Failed to create CPU accessible DMA pool\n");
1891 rc = -ENOMEM;
1892 goto free_cpu_dma_mem;
1893 }
1894
1895 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1896 (uintptr_t) hdev->cpu_accessible_dma_mem,
1897 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1898 if (rc) {
1899 dev_err(hdev->dev,
1900 "Failed to add memory to CPU accessible DMA pool\n");
1901 rc = -EFAULT;
1902 goto free_cpu_accessible_dma_pool;
1903 }
1904
1905 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1906 if (rc)
1907 goto free_cpu_accessible_dma_pool;
1908
1909 spin_lock_init(&gaudi->hw_queues_lock);
1910
1911 hdev->supports_sync_stream = true;
1912 hdev->supports_coresight = true;
1913 hdev->supports_staged_submission = true;
1914 hdev->supports_wait_for_multi_cs = true;
1915
1916 hdev->asic_funcs->set_pci_memory_regions(hdev);
1917 hdev->stream_master_qid_arr =
1918 hdev->asic_funcs->get_stream_master_qid_arr();
1919 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1920
1921 return 0;
1922
1923 free_cpu_accessible_dma_pool:
1924 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1925 free_cpu_dma_mem:
1926 if (!hdev->asic_prop.fw_security_enabled)
1927 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1928 hdev->cpu_pci_msb_addr);
1929 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1930 hdev->cpu_accessible_dma_address);
1931 free_dma_pool:
1932 dma_pool_destroy(hdev->dma_pool);
1933 free_gaudi_device:
1934 kfree(gaudi);
1935 return rc;
1936 }
1937
1938 static int gaudi_sw_fini(struct hl_device *hdev)
1939 {
1940 struct gaudi_device *gaudi = hdev->asic_specific;
1941
1942 gaudi_free_internal_qmans_pq_mem(hdev);
1943
1944 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1945
1946 if (!hdev->asic_prop.fw_security_enabled)
1947 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1948 hdev->cpu_pci_msb_addr);
1949
1950 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1951 hdev->cpu_accessible_dma_address);
1952
1953 dma_pool_destroy(hdev->dma_pool);
1954
1955 kfree(gaudi);
1956
1957 return 0;
1958 }
1959
1960 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1961 {
1962 struct hl_device *hdev = arg;
1963 int i;
1964
1965 if (hdev->disabled)
1966 return IRQ_HANDLED;
1967
1968 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1969 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1970
1971 hl_irq_handler_eq(irq, &hdev->event_queue);
1972
1973 return IRQ_HANDLED;
1974 }
1975
1976 /*
1977 * For backward compatibility, new MSI interrupts should be set after the
1978 * existing CPU and NIC interrupts.
1979 */
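/*
 * Concretely: CQ vectors below GAUDI_EVENT_QUEUE_MSI_IDX and the CPU
 * event queue keep their vector number as-is, while any newer
 * interrupt is pushed past the NIC vectors and the CPU EQ vector,
 * i.e. nr + NIC_NUMBER_OF_ENGINES + 1.
 */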
1980 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1981 bool cpu_eq)
1982 {
1983 int msi_vec;
1984
1985 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1986 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1987 GAUDI_EVENT_QUEUE_MSI_IDX);
1988
1989 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1990 (nr + NIC_NUMBER_OF_ENGINES + 1);
1991
1992 return pci_irq_vector(hdev->pdev, msi_vec);
1993 }
1994
1995 static int gaudi_enable_msi_single(struct hl_device *hdev)
1996 {
1997 int rc, irq;
1998
1999 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2000
2001 irq = gaudi_pci_irq_vector(hdev, 0, false);
2002 rc = request_irq(irq, gaudi_irq_handler_single, 0,
2003 "gaudi single msi", hdev);
2004 if (rc)
2005 dev_err(hdev->dev,
2006 "Failed to request single MSI IRQ\n");
2007
2008 return rc;
2009 }
2010
2011 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2012 {
2013 int cq_cnt = hdev->asic_prop.completion_queues_count;
2014 int rc, i, irq_cnt_init, irq;
2015
2016 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2017 irq = gaudi_pci_irq_vector(hdev, i, false);
2018 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2019 &hdev->completion_queue[i]);
2020 if (rc) {
2021 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2022 goto free_irqs;
2023 }
2024 }
2025
2026 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2027 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2028 &hdev->event_queue);
2029 if (rc) {
2030 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2031 goto free_irqs;
2032 }
2033
2034 return 0;
2035
2036 free_irqs:
2037 for (i = 0 ; i < irq_cnt_init ; i++)
2038 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2039 &hdev->completion_queue[i]);
2040 return rc;
2041 }
2042
2043 static int gaudi_enable_msi(struct hl_device *hdev)
2044 {
2045 struct gaudi_device *gaudi = hdev->asic_specific;
2046 int rc;
2047
2048 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2049 return 0;
2050
2051 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2052 if (rc < 0) {
2053 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2054 return rc;
2055 }
2056
2057 if (rc < NUMBER_OF_INTERRUPTS) {
2058 gaudi->multi_msi_mode = false;
2059 rc = gaudi_enable_msi_single(hdev);
2060 } else {
2061 gaudi->multi_msi_mode = true;
2062 rc = gaudi_enable_msi_multi(hdev);
2063 }
2064
2065 if (rc)
2066 goto free_pci_irq_vectors;
2067
2068 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2069
2070 return 0;
2071
2072 free_pci_irq_vectors:
2073 pci_free_irq_vectors(hdev->pdev);
2074 return rc;
2075 }
2076
2077 static void gaudi_sync_irqs(struct hl_device *hdev)
2078 {
2079 struct gaudi_device *gaudi = hdev->asic_specific;
2080 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2081
2082 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2083 return;
2084
2085 /* Wait for all pending IRQs to be finished */
2086 if (gaudi->multi_msi_mode) {
2087 for (i = 0 ; i < cq_cnt ; i++)
2088 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2089
2090 synchronize_irq(gaudi_pci_irq_vector(hdev,
2091 GAUDI_EVENT_QUEUE_MSI_IDX,
2092 true));
2093 } else {
2094 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2095 }
2096 }
2097
2098 static void gaudi_disable_msi(struct hl_device *hdev)
2099 {
2100 struct gaudi_device *gaudi = hdev->asic_specific;
2101 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2102
2103 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2104 return;
2105
2106 gaudi_sync_irqs(hdev);
2107
2108 if (gaudi->multi_msi_mode) {
2109 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2110 true);
2111 free_irq(irq, &hdev->event_queue);
2112
2113 for (i = 0 ; i < cq_cnt ; i++) {
2114 irq = gaudi_pci_irq_vector(hdev, i, false);
2115 free_irq(irq, &hdev->completion_queue[i]);
2116 }
2117 } else {
2118 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2119 }
2120
2121 pci_free_irq_vectors(hdev->pdev);
2122
2123 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2124 }
2125
2126 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2127 {
2128 struct gaudi_device *gaudi = hdev->asic_specific;
2129
2130 if (hdev->asic_prop.fw_security_enabled)
2131 return;
2132
2133 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2134 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2135 return;
2136
2137 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2138 return;
2139
2140 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2141 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2142 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2143 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2145 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2147 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2149 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2151 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2153 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2155 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156
2157 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2172 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173
2174 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2175 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2176 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2177 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2179 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2181 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2183 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2185 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2187 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2189 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190
2191 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2192 }
2193
2194 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2195 {
2196 struct gaudi_device *gaudi = hdev->asic_specific;
2197
2198 if (hdev->asic_prop.fw_security_enabled)
2199 return;
2200
2201 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2202 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2203 return;
2204
2205 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2206 return;
2207
2208 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224
2225 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2240 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241
2242 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2243 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2244 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2245 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2247 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2249 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2251 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2253 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2255 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2257 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258
2259 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2260 }
2261
2262 static void gaudi_init_e2e(struct hl_device *hdev)
2263 {
2264 if (hdev->asic_prop.fw_security_enabled)
2265 return;
2266
2267 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2268 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2269 return;
2270
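/*
 * The writes below program per-router end-to-end (E2E) credit sizes
 * for HBM and PCI traffic on all SIF/NIF routers and DMA_IF down
 * channels, and then enable the E2E credits on each of them. The HBM
 * values are scaled down by 8 (">> 3") before being programmed; the
 * exact unit of these fields is assumed from that pattern rather than
 * documented here.
 */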
2271 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2272 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2273 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2274 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2275
2276 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2277 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2278 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2279 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2280
2281 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2282 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2283 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2284 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2285
2286 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2287 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2288 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2289 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2290
2291 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2292 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2293 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2294 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2295
2296 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2297 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2298 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2299 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2300
2301 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2302 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2303 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2304 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2305
2306 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2307 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2308 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2309 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2310
2311 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2312 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2313 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2314 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2315
2316 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2317 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2318 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2319 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2320
2321 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2322 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2323 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2324 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2325
2326 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2327 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2328 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2329 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2330
2331 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2332 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2333 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2334 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2335
2336 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2337 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2338 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2339 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2340
2341 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2342 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2343 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2344 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2345
2346 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2347 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2348 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2349 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2350
2351 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2352 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2353 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2354 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2355
2356 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2357 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2358 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2359 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2360
2361 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2362 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2363 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2364 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2365
2366 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2367 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2368 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2369 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2370
2371 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2372 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2373 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2374 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2375
2376 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2377 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2378 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2379 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2380
2381 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2382 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2383 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2384 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2385
2386 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2387 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2388 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2389 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2390
2391 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2392 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2393 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2394 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2395
2396 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2397 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2398 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2399 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2400
2401 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2402 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2403 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2404 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2405
2406 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2407 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2408 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2409 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2410
2411 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2412 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2413 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2414 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2415
2416 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2417 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2418 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2419 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2420
2421 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2422 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2423 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2424 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2425
2426 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2427 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2428 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2429 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2430
2431 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2432 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2433 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2434 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2435
2436 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2437 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2438 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2439 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2440
2441 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2442 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2443 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2444 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2445
2446 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2447 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2448 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2449 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2450
2451 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2452 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2453 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2454 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2455
2456 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2457 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2458 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2459 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2460
2461 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2462 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2463 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2464 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2465
2466 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2467 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2468 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2469 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2470
2471 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2472 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2473 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2474 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2475
2476 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2477 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2478 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2479 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2480
2481 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2482 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2483 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2484 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2485
2486 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2487 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2488 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2489 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2490
2491 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2492 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2493 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2494 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2495
2496 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2497 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2498 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2499 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2500
2501 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2502 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2503 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2504 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2505
2506 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2507 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2508 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2509 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2510 }
2511
2512 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2513 {
2514 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2515
2516 if (hdev->asic_prop.fw_security_enabled)
2517 return;
2518
2519 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2520 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2521 return;
2522
2523 hbm0_wr = 0x33333333;
2524 hbm0_rd = 0x77777777;
2525 hbm1_wr = 0x55555555;
2526 hbm1_rd = 0xDDDDDDDD;
2527
2528 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2529 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2530 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2531 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2532
2533 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2534 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2535 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2536 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2537
2538 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2539 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2540 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2541 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2542
2543 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2544 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2545 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2546 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2547
2548 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2549 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2550 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2551 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2552 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2553 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2554 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2555 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2556 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2557 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2558 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2559 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2560
2561 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2562 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2563 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2564 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2565 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2566 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2567 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2568 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2569 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2570 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2571 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2572 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2573 }
2574
2575 static void gaudi_init_golden_registers(struct hl_device *hdev)
2576 {
2577 u32 tpc_offset;
2578 int tpc_id, i;
2579
2580 gaudi_init_e2e(hdev);
2581 gaudi_init_hbm_cred(hdev);
2582
2583 for (tpc_id = 0, tpc_offset = 0;
2584 tpc_id < TPC_NUMBER_OF_ENGINES;
2585 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2586 /* Mask all arithmetic interrupts from TPC */
2587 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2588 /* Set 16 cache lines */
2589 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2590 ICACHE_FETCH_LINE_NUM, 2);
2591 }
2592
2593 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2594 for (i = 0 ; i < 128 ; i += 8)
2595 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2596
2597 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2598 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2599 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2600 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2601 }
2602
2603 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2604 int qman_id, dma_addr_t qman_pq_addr)
2605 {
2606 struct cpu_dyn_regs *dyn_regs =
2607 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2608 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2609 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2610 u32 q_off, dma_qm_offset;
2611 u32 dma_qm_err_cfg, irq_handler_offset;
2612
2613 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2614
2615 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2616 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2617 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2618 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2619 so_base_en_lo = lower_32_bits(CFG_BASE +
2620 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2621 so_base_en_hi = upper_32_bits(CFG_BASE +
2622 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2623 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2624 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2625 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2626 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2627 so_base_ws_lo = lower_32_bits(CFG_BASE +
2628 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2629 so_base_ws_hi = upper_32_bits(CFG_BASE +
2630 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2631
2632 q_off = dma_qm_offset + qman_id * 4;
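/*
 * The per-stream QMAN registers (PQ_BASE, PQ_SIZE, CP_* and friends)
 * are laid out as arrays of 32-bit registers, so the stream offset is
 * simply qman_id * 4 bytes on top of the DMA QMAN block offset.
 */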
2633
2634 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2635 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2636
2637 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2638 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2639 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2640
2641 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2642 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2643 QMAN_LDMA_SRC_OFFSET);
2644 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2645 QMAN_LDMA_DST_OFFSET);
2646
2647 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2648 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2649 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2650 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2651 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2652 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2653 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2654 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2655
2656 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2657
2658 /* The following configuration is needed only once per QMAN */
2659 if (qman_id == 0) {
2660 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2661 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2662 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2663
2664 /* Configure RAZWI IRQ */
2665 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2666 if (hdev->stop_on_err)
2667 dma_qm_err_cfg |=
2668 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2669
2670 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2671
2672 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2673 lower_32_bits(CFG_BASE + irq_handler_offset));
2674 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2675 upper_32_bits(CFG_BASE + irq_handler_offset));
2676
2677 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2678 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2679 dma_id);
2680
2681 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2682 QM_ARB_ERR_MSG_EN_MASK);
2683
2684 /* Set timeout to maximum */
2685 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2686
2687 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2688 QMAN_EXTERNAL_MAKE_TRUSTED);
2689
2690 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2691 }
2692 }
2693
2694 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2695 {
2696 struct cpu_dyn_regs *dyn_regs =
2697 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2698 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2699 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2700 u32 irq_handler_offset;
2701
2702 /* Set to maximum possible according to physical size */
2703 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2704 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2705
2706 /* WA for H/W bug H3-2116 */
2707 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2708
2709 /* The STOP_ON bit implies that the operation gets no completion in case of RAZWI */
2710 if (hdev->stop_on_err)
2711 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2712
2713 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2714
2715 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2716 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2717 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2718
2719 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2720 lower_32_bits(CFG_BASE + irq_handler_offset));
2721 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2722 upper_32_bits(CFG_BASE + irq_handler_offset));
2723
2724 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2725 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2726 WREG32(mmDMA0_CORE_PROT + dma_offset,
2727 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2728 /* If the channel is secured, it should be in MMU bypass mode */
2729 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2730 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2731 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2732 }
2733
2734 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2735 u32 enable_mask)
2736 {
2737 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738
2739 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2740 }
2741
2742 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2743 {
2744 struct gaudi_device *gaudi = hdev->asic_specific;
2745 struct hl_hw_queue *q;
2746 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2747
2748 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2749 return;
2750
2751 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2752 dma_id = gaudi_dma_assignment[i];
2753 /*
2754 * For queues after the CPU queue we need to add 1 to get the
2755 * correct queue index. In addition, we need to account for the
2756 * CPU EQ and the NIC IRQs in order to get the correct MSI register.
2757 */
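/*
 * E.g. for dma_id 0 and 1 the kernel queue index is just
 * 4 * dma_id + stream; for later channels the CPU queue occupies one
 * slot in between, hence cpu_skip = 1 below.
 */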
2758 if (dma_id > 1) {
2759 cpu_skip = 1;
2760 nic_skip = NIC_NUMBER_OF_ENGINES;
2761 } else {
2762 cpu_skip = 0;
2763 nic_skip = 0;
2764 }
2765
2766 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2767 q_idx = 4 * dma_id + j + cpu_skip;
2768 q = &hdev->kernel_queues[q_idx];
2769 q->cq_id = cq_id++;
2770 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2771 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2772 q->bus_address);
2773 }
2774
2775 gaudi_init_dma_core(hdev, dma_id);
2776
2777 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2778 }
2779
2780 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2781 }
2782
2783 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2784 int qman_id, u64 qman_base_addr)
2785 {
2786 struct cpu_dyn_regs *dyn_regs =
2787 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2788 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2789 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2790 u32 dma_qm_err_cfg, irq_handler_offset;
2791 u32 q_off, dma_qm_offset;
2792
2793 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2794
2795 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2796 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2797 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2798 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2799 so_base_en_lo = lower_32_bits(CFG_BASE +
2800 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2801 so_base_en_hi = upper_32_bits(CFG_BASE +
2802 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2803 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2804 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2805 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2806 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2807 so_base_ws_lo = lower_32_bits(CFG_BASE +
2808 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2809 so_base_ws_hi = upper_32_bits(CFG_BASE +
2810 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2811
2812 q_off = dma_qm_offset + qman_id * 4;
2813
2814 if (qman_id < 4) {
2815 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2816 lower_32_bits(qman_base_addr));
2817 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2818 upper_32_bits(qman_base_addr));
2819
2820 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2821 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2822 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2823
2824 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2825 QMAN_CPDMA_SIZE_OFFSET);
2826 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2827 QMAN_CPDMA_SRC_OFFSET);
2828 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2829 QMAN_CPDMA_DST_OFFSET);
2830 } else {
2831 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2832 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2833 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2834
2835 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2836 QMAN_LDMA_SIZE_OFFSET);
2837 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2838 QMAN_LDMA_SRC_OFFSET);
2839 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2840 QMAN_LDMA_DST_OFFSET);
2841
2842 /* Configure RAZWI IRQ */
2843 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2844 if (hdev->stop_on_err)
2845 dma_qm_err_cfg |=
2846 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2847
2848 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2849
2850 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2851 lower_32_bits(CFG_BASE + irq_handler_offset));
2852 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2853 upper_32_bits(CFG_BASE + irq_handler_offset));
2854
2855 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2856 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2857 dma_id);
2858
2859 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2860 QM_ARB_ERR_MSG_EN_MASK);
2861
2862 /* Set timeout to maximum */
2863 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2864
2865 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2866 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2867 QMAN_INTERNAL_MAKE_TRUSTED);
2868 }
2869
2870 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2871 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2872 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2873 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2874
2875 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2876 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2877 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2878 mtr_base_ws_lo);
2879 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2880 mtr_base_ws_hi);
2881 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2882 so_base_ws_lo);
2883 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2884 so_base_ws_hi);
2885 }
2886 }
2887
2888 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2889 {
2890 struct gaudi_device *gaudi = hdev->asic_specific;
2891 struct gaudi_internal_qman_info *q;
2892 u64 qman_base_addr;
2893 int i, j, dma_id, internal_q_index;
2894
2895 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2896 return;
2897
2898 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2899 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2900
2901 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2902 /*
2903 * Add the CPU queue in order to get the correct queue
2904 * number, as all internal queues are placed after it
2905 */
2906 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2907
2908 q = &gaudi->internal_qmans[internal_q_index];
2909 qman_base_addr = (u64) q->pq_dma_addr;
2910 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2911 qman_base_addr);
2912 }
2913
2914 /* Initializing lower CP for HBM DMA QMAN */
2915 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2916
2917 gaudi_init_dma_core(hdev, dma_id);
2918
2919 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2920 }
2921
2922 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2923 }
2924
2925 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2926 int qman_id, u64 qman_base_addr)
2927 {
2928 struct cpu_dyn_regs *dyn_regs =
2929 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2930 u32 mtr_base_lo, mtr_base_hi;
2931 u32 so_base_lo, so_base_hi;
2932 u32 irq_handler_offset;
2933 u32 q_off, mme_id;
2934 u32 mme_qm_err_cfg;
2935
2936 mtr_base_lo = lower_32_bits(CFG_BASE +
2937 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2938 mtr_base_hi = upper_32_bits(CFG_BASE +
2939 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2940 so_base_lo = lower_32_bits(CFG_BASE +
2941 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2942 so_base_hi = upper_32_bits(CFG_BASE +
2943 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2944
2945 q_off = mme_offset + qman_id * 4;
2946
2947 if (qman_id < 4) {
2948 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2949 lower_32_bits(qman_base_addr));
2950 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2951 upper_32_bits(qman_base_addr));
2952
2953 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2954 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2955 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2956
2957 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2958 QMAN_CPDMA_SIZE_OFFSET);
2959 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2960 QMAN_CPDMA_SRC_OFFSET);
2961 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2962 QMAN_CPDMA_DST_OFFSET);
2963 } else {
2964 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2965 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2966 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2967
2968 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2969 QMAN_LDMA_SIZE_OFFSET);
2970 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2971 QMAN_LDMA_SRC_OFFSET);
2972 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2973 QMAN_LDMA_DST_OFFSET);
2974
2975 /* Configure RAZWI IRQ */
2976 mme_id = mme_offset /
2977 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2978
2979 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2980 if (hdev->stop_on_err)
2981 mme_qm_err_cfg |=
2982 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2983
2984 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2985
2986 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2987 lower_32_bits(CFG_BASE + irq_handler_offset));
2988 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2989 upper_32_bits(CFG_BASE + irq_handler_offset));
2990
2991 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2992 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2993 mme_id);
2994
2995 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2996 QM_ARB_ERR_MSG_EN_MASK);
2997
2998 /* Set timeout to maximum */
2999 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3000
3001 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3002 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3003 QMAN_INTERNAL_MAKE_TRUSTED);
3004 }
3005
3006 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3007 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3008 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3009 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3010 }
3011
3012 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3013 {
3014 struct gaudi_device *gaudi = hdev->asic_specific;
3015 struct gaudi_internal_qman_info *q;
3016 u64 qman_base_addr;
3017 u32 mme_offset;
3018 int i, internal_q_index;
3019
3020 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3021 return;
3022
3023 /*
3024 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3025 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3026 */
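/*
 * mme_offset therefore starts at the MME2 QMAN block for the first
 * four streams (GAUDI_QUEUE_ID_MME_0_X) and is reset to 0, i.e. the
 * MME0 QMAN block, for the next four (GAUDI_QUEUE_ID_MME_1_X).
 */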
3027
3028 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3029
3030 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3031 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3032 q = &gaudi->internal_qmans[internal_q_index];
3033 qman_base_addr = (u64) q->pq_dma_addr;
3034 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3035 qman_base_addr);
3036 if (i == 3)
3037 mme_offset = 0;
3038 }
3039
3040 /* Initializing lower CP for MME QMANs */
3041 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3042 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3043 gaudi_init_mme_qman(hdev, 0, 4, 0);
3044
3045 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3046 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3047
3048 gaudi->hw_cap_initialized |= HW_CAP_MME;
3049 }
3050
3051 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3052 int qman_id, u64 qman_base_addr)
3053 {
3054 struct cpu_dyn_regs *dyn_regs =
3055 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3056 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3057 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3058 u32 tpc_qm_err_cfg, irq_handler_offset;
3059 u32 q_off, tpc_id;
3060
3061 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3062 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3063 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3064 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065 so_base_en_lo = lower_32_bits(CFG_BASE +
3066 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3067 so_base_en_hi = upper_32_bits(CFG_BASE +
3068 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3069 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3070 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3071 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3072 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3073 so_base_ws_lo = lower_32_bits(CFG_BASE +
3074 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3075 so_base_ws_hi = upper_32_bits(CFG_BASE +
3076 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3077
3078 q_off = tpc_offset + qman_id * 4;
3079
3080 tpc_id = tpc_offset /
3081 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
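/*
 * tpc_id is recovered from the register-block stride between two
 * consecutive TPC QMANs, so the same helper serves all TPC engines.
 */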
3082
3083 if (qman_id < 4) {
3084 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3085 lower_32_bits(qman_base_addr));
3086 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3087 upper_32_bits(qman_base_addr));
3088
3089 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3090 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3091 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3092
3093 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3094 QMAN_CPDMA_SIZE_OFFSET);
3095 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3096 QMAN_CPDMA_SRC_OFFSET);
3097 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3098 QMAN_CPDMA_DST_OFFSET);
3099 } else {
3100 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3101 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3102 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3103
3104 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3105 QMAN_LDMA_SIZE_OFFSET);
3106 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3107 QMAN_LDMA_SRC_OFFSET);
3108 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3109 QMAN_LDMA_DST_OFFSET);
3110
3111 /* Configure RAZWI IRQ */
3112 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3113 if (hdev->stop_on_err)
3114 tpc_qm_err_cfg |=
3115 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3116
3117 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3118
3119 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3120 lower_32_bits(CFG_BASE + irq_handler_offset));
3121 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3122 upper_32_bits(CFG_BASE + irq_handler_offset));
3123
3124 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3125 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3126 tpc_id);
3127
3128 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3129 QM_ARB_ERR_MSG_EN_MASK);
3130
3131 /* Set timeout to maximum */
3132 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3133
3134 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3135 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3136 QMAN_INTERNAL_MAKE_TRUSTED);
3137 }
3138
3139 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3140 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3141 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3142 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3143
3144 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3145 if (tpc_id == 6) {
3146 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3147 mtr_base_ws_lo);
3148 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3149 mtr_base_ws_hi);
3150 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3151 so_base_ws_lo);
3152 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3153 so_base_ws_hi);
3154 }
3155 }
3156
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3158 {
3159 struct gaudi_device *gaudi = hdev->asic_specific;
3160 struct gaudi_internal_qman_info *q;
3161 u64 qman_base_addr;
3162 u32 so_base_hi, tpc_offset = 0;
3163 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3164 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3165 int i, tpc_id, internal_q_index;
3166
3167 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3168 return;
3169
3170 so_base_hi = upper_32_bits(CFG_BASE +
3171 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3172
3173 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3174 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3175 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3176 tpc_id * QMAN_STREAMS + i;
3177 q = &gaudi->internal_qmans[internal_q_index];
3178 qman_base_addr = (u64) q->pq_dma_addr;
3179 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3180 qman_base_addr);
3181
3182 if (i == 3) {
3183 /* Initializing lower CP for TPC QMAN */
3184 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3185
3186 /* Enable the QMAN and TPC channel */
3187 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3188 QMAN_TPC_ENABLE);
3189 }
3190 }
3191
3192 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3193 so_base_hi);
3194
3195 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3196
3197 gaudi->hw_cap_initialized |=
3198 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3199 }
3200 }
3201
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
3204 {
3205 struct cpu_dyn_regs *dyn_regs =
3206 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3207 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3208 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3209 u32 nic_qm_err_cfg, irq_handler_offset;
3210 u32 q_off;
3211
3212 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3213 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3214 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3215 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3216 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3217 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3218 so_base_en_hi = upper_32_bits(CFG_BASE +
3219 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3220 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3221 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3222 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3223 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3224 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3225 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3226 so_base_ws_hi = upper_32_bits(CFG_BASE +
3227 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228
3229 q_off = nic_offset + qman_id * 4;
3230
3231 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3232 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3233
3234 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3235 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3236 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3237
3238 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3239 QMAN_LDMA_SIZE_OFFSET);
3240 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3241 QMAN_LDMA_SRC_OFFSET);
3242 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3243 QMAN_LDMA_DST_OFFSET);
3244
3245 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3246 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3247 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3248 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3249
3250 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3251 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3252 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3253 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3254 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3255
3256 if (qman_id == 0) {
3257 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3258 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3259 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3260
3261 /* Configure RAZWI IRQ */
3262 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3263 if (hdev->stop_on_err)
3264 nic_qm_err_cfg |=
3265 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3266
3267 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3268
3269 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3270 lower_32_bits(CFG_BASE + irq_handler_offset));
3271 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3272 upper_32_bits(CFG_BASE + irq_handler_offset));
3273
3274 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3275 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3276 nic_id);
3277
3278 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3279 QM_ARB_ERR_MSG_EN_MASK);
3280
3281 /* Set timeout to maximum */
3282 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3283
3284 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3285 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3286 QMAN_INTERNAL_MAKE_TRUSTED);
3287 }
3288 }
3289
static void gaudi_init_nic_qmans(struct hl_device *hdev)
3291 {
3292 struct gaudi_device *gaudi = hdev->asic_specific;
3293 struct gaudi_internal_qman_info *q;
3294 u64 qman_base_addr;
3295 u32 nic_offset = 0;
3296 u32 nic_delta_between_qmans =
3297 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3298 u32 nic_delta_between_nics =
3299 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3300 int i, nic_id, internal_q_index;
3301
3302 if (!hdev->nic_ports_mask)
3303 return;
3304
3305 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3306 return;
3307
3308 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3309
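	/*
	 * Each NIC macro holds two QMANs. The offset advances by one QMAN
	 * stride per port; after the second QMAN of a macro (odd nic_id) it
	 * rewinds the two QMAN strides and jumps by the NIC-to-NIC stride to
	 * reach the next macro.
	 */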
3310 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3311 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3312 nic_offset += nic_delta_between_qmans;
3313 if (nic_id & 1) {
3314 nic_offset -= (nic_delta_between_qmans * 2);
3315 nic_offset += nic_delta_between_nics;
3316 }
3317 continue;
3318 }
3319
3320 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3321 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3322 nic_id * QMAN_STREAMS + i;
3323 q = &gaudi->internal_qmans[internal_q_index];
3324 qman_base_addr = (u64) q->pq_dma_addr;
3325 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3326 qman_base_addr, nic_id);
3327 }
3328
3329 /* Enable the QMAN */
3330 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3331
3332 nic_offset += nic_delta_between_qmans;
3333 if (nic_id & 1) {
3334 nic_offset -= (nic_delta_between_qmans * 2);
3335 nic_offset += nic_delta_between_nics;
3336 }
3337
3338 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3339 }
3340 }
3341
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3343 {
3344 struct gaudi_device *gaudi = hdev->asic_specific;
3345
3346 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3347 return;
3348
3349 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3350 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3351 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3352 }
3353
static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3355 {
3356 struct gaudi_device *gaudi = hdev->asic_specific;
3357
3358 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3359 return;
3360
3361 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3362 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3363 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3364 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3365 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3366 }
3367
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3369 {
3370 struct gaudi_device *gaudi = hdev->asic_specific;
3371
3372 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3373 return;
3374
3375 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3376 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3377 }
3378
static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3380 {
3381 struct gaudi_device *gaudi = hdev->asic_specific;
3382 u32 tpc_offset = 0;
3383 int tpc_id;
3384
3385 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3386 return;
3387
3388 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3389 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3390 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3391 }
3392 }
3393
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3395 {
3396 struct gaudi_device *gaudi = hdev->asic_specific;
3397 u32 nic_mask, nic_offset = 0;
3398 u32 nic_delta_between_qmans =
3399 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3400 u32 nic_delta_between_nics =
3401 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3402 int nic_id;
3403
3404 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3405 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3406
3407 if (gaudi->hw_cap_initialized & nic_mask)
3408 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3409
3410 nic_offset += nic_delta_between_qmans;
3411 if (nic_id & 1) {
3412 nic_offset -= (nic_delta_between_qmans * 2);
3413 nic_offset += nic_delta_between_nics;
3414 }
3415 }
3416 }
3417
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3419 {
3420 struct gaudi_device *gaudi = hdev->asic_specific;
3421
3422 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3423 return;
3424
3425 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3426 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3427 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3428 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3429 }
3430
static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3432 {
3433 struct gaudi_device *gaudi = hdev->asic_specific;
3434
3435 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3436 return;
3437
3438 /* Stop CPs of HBM DMA QMANs */
3439
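	/* 0x1F stops all five CPs: the four stream (upper) CPs and the lower CP */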
3440 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3441 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3442 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3445 }
3446
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3448 {
3449 struct gaudi_device *gaudi = hdev->asic_specific;
3450
3451 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3452 return;
3453
3454 /* Stop CPs of MME QMANs */
3455 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3456 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3457 }
3458
static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3460 {
3461 struct gaudi_device *gaudi = hdev->asic_specific;
3462
3463 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3464 return;
3465
3466 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3467 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3468 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3469 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474 }
3475
static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3477 {
3478 struct gaudi_device *gaudi = hdev->asic_specific;
3479
3480 /* Stop upper CPs of QMANs */
3481
3482 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3483 WREG32(mmNIC0_QM0_GLBL_CFG1,
3484 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3485 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3486 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3487
3488 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3489 WREG32(mmNIC0_QM1_GLBL_CFG1,
3490 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3491 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3492 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3493
3494 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3495 WREG32(mmNIC1_QM0_GLBL_CFG1,
3496 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3497 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3498 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3499
3500 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3501 WREG32(mmNIC1_QM1_GLBL_CFG1,
3502 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3503 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3504 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3505
3506 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3507 WREG32(mmNIC2_QM0_GLBL_CFG1,
3508 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3509 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3510 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3511
3512 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3513 WREG32(mmNIC2_QM1_GLBL_CFG1,
3514 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3515 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3516 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3517
3518 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3519 WREG32(mmNIC3_QM0_GLBL_CFG1,
3520 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3521 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3522 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3523
3524 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3525 WREG32(mmNIC3_QM1_GLBL_CFG1,
3526 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3527 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3528 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3529
3530 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3531 WREG32(mmNIC4_QM0_GLBL_CFG1,
3532 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3533 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3534 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3535
3536 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3537 WREG32(mmNIC4_QM1_GLBL_CFG1,
3538 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3539 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3540 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3541 }
3542
static void gaudi_pci_dma_stall(struct hl_device *hdev)
3544 {
3545 struct gaudi_device *gaudi = hdev->asic_specific;
3546
3547 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3548 return;
3549
3550 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3551 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3552 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3553 }
3554
static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3556 {
3557 struct gaudi_device *gaudi = hdev->asic_specific;
3558
3559 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3560 return;
3561
3562 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3563 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3564 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567 }
3568
static void gaudi_mme_stall(struct hl_device *hdev)
3570 {
3571 struct gaudi_device *gaudi = hdev->asic_specific;
3572
3573 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3574 return;
3575
3576 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3577 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3578 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3579 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3580 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3581 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3582 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3583 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3584 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3585 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3586 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3587 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3588 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3589 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3590 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3591 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3592 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3593 }
3594
static void gaudi_tpc_stall(struct hl_device *hdev)
3596 {
3597 struct gaudi_device *gaudi = hdev->asic_specific;
3598
3599 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3600 return;
3601
3602 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3603 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3604 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3605 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3606 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3607 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3608 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3609 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3610 }
3611
static void gaudi_disable_clock_gating(struct hl_device *hdev)
3613 {
3614 u32 qman_offset;
3615 int i;
3616
3617 if (hdev->asic_prop.fw_security_enabled)
3618 return;
3619
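	/*
	 * Clock gating is disabled by zeroing the CGM_CFG/CGM_CFG1 registers
	 * of the DMA, MME and TPC QMANs. When FW security is enabled these
	 * registers are left for the FW to manage, hence the early return
	 * above.
	 */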
3620 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3621 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3622 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3623
3624 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3625 }
3626
3627 WREG32(mmMME0_QM_CGM_CFG, 0);
3628 WREG32(mmMME0_QM_CGM_CFG1, 0);
3629 WREG32(mmMME2_QM_CGM_CFG, 0);
3630 WREG32(mmMME2_QM_CGM_CFG1, 0);
3631
3632 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3633 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3634 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3635
3636 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3637 }
3638 }
3639
static void gaudi_enable_timestamp(struct hl_device *hdev)
3641 {
3642 /* Disable the timestamp counter */
3643 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3644
3645 /* Zero the lower/upper parts of the 64-bit counter */
3646 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3647 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3648
3649 /* Enable the counter */
3650 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3651 }
3652
static void gaudi_disable_timestamp(struct hl_device *hdev)
3654 {
3655 /* Disable the timestamp counter */
3656 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3657 }
3658
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3660 {
3661 u32 wait_timeout_ms;
3662
3663 if (hdev->pldm)
3664 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3665 else
3666 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3667
3668 if (fw_reset)
3669 goto skip_engines;
3670
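	/*
	 * Halt order: first stop the QMAN CPs so no new work is dispatched,
	 * then stall the engine cores themselves, and only then disable the
	 * QMANs. The sleeps in between give in-flight work time to drain.
	 */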
3671 gaudi_stop_nic_qmans(hdev);
3672 gaudi_stop_mme_qmans(hdev);
3673 gaudi_stop_tpc_qmans(hdev);
3674 gaudi_stop_hbm_dma_qmans(hdev);
3675 gaudi_stop_pci_dma_qmans(hdev);
3676
3677 msleep(wait_timeout_ms);
3678
3679 gaudi_pci_dma_stall(hdev);
3680 gaudi_hbm_dma_stall(hdev);
3681 gaudi_tpc_stall(hdev);
3682 gaudi_mme_stall(hdev);
3683
3684 msleep(wait_timeout_ms);
3685
3686 gaudi_disable_nic_qmans(hdev);
3687 gaudi_disable_mme_qmans(hdev);
3688 gaudi_disable_tpc_qmans(hdev);
3689 gaudi_disable_hbm_dma_qmans(hdev);
3690 gaudi_disable_pci_dma_qmans(hdev);
3691
3692 gaudi_disable_timestamp(hdev);
3693
3694 skip_engines:
3695 gaudi_disable_msi(hdev);
3696 }
3697
static int gaudi_mmu_init(struct hl_device *hdev)
3699 {
3700 struct asic_fixed_properties *prop = &hdev->asic_prop;
3701 struct gaudi_device *gaudi = hdev->asic_specific;
3702 u64 hop0_addr;
3703 int rc, i;
3704
3705 if (!hdev->mmu_enable)
3706 return 0;
3707
3708 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3709 return 0;
3710
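	/*
	 * Every ASID gets its own hop0 table; the tables are laid out
	 * consecutively in the MMU page-table area, mmu_hop_table_size apart.
	 */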
3711 for (i = 0 ; i < prop->max_asid ; i++) {
3712 hop0_addr = prop->mmu_pgt_addr +
3713 (i * prop->mmu_hop_table_size);
3714
3715 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3716 if (rc) {
3717 dev_err(hdev->dev,
3718 "failed to set hop0 addr for asid %d\n", i);
3719 goto err;
3720 }
3721 }
3722
3723 /* init MMU cache manage page */
3724 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3725 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3726
3727 /* mem cache invalidation */
3728 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3729
3730 hl_mmu_invalidate_cache(hdev, true, 0);
3731
3732 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3733 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3734
3735 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3736
3737 /*
3738 * The H/W expects the first PI after init to be 1. After wraparound
3739 * we'll write 0.
3740 */
3741 gaudi->mmu_cache_inv_pi = 1;
3742
3743 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3744
3745 return 0;
3746
3747 err:
3748 return rc;
3749 }
3750
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3752 {
3753 void __iomem *dst;
3754
3755 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3756
3757 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3758 }
3759
static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3761 {
3762 void __iomem *dst;
3763
3764 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3765
3766 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3767 }
3768
static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3770 {
3771 struct dynamic_fw_load_mgr *dynamic_loader;
3772 struct cpu_dyn_regs *dyn_regs;
3773
3774 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3775
	/*
	 * Set initial values for a few specific dynamic registers. Before the
	 * first descriptor is read from the FW, these values have to be
	 * hard-coded. In later stages of the protocol they are updated
	 * automatically by reading the FW descriptor, so the data there is
	 * always up-to-date.
	 */
3783 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3784 dyn_regs->kmd_msg_to_cpu =
3785 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3786 dyn_regs->cpu_cmd_status_to_host =
3787 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3788
3789 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3790 }
3791
static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3793 {
3794 struct static_fw_load_mgr *static_loader;
3795
3796 static_loader = &hdev->fw_loader.static_loader;
3797
3798 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3799 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3800 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3801 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3802 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3803 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3804 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3805 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3806 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3807 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3808 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3809 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3810 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3811 GAUDI_PLDM_RESET_WAIT_MSEC :
3812 GAUDI_CPU_RESET_WAIT_MSEC;
3813 }
3814
static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3816 {
3817 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3818
3819 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3820 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3821 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3822 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3823 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3824 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3825 }
3826
static void gaudi_init_firmware_loader(struct hl_device *hdev)
3828 {
3829 struct asic_fixed_properties *prop = &hdev->asic_prop;
3830 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3831
3832 /* fill common fields */
3833 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3834 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3835 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3836 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3837 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3838 fw_loader->skip_bmc = !hdev->bmc_enable;
3839 fw_loader->sram_bar_id = SRAM_BAR_ID;
3840 fw_loader->dram_bar_id = HBM_BAR_ID;
3841
3842 if (prop->dynamic_fw_load)
3843 gaudi_init_dynamic_firmware_loader(hdev);
3844 else
3845 gaudi_init_static_firmware_loader(hdev);
3846 }
3847
static int gaudi_init_cpu(struct hl_device *hdev)
3849 {
3850 struct gaudi_device *gaudi = hdev->asic_specific;
3851 int rc;
3852
3853 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3854 return 0;
3855
3856 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3857 return 0;
3858
	/*
	 * The device CPU works with 40-bit addresses.
	 * This register sets the extension to 50 bits.
	 */
3863 if (!hdev->asic_prop.fw_security_enabled)
3864 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3865
3866 rc = hl_fw_init_cpu(hdev);
3867
3868 if (rc)
3869 return rc;
3870
3871 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3872
3873 return 0;
3874 }
3875
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3877 {
3878 struct cpu_dyn_regs *dyn_regs =
3879 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3880 struct asic_fixed_properties *prop = &hdev->asic_prop;
3881 struct gaudi_device *gaudi = hdev->asic_specific;
3882 u32 status, irq_handler_offset;
3883 struct hl_eq *eq;
3884 struct hl_hw_queue *cpu_pq =
3885 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3886 int err;
3887
3888 if (!hdev->cpu_queues_enable)
3889 return 0;
3890
3891 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3892 return 0;
3893
3894 eq = &hdev->event_queue;
3895
3896 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3897 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3898
3899 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3900 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3901
3902 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3903 lower_32_bits(hdev->cpu_accessible_dma_address));
3904 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3905 upper_32_bits(hdev->cpu_accessible_dma_address));
3906
3907 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3908 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3909 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3910
3911 /* Used for EQ CI */
3912 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3913
3914 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3915
3916 if (gaudi->multi_msi_mode)
3917 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3918 else
3919 WREG32(mmCPU_IF_QUEUE_INIT,
3920 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3921
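	/*
	 * Trigger the PI-update interrupt towards the device CPU (via the GIC
	 * distributor or the FW-provided register) and then poll until the FW
	 * reports that the queues are ready for the host.
	 */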
3922 irq_handler_offset = prop->gic_interrupts_enable ?
3923 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3924 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3925
3926 WREG32(irq_handler_offset,
3927 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3928
3929 err = hl_poll_timeout(
3930 hdev,
3931 mmCPU_IF_QUEUE_INIT,
3932 status,
3933 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3934 1000,
3935 cpu_timeout);
3936
3937 if (err) {
3938 dev_err(hdev->dev,
3939 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3940 return -EIO;
3941 }
3942
3943 /* update FW application security bits */
3944 if (prop->fw_cpu_boot_dev_sts0_valid)
3945 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3946 if (prop->fw_cpu_boot_dev_sts1_valid)
3947 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3948
3949 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3950 return 0;
3951 }
3952
static void gaudi_pre_hw_init(struct hl_device *hdev)
3954 {
3955 /* Perform read from the device to make sure device is up */
3956 RREG32(mmHW_STATE);
3957
3958 if (!hdev->asic_prop.fw_security_enabled) {
3959 /* Set the access through PCI bars (Linux driver only) as
3960 * secured
3961 */
3962 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3963 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3964 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3965
3966 /* Perform read to flush the waiting writes to ensure
3967 * configuration was set in the device
3968 */
3969 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3970 }
3971
3972 /*
3973 * Let's mark in the H/W that we have reached this point. We check
3974 * this value in the reset_before_init function to understand whether
3975 * we need to reset the chip before doing H/W init. This register is
3976 * cleared by the H/W upon H/W reset
3977 */
3978 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3979 }
3980
static int gaudi_hw_init(struct hl_device *hdev)
3982 {
3983 struct gaudi_device *gaudi = hdev->asic_specific;
3984 int rc;
3985
3986 gaudi_pre_hw_init(hdev);
3987
3988 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3989 * So we set it here and if anyone tries to move it later to
3990 * a different address, there will be an error
3991 */
3992 if (hdev->asic_prop.iatu_done_by_fw)
3993 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3994
	/*
	 * Before pushing u-boot/linux to the device, the HBM BAR must be set
	 * to the base address of the DRAM.
	 */
3999 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4000 dev_err(hdev->dev,
4001 "failed to map HBM bar to DRAM base address\n");
4002 return -EIO;
4003 }
4004
4005 rc = gaudi_init_cpu(hdev);
4006 if (rc) {
4007 dev_err(hdev->dev, "failed to initialize CPU\n");
4008 return rc;
4009 }
4010
4011 /* In case the clock gating was enabled in preboot we need to disable
4012 * it here before touching the MME/TPC registers.
4013 */
4014 gaudi_disable_clock_gating(hdev);
4015
4016 /* SRAM scrambler must be initialized after CPU is running from HBM */
4017 gaudi_init_scrambler_sram(hdev);
4018
4019 /* This is here just in case we are working without CPU */
4020 gaudi_init_scrambler_hbm(hdev);
4021
4022 gaudi_init_golden_registers(hdev);
4023
4024 rc = gaudi_mmu_init(hdev);
4025 if (rc)
4026 return rc;
4027
4028 gaudi_init_security(hdev);
4029
4030 gaudi_init_pci_dma_qmans(hdev);
4031
4032 gaudi_init_hbm_dma_qmans(hdev);
4033
4034 gaudi_init_mme_qmans(hdev);
4035
4036 gaudi_init_tpc_qmans(hdev);
4037
4038 gaudi_init_nic_qmans(hdev);
4039
4040 gaudi_enable_timestamp(hdev);
4041
4042 /* MSI must be enabled before CPU queues and NIC are initialized */
4043 rc = gaudi_enable_msi(hdev);
4044 if (rc)
4045 goto disable_queues;
4046
4047 /* must be called after MSI was enabled */
4048 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4049 if (rc) {
4050 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4051 rc);
4052 goto disable_msi;
4053 }
4054
4055 /* Perform read from the device to flush all configuration */
4056 RREG32(mmHW_STATE);
4057
4058 return 0;
4059
4060 disable_msi:
4061 gaudi_disable_msi(hdev);
4062 disable_queues:
4063 gaudi_disable_mme_qmans(hdev);
4064 gaudi_disable_pci_dma_qmans(hdev);
4065
4066 return rc;
4067 }
4068
static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4070 {
4071 struct cpu_dyn_regs *dyn_regs =
4072 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4073 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4074 struct gaudi_device *gaudi = hdev->asic_specific;
4075 bool driver_performs_reset;
4076
4077 if (!hard_reset) {
4078 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4079 return;
4080 }
4081
4082 if (hdev->pldm) {
4083 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4084 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4085 } else {
4086 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4087 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4088 }
4089
4090 if (fw_reset) {
4091 dev_dbg(hdev->dev,
4092 "Firmware performs HARD reset, going to wait %dms\n",
4093 reset_timeout_ms);
4094
4095 goto skip_reset;
4096 }
4097
4098 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4099 !hdev->asic_prop.hard_reset_done_by_fw);
4100
4101 /* Set device to handle FLR by H/W as we will put the device CPU to
4102 * halt mode
4103 */
4104 if (driver_performs_reset)
4105 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4106 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4107
4108 /* If linux is loaded in the device CPU we need to communicate with it
4109 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4110 * registers in case of old F/Ws
4111 */
4112 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4113 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4114 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4115 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4116
4117 WREG32(irq_handler_offset,
4118 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4119
4120 /* This is a hail-mary attempt to revive the card in the small chance that the
4121 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4122 * In that case, triggering reset through GIC won't help. We need to trigger the
4123 * reset as if Linux wasn't loaded.
4124 *
4125 * We do it only if the reset cause was HB, because that would be the indication
4126 * of such an event.
4127 *
4128 * In case watchdog hasn't expired but we still got HB, then this won't do any
4129 * damage.
4130 */
4131 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4132 if (hdev->asic_prop.hard_reset_done_by_fw)
4133 hl_fw_ask_hard_reset_without_linux(hdev);
4134 else
4135 hl_fw_ask_halt_machine_without_linux(hdev);
4136 }
4137 } else {
4138 if (hdev->asic_prop.hard_reset_done_by_fw)
4139 hl_fw_ask_hard_reset_without_linux(hdev);
4140 else
4141 hl_fw_ask_halt_machine_without_linux(hdev);
4142 }
4143
4144 if (driver_performs_reset) {
4145
4146 /* Configure the reset registers. Must be done as early as
4147 * possible in case we fail during H/W initialization
4148 */
4149 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4150 (CFG_RST_H_DMA_MASK |
4151 CFG_RST_H_MME_MASK |
4152 CFG_RST_H_SM_MASK |
4153 CFG_RST_H_TPC_7_MASK));
4154
4155 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4156
4157 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4158 (CFG_RST_H_HBM_MASK |
4159 CFG_RST_H_TPC_7_MASK |
4160 CFG_RST_H_NIC_MASK |
4161 CFG_RST_H_SM_MASK |
4162 CFG_RST_H_DMA_MASK |
4163 CFG_RST_H_MME_MASK |
4164 CFG_RST_H_CPU_MASK |
4165 CFG_RST_H_MMU_MASK));
4166
4167 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4168 (CFG_RST_L_IF_MASK |
4169 CFG_RST_L_PSOC_MASK |
4170 CFG_RST_L_TPC_MASK));
4171
4172 msleep(cpu_timeout_ms);
4173
4174 /* Tell ASIC not to re-initialize PCIe */
4175 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4176
4177 /* Restart BTL/BLR upon hard-reset */
4178 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4179
4180 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4181 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4182
4183 dev_dbg(hdev->dev,
4184 "Issued HARD reset command, going to wait %dms\n",
4185 reset_timeout_ms);
4186 } else {
4187 dev_dbg(hdev->dev,
4188 "Firmware performs HARD reset, going to wait %dms\n",
4189 reset_timeout_ms);
4190 }
4191
4192 skip_reset:
4193 /*
4194 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4195 * itself is in reset. Need to wait until the reset is deasserted
4196 */
4197 msleep(reset_timeout_ms);
4198
4199 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4200 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4201 dev_err(hdev->dev,
4202 "Timeout while waiting for device to reset 0x%x\n",
4203 status);
4204
4205 if (gaudi) {
4206 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4207 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4208 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4209 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4210 HW_CAP_HBM_SCRAMBLER);
4211
4212 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4213
4214 hdev->device_cpu_is_halted = false;
4215 }
4216 }
4217
static int gaudi_suspend(struct hl_device *hdev)
4219 {
4220 int rc;
4221
4222 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4223 if (rc)
4224 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4225
4226 return rc;
4227 }
4228
static int gaudi_resume(struct hl_device *hdev)
4230 {
4231 return gaudi_init_iatu(hdev);
4232 }
4233
static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4236 {
4237 int rc;
4238
4239 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4240 VM_DONTCOPY | VM_NORESERVE;
4241
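	/*
	 * The DMA handle was shifted by HOST_PHYS_BASE when it was allocated,
	 * so remove that device-side offset before handing the address to
	 * dma_mmap_coherent().
	 */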
4242 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4243 (dma_addr - HOST_PHYS_BASE), size);
4244 if (rc)
4245 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4246
4247 return rc;
4248 }
4249
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4251 {
4252 struct cpu_dyn_regs *dyn_regs =
4253 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4254 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4255 struct gaudi_device *gaudi = hdev->asic_specific;
4256 bool invalid_queue = false;
4257 int dma_id;
4258
4259 switch (hw_queue_id) {
4260 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4261 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4262 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4263 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4264 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4265 break;
4266
4267 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4268 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4269 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4270 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4271 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4272 break;
4273
4274 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4275 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4276 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4277 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4278 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4279 break;
4280
4281 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4282 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4283 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4284 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4285 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4286 break;
4287
4288 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4289 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4290 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4291 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4292 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4293 break;
4294
4295 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4296 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4297 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4298 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4299 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4300 break;
4301
4302 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4303 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4304 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4305 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4306 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4307 break;
4308
4309 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4310 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4311 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4312 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4313 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4314 break;
4315
4316 case GAUDI_QUEUE_ID_CPU_PQ:
4317 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4318 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4319 else
4320 invalid_queue = true;
4321 break;
4322
4323 case GAUDI_QUEUE_ID_MME_0_0:
4324 db_reg_offset = mmMME2_QM_PQ_PI_0;
4325 break;
4326
4327 case GAUDI_QUEUE_ID_MME_0_1:
4328 db_reg_offset = mmMME2_QM_PQ_PI_1;
4329 break;
4330
4331 case GAUDI_QUEUE_ID_MME_0_2:
4332 db_reg_offset = mmMME2_QM_PQ_PI_2;
4333 break;
4334
4335 case GAUDI_QUEUE_ID_MME_0_3:
4336 db_reg_offset = mmMME2_QM_PQ_PI_3;
4337 break;
4338
4339 case GAUDI_QUEUE_ID_MME_1_0:
4340 db_reg_offset = mmMME0_QM_PQ_PI_0;
4341 break;
4342
4343 case GAUDI_QUEUE_ID_MME_1_1:
4344 db_reg_offset = mmMME0_QM_PQ_PI_1;
4345 break;
4346
4347 case GAUDI_QUEUE_ID_MME_1_2:
4348 db_reg_offset = mmMME0_QM_PQ_PI_2;
4349 break;
4350
4351 case GAUDI_QUEUE_ID_MME_1_3:
4352 db_reg_offset = mmMME0_QM_PQ_PI_3;
4353 break;
4354
4355 case GAUDI_QUEUE_ID_TPC_0_0:
4356 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4357 break;
4358
4359 case GAUDI_QUEUE_ID_TPC_0_1:
4360 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4361 break;
4362
4363 case GAUDI_QUEUE_ID_TPC_0_2:
4364 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4365 break;
4366
4367 case GAUDI_QUEUE_ID_TPC_0_3:
4368 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4369 break;
4370
4371 case GAUDI_QUEUE_ID_TPC_1_0:
4372 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4373 break;
4374
4375 case GAUDI_QUEUE_ID_TPC_1_1:
4376 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4377 break;
4378
4379 case GAUDI_QUEUE_ID_TPC_1_2:
4380 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4381 break;
4382
4383 case GAUDI_QUEUE_ID_TPC_1_3:
4384 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4385 break;
4386
4387 case GAUDI_QUEUE_ID_TPC_2_0:
4388 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4389 break;
4390
4391 case GAUDI_QUEUE_ID_TPC_2_1:
4392 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4393 break;
4394
4395 case GAUDI_QUEUE_ID_TPC_2_2:
4396 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4397 break;
4398
4399 case GAUDI_QUEUE_ID_TPC_2_3:
4400 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4401 break;
4402
4403 case GAUDI_QUEUE_ID_TPC_3_0:
4404 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4405 break;
4406
4407 case GAUDI_QUEUE_ID_TPC_3_1:
4408 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4409 break;
4410
4411 case GAUDI_QUEUE_ID_TPC_3_2:
4412 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4413 break;
4414
4415 case GAUDI_QUEUE_ID_TPC_3_3:
4416 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4417 break;
4418
4419 case GAUDI_QUEUE_ID_TPC_4_0:
4420 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4421 break;
4422
4423 case GAUDI_QUEUE_ID_TPC_4_1:
4424 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4425 break;
4426
4427 case GAUDI_QUEUE_ID_TPC_4_2:
4428 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4429 break;
4430
4431 case GAUDI_QUEUE_ID_TPC_4_3:
4432 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4433 break;
4434
4435 case GAUDI_QUEUE_ID_TPC_5_0:
4436 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4437 break;
4438
4439 case GAUDI_QUEUE_ID_TPC_5_1:
4440 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4441 break;
4442
4443 case GAUDI_QUEUE_ID_TPC_5_2:
4444 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4445 break;
4446
4447 case GAUDI_QUEUE_ID_TPC_5_3:
4448 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4449 break;
4450
4451 case GAUDI_QUEUE_ID_TPC_6_0:
4452 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4453 break;
4454
4455 case GAUDI_QUEUE_ID_TPC_6_1:
4456 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4457 break;
4458
4459 case GAUDI_QUEUE_ID_TPC_6_2:
4460 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4461 break;
4462
4463 case GAUDI_QUEUE_ID_TPC_6_3:
4464 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4465 break;
4466
4467 case GAUDI_QUEUE_ID_TPC_7_0:
4468 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4469 break;
4470
4471 case GAUDI_QUEUE_ID_TPC_7_1:
4472 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4473 break;
4474
4475 case GAUDI_QUEUE_ID_TPC_7_2:
4476 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4477 break;
4478
4479 case GAUDI_QUEUE_ID_TPC_7_3:
4480 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4481 break;
4482
4483 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4484 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4485 invalid_queue = true;
4486
4487 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4488 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4489 break;
4490
4491 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4492 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4493 invalid_queue = true;
4494
4495 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4496 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4497 break;
4498
4499 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4500 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4501 invalid_queue = true;
4502
4503 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4504 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4505 break;
4506
4507 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4508 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4509 invalid_queue = true;
4510
4511 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4512 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4513 break;
4514
4515 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4516 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4517 invalid_queue = true;
4518
4519 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4520 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4521 break;
4522
4523 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4524 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4525 invalid_queue = true;
4526
4527 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4528 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4529 break;
4530
4531 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4532 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4533 invalid_queue = true;
4534
4535 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4536 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4537 break;
4538
4539 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4540 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4541 invalid_queue = true;
4542
4543 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4544 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4545 break;
4546
4547 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4548 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4549 invalid_queue = true;
4550
4551 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4552 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4553 break;
4554
4555 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4556 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4557 invalid_queue = true;
4558
4559 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4560 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4561 break;
4562
4563 default:
4564 invalid_queue = true;
4565 }
4566
4567 if (invalid_queue) {
4568 /* Should never get here */
4569 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4570 hw_queue_id);
4571 return;
4572 }
4573
4574 db_value = pi;
4575
4576 /* ring the doorbell */
4577 WREG32(db_reg_offset, db_value);
4578
4579 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4580 /* make sure device CPU will read latest data from host */
4581 mb();
4582
4583 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4584 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4585 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4586
4587 WREG32(irq_handler_offset,
4588 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4589 }
4590 }
4591
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
			struct hl_bd *bd)
4594 {
4595 __le64 *pbd = (__le64 *) bd;
4596
	/* The QMANs are in host memory, so a simple copy suffices */
4598 pqe[0] = pbd[0];
4599 pqe[1] = pbd[1];
4600 }
4601
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
			dma_addr_t *dma_handle, gfp_t flags)
4604 {
4605 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4606 dma_handle, flags);
4607
4608 /* Shift to the device's base physical address of host memory */
4609 if (kernel_addr)
4610 *dma_handle += HOST_PHYS_BASE;
4611
4612 return kernel_addr;
4613 }
4614
static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
			void *cpu_addr, dma_addr_t dma_handle)
4617 {
4618 /* Cancel the device's base physical address of host memory */
4619 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4620
4621 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4622 }
4623
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4625 {
4626 struct asic_fixed_properties *prop = &hdev->asic_prop;
4627 u64 cur_addr = prop->dram_user_base_address;
4628 u32 chunk_size, busy;
4629 int rc, dma_id;
4630
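	/*
	 * Scrub the DRAM in up-to-2GB chunks: program each DMA core in
	 * memory-set mode with the scrub value as the source, kick all
	 * channels, then poll every channel for idle before moving on to the
	 * next batch of chunks.
	 */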
4631 while (cur_addr < prop->dram_end_address) {
4632 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4633 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4634
4635 chunk_size =
4636 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4637
4638 dev_dbg(hdev->dev,
4639 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4640 cur_addr, cur_addr + chunk_size);
4641
4642 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4643 lower_32_bits(val));
4644 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4645 upper_32_bits(val));
4646 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4647 lower_32_bits(cur_addr));
4648 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4649 upper_32_bits(cur_addr));
4650 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4651 chunk_size);
4652 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4653 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4654 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4655
4656 cur_addr += chunk_size;
4657
4658 if (cur_addr == prop->dram_end_address)
4659 break;
4660 }
4661
4662 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4663 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4664
4665 rc = hl_poll_timeout(
4666 hdev,
4667 mmDMA0_CORE_STS0 + dma_offset,
4668 busy,
4669 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4670 1000,
4671 HBM_SCRUBBING_TIMEOUT_US);
4672
4673 if (rc) {
4674 dev_err(hdev->dev,
4675 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4676 dma_id);
4677 return -EIO;
4678 }
4679 }
4680 }
4681
4682 return 0;
4683 }
4684
static int gaudi_scrub_device_mem(struct hl_device *hdev)
4686 {
4687 struct asic_fixed_properties *prop = &hdev->asic_prop;
4688 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4689 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4690 u64 addr, size, val = hdev->memory_scrub_val;
4691 ktime_t timeout;
4692 int rc = 0;
4693
4694 if (!hdev->memory_scrub)
4695 return 0;
4696
4697 timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4698 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4699 if (ktime_compare(ktime_get(), timeout) > 0) {
4700 dev_err(hdev->dev, "waiting for idle timeout\n");
4701 return -ETIMEDOUT;
4702 }
4703 usleep_range((1000 >> 2) + 1, 1000);
4704 }
4705
4706 /* Scrub SRAM */
4707 addr = prop->sram_user_base_address;
4708 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4709
4710 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4711 addr, addr + size, val);
4712 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4713 if (rc) {
4714 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4715 return rc;
4716 }
4717
4718 /* Scrub HBM using all DMA channels in parallel */
4719 rc = gaudi_scrub_device_dram(hdev, val);
4720 if (rc) {
4721 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4722 return rc;
4723 }
4724
4725 return 0;
4726 }
4727
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
			u32 queue_id, dma_addr_t *dma_handle,
			u16 *queue_len)
4731 {
4732 struct gaudi_device *gaudi = hdev->asic_specific;
4733 struct gaudi_internal_qman_info *q;
4734
4735 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4736 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4737 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4738 return NULL;
4739 }
4740
4741 q = &gaudi->internal_qmans[queue_id];
4742 *dma_handle = q->pq_dma_addr;
4743 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4744
4745 return q->pq_kernel_addr;
4746 }
4747
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
			u16 len, u32 timeout, u64 *result)
4750 {
4751 struct gaudi_device *gaudi = hdev->asic_specific;
4752
4753 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4754 if (result)
4755 *result = 0;
4756 return 0;
4757 }
4758
4759 if (!timeout)
4760 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4761
4762 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4763 timeout, result);
4764 }
4765
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4767 {
4768 struct packet_msg_prot *fence_pkt;
4769 dma_addr_t pkt_dma_addr;
4770 u32 fence_val, tmp, timeout_usec;
4771 dma_addr_t fence_dma_addr;
4772 u32 *fence_ptr;
4773 int rc;
4774
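	/*
	 * Queue test flow: allocate a scratch word on the host, submit a
	 * MSG_PROT packet that writes a known fence value to it, and poll the
	 * scratch word until the value shows up (or the timeout expires).
	 */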
4775 if (hdev->pldm)
4776 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4777 else
4778 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4779
4780 fence_val = GAUDI_QMAN0_FENCE_VAL;
4781
4782 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4783 if (!fence_ptr) {
4784 dev_err(hdev->dev,
4785 "Failed to allocate memory for H/W queue %d testing\n",
4786 hw_queue_id);
4787 return -ENOMEM;
4788 }
4789
4790 *fence_ptr = 0;
4791
4792 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4793 &pkt_dma_addr);
4794 if (!fence_pkt) {
4795 dev_err(hdev->dev,
4796 "Failed to allocate packet for H/W queue %d testing\n",
4797 hw_queue_id);
4798 rc = -ENOMEM;
4799 goto free_fence_ptr;
4800 }
4801
4802 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4803 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4804 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4805
4806 fence_pkt->ctl = cpu_to_le32(tmp);
4807 fence_pkt->value = cpu_to_le32(fence_val);
4808 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4809
4810 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4811 sizeof(struct packet_msg_prot),
4812 pkt_dma_addr);
4813 if (rc) {
4814 dev_err(hdev->dev,
4815 "Failed to send fence packet to H/W queue %d\n",
4816 hw_queue_id);
4817 goto free_pkt;
4818 }
4819
4820 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4821 1000, timeout_usec, true);
4822
4823 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4824
4825 if (rc == -ETIMEDOUT) {
4826 dev_err(hdev->dev,
4827 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4828 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4829 rc = -EIO;
4830 }
4831
4832 free_pkt:
4833 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4834 free_fence_ptr:
4835 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4836 return rc;
4837 }
4838
4839 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4840 {
4841 struct gaudi_device *gaudi = hdev->asic_specific;
4842
4843 /*
4844 	 * Check the CPU queue capability here, as send_cpu_message() won't
4845 	 * update the result value when the capability is not set
4846 */
4847 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4848 return 0;
4849
4850 return hl_fw_test_cpu_queue(hdev);
4851 }
4852
4853 static int gaudi_test_queues(struct hl_device *hdev)
4854 {
4855 int i, rc, ret_val = 0;
4856
4857 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4858 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4859 rc = gaudi_test_queue(hdev, i);
4860 if (rc)
4861 ret_val = -EINVAL;
4862 }
4863 }
4864
4865 rc = gaudi_test_cpu_queue(hdev);
4866 if (rc)
4867 ret_val = -EINVAL;
4868
4869 return ret_val;
4870 }
4871
4872 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4873 gfp_t mem_flags, dma_addr_t *dma_handle)
4874 {
4875 void *kernel_addr;
4876
4877 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4878 return NULL;
4879
4880 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4881
4882 /* Shift to the device's base physical address of host memory */
4883 if (kernel_addr)
4884 *dma_handle += HOST_PHYS_BASE;
4885
4886 return kernel_addr;
4887 }
4888
4889 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4890 dma_addr_t dma_addr)
4891 {
4892 /* Cancel the device's base physical address of host memory */
4893 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4894
4895 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4896 }
4897
4898 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4899 size_t size, dma_addr_t *dma_handle)
4900 {
4901 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4902 }
4903
4904 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4905 size_t size, void *vaddr)
4906 {
4907 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4908 }
4909
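/*
 * Walk a DMA-mapped SG table and count the LIN_DMA descriptors needed to
 * cover it, merging entries that are physically contiguous as long as the
 * combined length stays within DMA_MAX_TRANSFER_SIZE. Returns the total
 * size, in bytes, of the resulting LIN_DMA packets.
 */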
4910 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4911 {
4912 struct scatterlist *sg, *sg_next_iter;
4913 u32 count, dma_desc_cnt;
4914 u64 len, len_next;
4915 dma_addr_t addr, addr_next;
4916
4917 dma_desc_cnt = 0;
4918
4919 for_each_sgtable_dma_sg(sgt, sg, count) {
4920 len = sg_dma_len(sg);
4921 addr = sg_dma_address(sg);
4922
4923 if (len == 0)
4924 break;
4925
4926 while ((count + 1) < sgt->nents) {
4927 sg_next_iter = sg_next(sg);
4928 len_next = sg_dma_len(sg_next_iter);
4929 addr_next = sg_dma_address(sg_next_iter);
4930
4931 if (len_next == 0)
4932 break;
4933
4934 if ((addr + len == addr_next) &&
4935 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4936 len += len_next;
4937 count++;
4938 sg = sg_next_iter;
4939 } else {
4940 break;
4941 }
4942 }
4943
4944 dma_desc_cnt++;
4945 }
4946
4947 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4948 }
4949
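/*
 * Pin the user host buffer referenced by a LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map its SG table and grow the patched
 * CB size by the number of LIN_DMA packets the transfer will expand into.
 */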
4950 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4951 struct hl_cs_parser *parser,
4952 struct packet_lin_dma *user_dma_pkt,
4953 u64 addr, enum dma_data_direction dir)
4954 {
4955 struct hl_userptr *userptr;
4956 int rc;
4957
4958 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4959 parser->job_userptr_list, &userptr))
4960 goto already_pinned;
4961
4962 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4963 if (!userptr)
4964 return -ENOMEM;
4965
4966 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4967 userptr);
4968 if (rc)
4969 goto free_userptr;
4970
4971 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4972
4973 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4974 if (rc) {
4975 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4976 goto unpin_memory;
4977 }
4978
4979 userptr->dma_mapped = true;
4980 userptr->dir = dir;
4981
4982 already_pinned:
4983 parser->patched_cb_size +=
4984 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4985
4986 return 0;
4987
4988 unpin_memory:
4989 list_del(&userptr->job_node);
4990 hl_unpin_host_memory(hdev, userptr);
4991 free_userptr:
4992 kfree(userptr);
4993 return rc;
4994 }
4995
4996 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4997 struct hl_cs_parser *parser,
4998 struct packet_lin_dma *user_dma_pkt,
4999 bool src_in_host)
5000 {
5001 enum dma_data_direction dir;
5002 bool skip_host_mem_pin = false, user_memset;
5003 u64 addr;
5004 int rc = 0;
5005
5006 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5007 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5008 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5009
5010 if (src_in_host) {
5011 if (user_memset)
5012 skip_host_mem_pin = true;
5013
5014 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5015 dir = DMA_TO_DEVICE;
5016 addr = le64_to_cpu(user_dma_pkt->src_addr);
5017 } else {
5018 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5019 dir = DMA_FROM_DEVICE;
5020 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5021 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5022 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5023 }
5024
5025 if (skip_host_mem_pin)
5026 parser->patched_cb_size += sizeof(*user_dma_pkt);
5027 else
5028 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5029 addr, dir);
5030
5031 return rc;
5032 }
5033
5034 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5035 struct hl_cs_parser *parser,
5036 struct packet_lin_dma *user_dma_pkt)
5037 {
5038 bool src_in_host = false;
5039 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5040 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5041 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5042
5043 dev_dbg(hdev->dev, "DMA packet details:\n");
5044 dev_dbg(hdev->dev, "source == 0x%llx\n",
5045 le64_to_cpu(user_dma_pkt->src_addr));
5046 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5047 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5048
5049 /*
5050 * Special handling for DMA with size 0. Bypass all validations
5051 * because no transactions will be done except for WR_COMP, which
5052 * is not a security issue
5053 */
5054 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5055 parser->patched_cb_size += sizeof(*user_dma_pkt);
5056 return 0;
5057 }
5058
5059 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5060 src_in_host = true;
5061
5062 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5063 src_in_host);
5064 }
5065
5066 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5067 struct hl_cs_parser *parser,
5068 struct packet_load_and_exe *user_pkt)
5069 {
5070 u32 cfg;
5071
5072 cfg = le32_to_cpu(user_pkt->cfg);
5073
5074 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5075 dev_err(hdev->dev,
5076 "User not allowed to use Load and Execute\n");
5077 return -EPERM;
5078 }
5079
5080 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5081
5082 return 0;
5083 }
5084
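/*
 * First pass over the user CB: walk it packet by packet, reject packets a
 * user is not allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and
 * LOAD_AND_EXE with the DST configuration bit set), and accumulate the
 * size the patched CB will need. With MMU enabled, LIN_DMA packets are
 * accounted as-is; without MMU they are validated for later patching.
 */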
5085 static int gaudi_validate_cb(struct hl_device *hdev,
5086 struct hl_cs_parser *parser, bool is_mmu)
5087 {
5088 u32 cb_parsed_length = 0;
5089 int rc = 0;
5090
5091 parser->patched_cb_size = 0;
5092
5093 	/* user_cb_size is more than 0 so the loop will always be executed */
5094 while (cb_parsed_length < parser->user_cb_size) {
5095 enum packet_id pkt_id;
5096 u16 pkt_size;
5097 struct gaudi_packet *user_pkt;
5098
5099 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5100
5101 pkt_id = (enum packet_id) (
5102 (le64_to_cpu(user_pkt->header) &
5103 PACKET_HEADER_PACKET_ID_MASK) >>
5104 PACKET_HEADER_PACKET_ID_SHIFT);
5105
5106 if (!validate_packet_id(pkt_id)) {
5107 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5108 rc = -EINVAL;
5109 break;
5110 }
5111
5112 pkt_size = gaudi_packet_sizes[pkt_id];
5113 cb_parsed_length += pkt_size;
5114 if (cb_parsed_length > parser->user_cb_size) {
5115 dev_err(hdev->dev,
5116 "packet 0x%x is out of CB boundary\n", pkt_id);
5117 rc = -EINVAL;
5118 break;
5119 }
5120
5121 switch (pkt_id) {
5122 case PACKET_MSG_PROT:
5123 dev_err(hdev->dev,
5124 "User not allowed to use MSG_PROT\n");
5125 rc = -EPERM;
5126 break;
5127
5128 case PACKET_CP_DMA:
5129 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5130 rc = -EPERM;
5131 break;
5132
5133 case PACKET_STOP:
5134 dev_err(hdev->dev, "User not allowed to use STOP\n");
5135 rc = -EPERM;
5136 break;
5137
5138 case PACKET_WREG_BULK:
5139 dev_err(hdev->dev,
5140 "User not allowed to use WREG_BULK\n");
5141 rc = -EPERM;
5142 break;
5143
5144 case PACKET_LOAD_AND_EXE:
5145 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5146 (struct packet_load_and_exe *) user_pkt);
5147 break;
5148
5149 case PACKET_LIN_DMA:
5150 parser->contains_dma_pkt = true;
5151 if (is_mmu)
5152 parser->patched_cb_size += pkt_size;
5153 else
5154 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5155 (struct packet_lin_dma *) user_pkt);
5156 break;
5157
5158 case PACKET_WREG_32:
5159 case PACKET_MSG_LONG:
5160 case PACKET_MSG_SHORT:
5161 case PACKET_REPEAT:
5162 case PACKET_FENCE:
5163 case PACKET_NOP:
5164 case PACKET_ARB_POINT:
5165 parser->patched_cb_size += pkt_size;
5166 break;
5167
5168 default:
5169 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5170 pkt_id);
5171 rc = -EINVAL;
5172 break;
5173 }
5174
5175 if (rc)
5176 break;
5177 }
5178
5179 /*
5180 	 * The patched CB should have space at the end for:
5181 	 * 1. Optional NOP padding for cacheline alignment
5182 	 * 2. A MSG_PROT packet that acts as a completion packet
5183 	 * 3. A MSG_PROT packet that generates the MSI interrupt
5184 */
5185 if (parser->completion)
5186 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5187 parser->patched_cb_size);
5188
5189 return rc;
5190 }
5191
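/*
 * Expand a single user LIN_DMA packet into one packet per merged SG chunk
 * of the pinned host buffer. The engine-barrier bit is kept only on the
 * first descriptor, WR_COMP is cleared on all of them and restored on the
 * last one according to the user's original setting. A host-side memset
 * packet is copied unmodified.
 */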
5192 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5193 struct hl_cs_parser *parser,
5194 struct packet_lin_dma *user_dma_pkt,
5195 struct packet_lin_dma *new_dma_pkt,
5196 u32 *new_dma_pkt_size)
5197 {
5198 struct hl_userptr *userptr;
5199 struct scatterlist *sg, *sg_next_iter;
5200 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5201 u64 len, len_next;
5202 dma_addr_t dma_addr, dma_addr_next;
5203 u64 device_memory_addr, addr;
5204 enum dma_data_direction dir;
5205 struct sg_table *sgt;
5206 bool src_in_host = false;
5207 bool skip_host_mem_pin = false;
5208 bool user_memset;
5209
5210 ctl = le32_to_cpu(user_dma_pkt->ctl);
5211
5212 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5213 src_in_host = true;
5214
5215 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5216 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5217
5218 if (src_in_host) {
5219 addr = le64_to_cpu(user_dma_pkt->src_addr);
5220 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5221 dir = DMA_TO_DEVICE;
5222 if (user_memset)
5223 skip_host_mem_pin = true;
5224 } else {
5225 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5226 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5227 dir = DMA_FROM_DEVICE;
5228 }
5229
5230 if ((!skip_host_mem_pin) &&
5231 (!hl_userptr_is_pinned(hdev, addr,
5232 le32_to_cpu(user_dma_pkt->tsize),
5233 parser->job_userptr_list, &userptr))) {
5234 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5235 			addr, le32_to_cpu(user_dma_pkt->tsize));
5236 return -EFAULT;
5237 }
5238
5239 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5240 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5241 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5242 return 0;
5243 }
5244
5245 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5246
5247 sgt = userptr->sgt;
5248 dma_desc_cnt = 0;
5249
5250 for_each_sgtable_dma_sg(sgt, sg, count) {
5251 len = sg_dma_len(sg);
5252 dma_addr = sg_dma_address(sg);
5253
5254 if (len == 0)
5255 break;
5256
5257 while ((count + 1) < sgt->nents) {
5258 sg_next_iter = sg_next(sg);
5259 len_next = sg_dma_len(sg_next_iter);
5260 dma_addr_next = sg_dma_address(sg_next_iter);
5261
5262 if (len_next == 0)
5263 break;
5264
5265 if ((dma_addr + len == dma_addr_next) &&
5266 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5267 len += len_next;
5268 count++;
5269 sg = sg_next_iter;
5270 } else {
5271 break;
5272 }
5273 }
5274
5275 ctl = le32_to_cpu(user_dma_pkt->ctl);
5276 if (likely(dma_desc_cnt))
5277 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5278 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5279 new_dma_pkt->ctl = cpu_to_le32(ctl);
5280 new_dma_pkt->tsize = cpu_to_le32(len);
5281
5282 if (dir == DMA_TO_DEVICE) {
5283 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5284 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5285 } else {
5286 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5287 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5288 }
5289
5290 if (!user_memset)
5291 device_memory_addr += len;
5292 dma_desc_cnt++;
5293 new_dma_pkt++;
5294 }
5295
5296 if (!dma_desc_cnt) {
5297 dev_err(hdev->dev,
5298 			"Got 0 SG entries while patching DMA packet\n");
5299 return -EFAULT;
5300 }
5301
5302 /* Fix the last dma packet - wrcomp must be as user set it */
5303 new_dma_pkt--;
5304 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5305
5306 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5307
5308 return 0;
5309 }
5310
5311 static int gaudi_patch_cb(struct hl_device *hdev,
5312 struct hl_cs_parser *parser)
5313 {
5314 u32 cb_parsed_length = 0;
5315 u32 cb_patched_cur_length = 0;
5316 int rc = 0;
5317
5318 	/* user_cb_size is more than 0 so the loop will always be executed */
5319 while (cb_parsed_length < parser->user_cb_size) {
5320 enum packet_id pkt_id;
5321 u16 pkt_size;
5322 u32 new_pkt_size = 0;
5323 struct gaudi_packet *user_pkt, *kernel_pkt;
5324
5325 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5326 kernel_pkt = parser->patched_cb->kernel_address +
5327 cb_patched_cur_length;
5328
5329 pkt_id = (enum packet_id) (
5330 (le64_to_cpu(user_pkt->header) &
5331 PACKET_HEADER_PACKET_ID_MASK) >>
5332 PACKET_HEADER_PACKET_ID_SHIFT);
5333
5334 if (!validate_packet_id(pkt_id)) {
5335 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5336 rc = -EINVAL;
5337 break;
5338 }
5339
5340 pkt_size = gaudi_packet_sizes[pkt_id];
5341 cb_parsed_length += pkt_size;
5342 if (cb_parsed_length > parser->user_cb_size) {
5343 dev_err(hdev->dev,
5344 "packet 0x%x is out of CB boundary\n", pkt_id);
5345 rc = -EINVAL;
5346 break;
5347 }
5348
5349 switch (pkt_id) {
5350 case PACKET_LIN_DMA:
5351 rc = gaudi_patch_dma_packet(hdev, parser,
5352 (struct packet_lin_dma *) user_pkt,
5353 (struct packet_lin_dma *) kernel_pkt,
5354 &new_pkt_size);
5355 cb_patched_cur_length += new_pkt_size;
5356 break;
5357
5358 case PACKET_MSG_PROT:
5359 dev_err(hdev->dev,
5360 "User not allowed to use MSG_PROT\n");
5361 rc = -EPERM;
5362 break;
5363
5364 case PACKET_CP_DMA:
5365 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5366 rc = -EPERM;
5367 break;
5368
5369 case PACKET_STOP:
5370 dev_err(hdev->dev, "User not allowed to use STOP\n");
5371 rc = -EPERM;
5372 break;
5373
5374 case PACKET_WREG_32:
5375 case PACKET_WREG_BULK:
5376 case PACKET_MSG_LONG:
5377 case PACKET_MSG_SHORT:
5378 case PACKET_REPEAT:
5379 case PACKET_FENCE:
5380 case PACKET_NOP:
5381 case PACKET_ARB_POINT:
5382 case PACKET_LOAD_AND_EXE:
5383 memcpy(kernel_pkt, user_pkt, pkt_size);
5384 cb_patched_cur_length += pkt_size;
5385 break;
5386
5387 default:
5388 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5389 pkt_id);
5390 rc = -EINVAL;
5391 break;
5392 }
5393
5394 if (rc)
5395 break;
5396 }
5397
5398 return rc;
5399 }
5400
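/*
 * MMU path: allocate a kernel-owned patched CB, copy the user CB into it
 * and validate the copy rather than the user-accessible original, leaving
 * room at the end for the completion/MSI MSG_PROT packets when a
 * completion is requested.
 */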
5401 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5402 struct hl_cs_parser *parser)
5403 {
5404 u64 handle;
5405 u32 patched_cb_size;
5406 struct hl_cb *user_cb;
5407 int rc;
5408
5409 /*
5410 	 * The patched CB should have space at the end for:
5411 	 * 1. Optional NOP padding for cacheline alignment
5412 	 * 2. A MSG_PROT packet that acts as a completion packet
5413 	 * 3. A MSG_PROT packet that generates the MSI interrupt
5414 */
5415 if (parser->completion)
5416 parser->patched_cb_size = parser->user_cb_size +
5417 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5418 else
5419 parser->patched_cb_size = parser->user_cb_size;
5420
5421 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5422 parser->patched_cb_size, false, false,
5423 &handle);
5424
5425 if (rc) {
5426 dev_err(hdev->dev,
5427 "Failed to allocate patched CB for DMA CS %d\n",
5428 rc);
5429 return rc;
5430 }
5431
5432 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5433 /* hl_cb_get should never fail */
5434 if (!parser->patched_cb) {
5435 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5436 rc = -EFAULT;
5437 goto out;
5438 }
5439
5440 /*
5441 * We are protected from overflow because the check
5442 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5443 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5444 *
5445 * There is no option to reach here without going through that check because:
5446 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5447 * an external queue.
5448 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5449 */
5450 memcpy(parser->patched_cb->kernel_address,
5451 parser->user_cb->kernel_address,
5452 parser->user_cb_size);
5453
5454 patched_cb_size = parser->patched_cb_size;
5455
5456 /* Validate patched CB instead of user CB */
5457 user_cb = parser->user_cb;
5458 parser->user_cb = parser->patched_cb;
5459 rc = gaudi_validate_cb(hdev, parser, true);
5460 parser->user_cb = user_cb;
5461
5462 if (rc) {
5463 hl_cb_put(parser->patched_cb);
5464 goto out;
5465 }
5466
5467 if (patched_cb_size != parser->patched_cb_size) {
5468 dev_err(hdev->dev, "user CB size mismatch\n");
5469 hl_cb_put(parser->patched_cb);
5470 rc = -EINVAL;
5471 goto out;
5472 }
5473
5474 out:
5475 /*
5476 	 * Always call cb destroy here because we still hold one reference
5477 	 * to it from the earlier cb_get. After the job completes, cb_put
5478 	 * will release it, but here we want to remove it from the
5479 * idr
5480 */
5481 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5482
5483 return rc;
5484 }
5485
5486 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5487 struct hl_cs_parser *parser)
5488 {
5489 u64 handle;
5490 int rc;
5491
5492 rc = gaudi_validate_cb(hdev, parser, false);
5493
5494 if (rc)
5495 goto free_userptr;
5496
5497 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5498 parser->patched_cb_size, false, false,
5499 &handle);
5500 if (rc) {
5501 dev_err(hdev->dev,
5502 "Failed to allocate patched CB for DMA CS %d\n", rc);
5503 goto free_userptr;
5504 }
5505
5506 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5507 /* hl_cb_get should never fail here */
5508 if (!parser->patched_cb) {
5509 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5510 rc = -EFAULT;
5511 goto out;
5512 }
5513
5514 rc = gaudi_patch_cb(hdev, parser);
5515
5516 if (rc)
5517 hl_cb_put(parser->patched_cb);
5518
5519 out:
5520 /*
5521 	 * Always call cb destroy here because we still hold one reference
5522 	 * to it from the earlier cb_get. After the job completes, cb_put
5523 	 * will release it, but here we want to remove it from the
5524 * idr
5525 */
5526 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5527
5528 free_userptr:
5529 if (rc)
5530 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5531 return rc;
5532 }
5533
5534 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5535 struct hl_cs_parser *parser)
5536 {
5537 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5538 struct gaudi_device *gaudi = hdev->asic_specific;
5539 u32 nic_queue_offset, nic_mask_q_id;
5540
5541 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5542 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5543 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5544 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5545
5546 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5547 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5548 return -EINVAL;
5549 }
5550 }
5551
5552 /* For internal queue jobs just check if CB address is valid */
5553 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5554 parser->user_cb_size,
5555 asic_prop->sram_user_base_address,
5556 asic_prop->sram_end_address))
5557 return 0;
5558
5559 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5560 parser->user_cb_size,
5561 asic_prop->dram_user_base_address,
5562 asic_prop->dram_end_address))
5563 return 0;
5564
5565 /* PMMU and HPMMU addresses are equal, check only one of them */
5566 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5567 parser->user_cb_size,
5568 asic_prop->pmmu.start_addr,
5569 asic_prop->pmmu.end_addr))
5570 return 0;
5571
5572 dev_err(hdev->dev,
5573 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5574 parser->user_cb, parser->user_cb_size);
5575
5576 return -EFAULT;
5577 }
5578
5579 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5580 {
5581 struct gaudi_device *gaudi = hdev->asic_specific;
5582
5583 if (parser->queue_type == QUEUE_TYPE_INT)
5584 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5585
5586 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5587 return gaudi_parse_cb_mmu(hdev, parser);
5588 else
5589 return gaudi_parse_cb_no_mmu(hdev, parser);
5590 }
5591
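/*
 * Append the CB epilogue: NOP padding from the original CB end up to the
 * last two packet slots, then a MSG_PROT packet that writes cq_val to the
 * CQ address and a second MSG_PROT packet that triggers the MSI - either a
 * specific vector in multi-MSI mode or the single MSI request register.
 */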
5592 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5593 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5594 u32 msi_vec, bool eb)
5595 {
5596 struct gaudi_device *gaudi = hdev->asic_specific;
5597 struct packet_msg_prot *cq_pkt;
5598 struct packet_nop *cq_padding;
5599 u64 msi_addr;
5600 u32 tmp;
5601
5602 cq_padding = kernel_address + original_len;
5603 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5604
5605 while ((void *)cq_padding < (void *)cq_pkt) {
5606 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5607 cq_padding++;
5608 }
5609
5610 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5611 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5612
5613 if (eb)
5614 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5615
5616 cq_pkt->ctl = cpu_to_le32(tmp);
5617 cq_pkt->value = cpu_to_le32(cq_val);
5618 cq_pkt->addr = cpu_to_le64(cq_addr);
5619
5620 cq_pkt++;
5621
5622 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5623 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5624 cq_pkt->ctl = cpu_to_le32(tmp);
5625 cq_pkt->value = cpu_to_le32(1);
5626
5627 if (gaudi->multi_msi_mode)
5628 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5629 else
5630 msi_addr = mmPCIE_CORE_MSI_REQ;
5631
5632 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5633 }
5634
5635 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5636 {
5637 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5638 }
5639
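/*
 * Fill a device memory range with a 64-bit value by building a single
 * memset LIN_DMA packet in a kernel CB and running it as a driver job on
 * QMAN0 of DMA channel 0. The DMA0 error-cause register is checked before
 * and after the transfer (and cleared during init).
 */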
5640 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5641 u32 size, u64 val)
5642 {
5643 struct packet_lin_dma *lin_dma_pkt;
5644 struct hl_cs_job *job;
5645 u32 cb_size, ctl, err_cause;
5646 struct hl_cb *cb;
5647 int rc;
5648
5649 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5650 if (!cb)
5651 return -EFAULT;
5652
5653 lin_dma_pkt = cb->kernel_address;
5654 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5655 cb_size = sizeof(*lin_dma_pkt);
5656
5657 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5658 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5659 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5660 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5661 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5662
5663 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5664 lin_dma_pkt->src_addr = cpu_to_le64(val);
5665 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5666 lin_dma_pkt->tsize = cpu_to_le32(size);
5667
5668 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5669 if (!job) {
5670 dev_err(hdev->dev, "Failed to allocate a new job\n");
5671 rc = -ENOMEM;
5672 goto release_cb;
5673 }
5674
5675 /* Verify DMA is OK */
5676 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5677 if (err_cause && !hdev->init_done) {
5678 dev_dbg(hdev->dev,
5679 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5680 err_cause);
5681 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5682 }
5683
5684 job->id = 0;
5685 job->user_cb = cb;
5686 atomic_inc(&job->user_cb->cs_cnt);
5687 job->user_cb_size = cb_size;
5688 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5689 job->patched_cb = job->user_cb;
5690 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5691
5692 hl_debugfs_add_job(hdev, job);
5693
5694 rc = gaudi_send_job_on_qman0(hdev, job);
5695 hl_debugfs_remove_job(hdev, job);
5696 kfree(job);
5697 atomic_dec(&cb->cs_cnt);
5698
5699 /* Verify DMA is OK */
5700 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5701 if (err_cause) {
5702 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5703 rc = -EIO;
5704 if (!hdev->init_done) {
5705 dev_dbg(hdev->dev,
5706 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5707 err_cause);
5708 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5709 }
5710 }
5711
5712 release_cb:
5713 hl_cb_put(cb);
5714 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5715
5716 return rc;
5717 }
5718
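/*
 * Write the same 32-bit value to a contiguous block of registers by
 * building one MSG_LONG packet per register in a kernel CB (capped at 2MB)
 * and submitting it as a driver job on QMAN0.
 */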
5719 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5720 u32 num_regs, u32 val)
5721 {
5722 struct packet_msg_long *pkt;
5723 struct hl_cs_job *job;
5724 u32 cb_size, ctl;
5725 struct hl_cb *cb;
5726 int i, rc;
5727
5728 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5729
5730 if (cb_size > SZ_2M) {
5731 		dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5732 return -ENOMEM;
5733 }
5734
5735 cb = hl_cb_kernel_create(hdev, cb_size, false);
5736 if (!cb)
5737 return -EFAULT;
5738
5739 pkt = cb->kernel_address;
5740
5741 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5742 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5743 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5744 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5745 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5746
5747 for (i = 0; i < num_regs ; i++, pkt++) {
5748 pkt->ctl = cpu_to_le32(ctl);
5749 pkt->value = cpu_to_le32(val);
5750 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5751 }
5752
5753 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5754 if (!job) {
5755 dev_err(hdev->dev, "Failed to allocate a new job\n");
5756 rc = -ENOMEM;
5757 goto release_cb;
5758 }
5759
5760 job->id = 0;
5761 job->user_cb = cb;
5762 atomic_inc(&job->user_cb->cs_cnt);
5763 job->user_cb_size = cb_size;
5764 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5765 job->patched_cb = job->user_cb;
5766 job->job_cb_size = cb_size;
5767
5768 hl_debugfs_add_job(hdev, job);
5769
5770 rc = gaudi_send_job_on_qman0(hdev, job);
5771 hl_debugfs_remove_job(hdev, job);
5772 kfree(job);
5773 atomic_dec(&cb->cs_cnt);
5774
5775 release_cb:
5776 hl_cb_put(cb);
5777 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5778
5779 return rc;
5780 }
5781
5782 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5783 {
5784 u64 base_addr;
5785 u32 num_regs;
5786 int rc;
5787
5788 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5789 num_regs = NUM_OF_SOB_IN_BLOCK;
5790 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5791 if (rc) {
5792 dev_err(hdev->dev, "failed resetting SM registers");
5793 return -ENOMEM;
5794 }
5795
5796 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5797 num_regs = NUM_OF_SOB_IN_BLOCK;
5798 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5799 if (rc) {
5800 dev_err(hdev->dev, "failed resetting SM registers");
5801 return -ENOMEM;
5802 }
5803
5804 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5805 num_regs = NUM_OF_SOB_IN_BLOCK;
5806 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5807 if (rc) {
5808 dev_err(hdev->dev, "failed resetting SM registers");
5809 return -ENOMEM;
5810 }
5811
5812 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5813 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5814 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5815 if (rc) {
5816 dev_err(hdev->dev, "failed resetting SM registers");
5817 return -ENOMEM;
5818 }
5819
5820 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5821 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5822 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5823 if (rc) {
5824 dev_err(hdev->dev, "failed resetting SM registers");
5825 return -ENOMEM;
5826 }
5827
5828 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5829 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5830 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5831 if (rc) {
5832 dev_err(hdev->dev, "failed resetting SM registers");
5833 return -ENOMEM;
5834 }
5835
5836 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5837 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5838 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5839 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5840 if (rc) {
5841 dev_err(hdev->dev, "failed resetting SM registers");
5842 return -ENOMEM;
5843 }
5844
5845 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5846 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5847 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5848 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5849 if (rc) {
5850 dev_err(hdev->dev, "failed resetting SM registers");
5851 return -ENOMEM;
5852 }
5853
5854 return 0;
5855 }
5856
5857 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5858 {
5859 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5860 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5861 int i;
5862
5863 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5864 u64 sob_addr = CFG_BASE +
5865 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5866 (i * sob_delta);
5867 u32 dma_offset = i * DMA_CORE_OFFSET;
5868
5869 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5870 lower_32_bits(sob_addr));
5871 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5872 upper_32_bits(sob_addr));
5873 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5874
5875 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5876 * modified by the user for SRAM reduction
5877 */
5878 if (i > 1)
5879 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5880 0x00000001);
5881 }
5882 }
5883
5884 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5885 {
5886 u32 qman_offset;
5887 int i;
5888
5889 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5890 qman_offset = i * DMA_QMAN_OFFSET;
5891 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5892 }
5893
5894 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5895 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5896 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5897 }
5898
5899 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5900 qman_offset = i * TPC_QMAN_OFFSET;
5901 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5902 }
5903
5904 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5905 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5906 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5907 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5908 }
5909 }
5910
5911 static int gaudi_restore_user_registers(struct hl_device *hdev)
5912 {
5913 int rc;
5914
5915 rc = gaudi_restore_sm_registers(hdev);
5916 if (rc)
5917 return rc;
5918
5919 gaudi_restore_dma_registers(hdev);
5920 gaudi_restore_qm_registers(hdev);
5921
5922 return 0;
5923 }
5924
5925 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5926 {
5927 return 0;
5928 }
5929
5930 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5931 {
5932 u32 size = hdev->asic_prop.mmu_pgt_size +
5933 hdev->asic_prop.mmu_cache_mng_size;
5934 struct gaudi_device *gaudi = hdev->asic_specific;
5935 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5936
5937 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5938 return 0;
5939
5940 return gaudi_memset_device_memory(hdev, addr, size, 0);
5941 }
5942
5943 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5944 {
5945
5946 }
5947
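/*
 * Program one DMA core directly, bypassing its QMAN, for a linear transfer
 * from a device address into a host DMA address, then poll STS0 until the
 * engine is no longer busy and verify its error-cause register is clean.
 */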
5948 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5949 u32 size_to_dma, dma_addr_t dma_addr)
5950 {
5951 u32 err_cause, val;
5952 u64 dma_offset;
5953 int rc;
5954
5955 dma_offset = dma_id * DMA_CORE_OFFSET;
5956
5957 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5958 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5959 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5960 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5961 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5962 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5963 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5964
5965 rc = hl_poll_timeout(
5966 hdev,
5967 mmDMA0_CORE_STS0 + dma_offset,
5968 val,
5969 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5970 0,
5971 1000000);
5972
5973 if (rc) {
5974 dev_err(hdev->dev,
5975 "DMA %d timed-out during reading of 0x%llx\n",
5976 dma_id, addr);
5977 return -EIO;
5978 }
5979
5980 /* Verify DMA is OK */
5981 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5982 if (err_cause) {
5983 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5984 dev_dbg(hdev->dev,
5985 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5986 err_cause);
5987 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5988
5989 return -EIO;
5990 }
5991
5992 return 0;
5993 }
5994
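/*
 * Read device memory into a caller-supplied buffer for debugfs: pick an
 * idle PCI DMA engine (falling back to the second one), stop its QMAN CPs
 * and set the DMA core protection bit for the duration of the copy, then
 * transfer the range in 2MB chunks through a coherent bounce buffer and
 * restore both settings at the end.
 */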
5995 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5996 void *blob_addr)
5997 {
5998 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5999 u32 qm_glbl_sts0, qm_cgm_sts;
6000 u64 dma_offset, qm_offset;
6001 dma_addr_t dma_addr;
6002 void *kernel_addr;
6003 bool is_eng_idle;
6004 int rc = 0, dma_id;
6005
6006 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6007
6008 if (!kernel_addr)
6009 return -ENOMEM;
6010
6011 hdev->asic_funcs->hw_queues_lock(hdev);
6012
6013 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6014 dma_offset = dma_id * DMA_CORE_OFFSET;
6015 qm_offset = dma_id * DMA_QMAN_OFFSET;
6016 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6017 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6018 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6019 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6020 IS_DMA_IDLE(dma_core_sts0);
6021
6022 if (!is_eng_idle) {
6023 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6024 dma_offset = dma_id * DMA_CORE_OFFSET;
6025 qm_offset = dma_id * DMA_QMAN_OFFSET;
6026 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6027 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6028 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6029 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6030 IS_DMA_IDLE(dma_core_sts0);
6031
6032 if (!is_eng_idle) {
6033 dev_err_ratelimited(hdev->dev,
6034 "Can't read via DMA because it is BUSY\n");
6035 rc = -EAGAIN;
6036 goto out;
6037 }
6038 }
6039
6040 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6041 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6042 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6043
6044 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6045 	 * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6046 * ASID
6047 */
6048 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6049
6050 /* Verify DMA is OK */
6051 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6052 if (err_cause) {
6053 dev_dbg(hdev->dev,
6054 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6055 err_cause);
6056 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6057 }
6058
6059 pos = 0;
6060 size_left = size;
6061 size_to_dma = SZ_2M;
6062
6063 while (size_left > 0) {
6064
6065 if (size_left < SZ_2M)
6066 size_to_dma = size_left;
6067
6068 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6069 dma_addr);
6070 if (rc)
6071 break;
6072
6073 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6074
6075 if (size_left <= SZ_2M)
6076 break;
6077
6078 pos += SZ_2M;
6079 addr += SZ_2M;
6080 size_left -= SZ_2M;
6081 }
6082
6083 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6084 	 * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6085 * ASID
6086 */
6087 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6088 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6089
6090 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6091
6092 out:
6093 hdev->asic_funcs->hw_queues_unlock(hdev);
6094
6095 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6096
6097 return rc;
6098 }
6099
6100 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6101 {
6102 struct gaudi_device *gaudi = hdev->asic_specific;
6103
6104 if (hdev->reset_info.hard_reset_pending)
6105 return U64_MAX;
6106
6107 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6108 (addr - gaudi->hbm_bar_cur_addr));
6109 }
6110
6111 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6112 {
6113 struct gaudi_device *gaudi = hdev->asic_specific;
6114
6115 if (hdev->reset_info.hard_reset_pending)
6116 return;
6117
6118 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6119 (addr - gaudi->hbm_bar_cur_addr));
6120 }
6121
6122 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6123 {
6124 /* mask to zero the MMBP and ASID bits */
6125 WREG32_AND(reg, ~0x7FF);
6126 WREG32_OR(reg, asid);
6127 }
6128
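/*
 * Program the given ASID into the non-secure properties / ARUSER / AWUSER
 * registers of every DMA, TPC and MME engine, every enabled NIC QMAN and
 * the PSOC trace users, so transactions from those engines carry this ASID.
 * gaudi_mmu_prepare_reg() simply does reg = (reg & ~0x7FF) | asid, clearing
 * the MMBP and ASID bits before setting the new ASID.
 */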
6129 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6130 {
6131 struct gaudi_device *gaudi = hdev->asic_specific;
6132
6133 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6134 return;
6135
6136 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6137 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6138 return;
6139 }
6140
6141 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6142 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6143 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6144 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6145 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6146
6147 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6148 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6149 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6150 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6151 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6152
6153 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158
6159 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6160 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6161 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6164
6165 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6166 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6167 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6168 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6169 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6170
6171 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6172 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6173 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6174 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6175 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6176
6177 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6178 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6179 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6181 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6182
6183 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6184 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6185 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6186 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6187 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6188
6189 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6190 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6192 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6193 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6194 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6195 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6196 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6197
6198 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6200 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6201 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6202 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6203 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6204 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6205
6206 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6207 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6208 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6209 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6210 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6211 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6212 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6213
6214 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6215 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6216 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6217 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6218 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6219 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6220 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6221
6222 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6223 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6224 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6225 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6226 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6227 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6228 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6229
6230 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6231 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6232 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6233 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6234 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6235 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6236 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6237
6238 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6239 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6240 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6241 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6242 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6243 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6244 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6245
6246 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6247 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6248 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6249 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6250 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6251 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6252 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6253
6254 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6255 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6256 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6257 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6258 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6259 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6260 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6261
6262 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6263 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6264 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6265 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6266 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6267 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6268 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6269 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6270 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6271 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6272
6273 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6274 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6275 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6276 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6277 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6278 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6279 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6280 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6281 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6282 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6283 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6284 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6285
6286 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6287 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6288 asid);
6289 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6290 asid);
6291 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6292 asid);
6293 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6294 asid);
6295 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6296 asid);
6297 }
6298
6299 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6300 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6301 asid);
6302 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6303 asid);
6304 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6305 asid);
6306 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6307 asid);
6308 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6309 asid);
6310 }
6311
6312 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6313 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6314 asid);
6315 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6316 asid);
6317 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6318 asid);
6319 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6320 asid);
6321 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6322 asid);
6323 }
6324
6325 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6326 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6327 asid);
6328 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6329 asid);
6330 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6331 asid);
6332 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6333 asid);
6334 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6335 asid);
6336 }
6337
6338 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6339 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6340 asid);
6341 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6342 asid);
6343 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6344 asid);
6345 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6346 asid);
6347 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6348 asid);
6349 }
6350
6351 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6352 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6353 asid);
6354 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6355 asid);
6356 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6357 asid);
6358 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6359 asid);
6360 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6361 asid);
6362 }
6363
6364 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6365 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6366 asid);
6367 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6368 asid);
6369 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6370 asid);
6371 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6372 asid);
6373 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6374 asid);
6375 }
6376
6377 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6378 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6379 asid);
6380 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6381 asid);
6382 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6383 asid);
6384 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6385 asid);
6386 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6387 asid);
6388 }
6389
6390 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6391 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6392 asid);
6393 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6394 asid);
6395 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6396 asid);
6397 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6398 asid);
6399 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6400 asid);
6401 }
6402
6403 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6404 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6405 asid);
6406 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6407 asid);
6408 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6409 asid);
6410 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6411 asid);
6412 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6413 asid);
6414 }
6415
6416 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6417 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6418 }
6419
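/*
 * Submit a driver-internal job on the DMA0 QMAN0 stream. The device must be
 * idle. A MSG_PROT fence packet is placed at the end of the patched CB, the
 * DMA0 core protection VAL bit is set for the duration of the job and
 * restored afterwards, and the call polls the fence value until it arrives
 * or the timeout expires.
 */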
6420 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6421 struct hl_cs_job *job)
6422 {
6423 struct packet_msg_prot *fence_pkt;
6424 u32 *fence_ptr;
6425 dma_addr_t fence_dma_addr;
6426 struct hl_cb *cb;
6427 u32 tmp, timeout, dma_offset;
6428 int rc;
6429
6430 if (hdev->pldm)
6431 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6432 else
6433 timeout = HL_DEVICE_TIMEOUT_USEC;
6434
6435 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6436 dev_err_ratelimited(hdev->dev,
6437 "Can't send driver job on QMAN0 because the device is not idle\n");
6438 return -EBUSY;
6439 }
6440
6441 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6442 if (!fence_ptr) {
6443 dev_err(hdev->dev,
6444 "Failed to allocate fence memory for QMAN0\n");
6445 return -ENOMEM;
6446 }
6447
6448 cb = job->patched_cb;
6449
6450 fence_pkt = cb->kernel_address +
6451 job->job_cb_size - sizeof(struct packet_msg_prot);
6452
6453 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6454 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6455 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6456
6457 fence_pkt->ctl = cpu_to_le32(tmp);
6458 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6459 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6460
6461 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6462
6463 WREG32(mmDMA0_CORE_PROT + dma_offset,
6464 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6465
6466 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6467 job->job_cb_size, cb->bus_address);
6468 if (rc) {
6469 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6470 goto free_fence_ptr;
6471 }
6472
6473 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6474 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6475 timeout, true);
6476
6477 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6478
6479 if (rc == -ETIMEDOUT) {
6480 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6481 goto free_fence_ptr;
6482 }
6483
6484 free_fence_ptr:
6485 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6486
6487 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6488 return rc;
6489 }
6490
6491 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6492 {
6493 if (event_type >= GAUDI_EVENT_SIZE)
6494 goto event_not_supported;
6495
6496 if (!gaudi_irq_map_table[event_type].valid)
6497 goto event_not_supported;
6498
6499 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6500
6501 return;
6502
6503 event_not_supported:
6504 snprintf(desc, size, "N/A");
6505 }
6506
6507 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6508 bool is_write, s32 *engine_id_1,
6509 s32 *engine_id_2)
6510 {
6511 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6512
6513 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6514 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6515
6516 switch (x_y) {
6517 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6518 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6519 dma_id[0] = 0;
6520 dma_id[1] = 2;
6521 break;
6522 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6523 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6524 dma_id[0] = 1;
6525 dma_id[1] = 3;
6526 break;
6527 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6528 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6529 dma_id[0] = 4;
6530 dma_id[1] = 6;
6531 break;
6532 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6533 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6534 dma_id[0] = 5;
6535 dma_id[1] = 7;
6536 break;
6537 default:
6538 goto unknown_initiator;
6539 }
6540
6541 for (i = 0 ; i < 2 ; i++) {
6542 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6543 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6544 }
6545
6546 switch (x_y) {
6547 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6548 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6549 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6550 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6551 return "DMA0";
6552 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6553 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6554 return "DMA2";
6555 } else {
6556 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6557 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6558 return "DMA0 or DMA2";
6559 }
6560 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6561 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6562 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6563 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6564 return "DMA1";
6565 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6566 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6567 return "DMA3";
6568 } else {
6569 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6570 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6571 return "DMA1 or DMA3";
6572 }
6573 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6574 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6575 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6576 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6577 return "DMA4";
6578 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6579 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6580 return "DMA6";
6581 } else {
6582 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6583 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6584 return "DMA4 or DMA6";
6585 }
6586 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6587 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6588 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6589 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6590 return "DMA5";
6591 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6592 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6593 return "DMA7";
6594 } else {
6595 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6596 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6597 return "DMA5 or DMA7";
6598 }
6599 }
6600
6601 unknown_initiator:
6602 return "unknown initiator";
6603 }
6604
6605 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6606 u32 *engine_id_1, u32 *engine_id_2)
6607 {
6608 u32 val, x_y, axi_id;
6609
6610 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6611 RREG32(mmMMU_UP_RAZWI_READ_ID);
6612 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6613 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6614 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6615 RAZWI_INITIATOR_AXI_ID_SHIFT);
6616
6617 switch (x_y) {
6618 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6619 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6620 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6621 return "TPC0";
6622 }
6623 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6624 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6625 return "NIC0";
6626 }
6627 break;
6628 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6629 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6630 return "TPC1";
6631 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6632 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6633 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6634 return "MME0";
6635 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6636 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6637 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6638 return "MME1";
6639 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6640 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6641 return "TPC2";
6642 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6643 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6644 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6645 return "TPC3";
6646 }
6647 		/* PCI, CPU and PSOC do not have an engine id */
6648 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6649 return "PCI";
6650 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6651 return "CPU";
6652 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6653 return "PSOC";
6654 break;
6655 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6656 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6657 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6658 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6659 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6660 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6661 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6662 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6663 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6664 engine_id_1, engine_id_2);
6665 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6666 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6667 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6668 return "TPC4";
6669 }
6670 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6671 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6672 return "NIC1";
6673 }
6674 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6675 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6676 return "NIC2";
6677 }
6678 break;
6679 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6680 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6681 return "TPC5";
6682 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6683 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6684 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6685 return "MME2";
6686 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6687 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6688 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6689 return "MME3";
6690 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6691 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6692 return "TPC6";
6693 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6694 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6695 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6696 return "TPC7";
6697 }
6698 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6699 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6700 return "NIC4";
6701 }
6702 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6703 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6704 return "NIC5";
6705 }
6706 break;
6707 default:
6708 break;
6709 }
6710
6711 dev_err(hdev->dev,
6712 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6713 val,
6714 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6715 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6716 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6717 RAZWI_INITIATOR_AXI_ID_MASK);
6718
6719 return "unknown initiator";
6720 }
6721
6722 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
6723 u32 *engine_id_2)
6724 {
6725
6726 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6727 dev_err_ratelimited(hdev->dev,
6728 "RAZWI event caused by illegal write of %s\n",
6729 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6730 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6731 }
6732
6733 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6734 dev_err_ratelimited(hdev->dev,
6735 "RAZWI event caused by illegal read of %s\n",
6736 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6737 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6738 }
6739 }
6740
6741 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
6742 {
6743 struct gaudi_device *gaudi = hdev->asic_specific;
6744 u32 val;
6745
6746 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6747 return;
6748
6749 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6750 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6751 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6752 *addr <<= 32;
6753 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6754
6755 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6756 *type = HL_RAZWI_PAGE_FAULT;
6757
6758 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6759 }
6760
6761 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6762 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6763 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6764 *addr <<= 32;
6765 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6766
6767 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6768 *type = HL_RAZWI_MMU_ACCESS_ERROR;
6769
6770 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6771 }
6772 }
6773
6774 /*
6775 * +-------------------+------------------------------------------------------+
6776 * | Configuration Reg | Description |
6777 * | Address | |
6778 * +-------------------+------------------------------------------------------+
6779 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6780 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6781 * | |0xF34 memory wrappers 63:32 |
6782 * | |0xF38 memory wrappers 95:64 |
6783 * | |0xF3C memory wrappers 127:96 |
6784 * +-------------------+------------------------------------------------------+
6785 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6786 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6787 * | |0xF44 memory wrappers 63:32 |
6788 * | |0xF48 memory wrappers 95:64 |
6789 * | |0xF4C memory wrappers 127:96 |
6790 * +-------------------+------------------------------------------------------+
6791 */
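/*
 * Illustrative example (derived from the table above, not from the HW spec):
 * memory wrapper 40 is reported in the second indication register (0xF34 for
 * single errors, 0xF44 for double errors) at bit 40 - 32 = 8, which is how
 * the wrapper index is recovered below as __ffs(err_word) + 32 * i.
 */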
6792 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6793 struct ecc_info_extract_params *params, u64 *ecc_address,
6794 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6795 {
6796 u32 i, num_mem_regs, reg, err_bit;
6797 u64 err_addr, err_word = 0;
6798
6799 num_mem_regs = params->num_memories / 32 +
6800 ((params->num_memories % 32) ? 1 : 0);
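	/* num_mem_regs is a round-up division, e.g. 90 memories -> 3 registers, 128 -> 4 */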
6801
6802 if (params->block_address >= CFG_BASE)
6803 params->block_address -= CFG_BASE;
6804
6805 if (params->derr)
6806 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6807 else
6808 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6809
6810 /* Set invalid wrapper index */
6811 *memory_wrapper_idx = 0xFF;
6812
6813 /* Iterate through memory wrappers, a single bit must be set */
6814 for (i = 0 ; i < num_mem_regs ; i++) {
6815 		/* The indication registers are consecutive, 4 bytes apart */
6816 		err_word = RREG32(err_addr + i * 4);
6817 if (err_word) {
6818 err_bit = __ffs(err_word);
6819 *memory_wrapper_idx = err_bit + (32 * i);
6820 break;
6821 }
6822 }
6823
6824 if (*memory_wrapper_idx == 0xFF) {
6825 dev_err(hdev->dev, "ECC error information cannot be found\n");
6826 return -EINVAL;
6827 }
6828
6829 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6830 *memory_wrapper_idx);
6831
6832 *ecc_address =
6833 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6834 *ecc_syndrom =
6835 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6836
6837 /* Clear error indication */
6838 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6839 if (params->derr)
6840 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6841 else
6842 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6843
6844 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6845
6846 return 0;
6847 }
6848
6849 /*
6850 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6851 *
6852 * @idx: the current pi/ci value
6853 * @q_len: the queue length (power of 2)
6854 *
6855 * @return the cyclically decremented index
6856 */
6857 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6858 {
6859 u32 mask = q_len - 1;
6860
6861 /*
6862 	 * Modular decrement is equivalent to adding (queue_size - 1);
6863 	 * we then take the LSBs to make sure the value is in the
6864 	 * range [0, queue_len - 1].
6865 */
6866 return (idx + q_len - 1) & mask;
6867 }
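/*
 * Worked example: for a queue of length 8, gaudi_queue_idx_dec(0, 8) returns
 * (0 + 7) & 7 = 7, i.e. the index wraps to the last entry, while
 * gaudi_queue_idx_dec(5, 8) simply returns 4.
 */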
6868
6869 /**
6870 * gaudi_handle_sw_config_stream_data - print SW config stream data
6871 *
6872 * @hdev: pointer to the habanalabs device structure
6873 * @stream: the QMAN's stream
6874 * @qman_base: base address of QMAN registers block
6875 * @event_mask: mask of the last events occurred
6876 */
6877 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6878 u64 qman_base, u64 event_mask)
6879 {
6880 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6881 u32 cq_ptr_lo_off, size;
6882
6883 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6884
6885 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6886 stream * cq_ptr_lo_off;
6887 cq_ptr_hi = cq_ptr_lo +
6888 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6889 cq_tsize = cq_ptr_lo +
6890 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6891
6892 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6893 size = RREG32(cq_tsize);
6894 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6895 stream, cq_ptr, size);
6896
6897 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6898 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6899 hdev->captured_err_info.undef_opcode.cq_size = size;
6900 hdev->captured_err_info.undef_opcode.stream_id = stream;
6901 }
6902 }
6903
6904 /**
6905 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6906 *
6907 * @hdev: pointer to the habanalabs device structure
6908 * @qid_base: first QID of the QMAN (out of 4 streams)
6909 * @stream: the QMAN's stream
6910 * @qman_base: base address of QMAN registers block
6911 * @event_mask: mask of the last events occurred
6912 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6913 */
6914 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6915 u32 stream, u64 qman_base,
6916 u64 event_mask,
6917 bool pr_sw_conf)
6918 {
6919 u32 ci, qm_ci_stream_off, queue_len;
6920 struct hl_hw_queue *q;
6921 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6922 int i;
6923
6924 q = &hdev->kernel_queues[qid_base + stream];
6925
6926 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6927 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6928 stream * qm_ci_stream_off;
6929
6930 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6931 q->int_queue_len : HL_QUEUE_LENGTH;
6932
6933 hdev->asic_funcs->hw_queues_lock(hdev);
6934
6935 if (pr_sw_conf)
6936 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6937
6938 ci = RREG32(pq_ci);
6939
6940 	/* we should start printing from ci - 1 */
6941 ci = gaudi_queue_idx_dec(ci, queue_len);
6942 memset(addr, 0, sizeof(addr));
6943
6944 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6945 struct hl_bd *bd;
6946 u32 len;
6947
6948 bd = q->kernel_address;
6949 bd += ci;
6950
6951 len = le32_to_cpu(bd->len);
6952 		/* len 0 means an uninitialized entry - break */
6953 if (!len)
6954 break;
6955
6956 addr[i] = le64_to_cpu(bd->ptr);
6957
6958 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6959 stream, ci, addr[i], len);
6960
6961 /* get previous ci, wrap if needed */
6962 ci = gaudi_queue_idx_dec(ci, queue_len);
6963 }
6964
6965 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6966 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6967 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6968
6969 if (arr_idx == 0) {
6970 undef_opcode->timestamp = ktime_get();
6971 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6972 }
6973
6974 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6975 undef_opcode->cb_addr_streams_len++;
6976 }
6977
6978 hdev->asic_funcs->hw_queues_unlock(hdev);
6979 }
6980
6981 /**
6982 * handle_qman_data_on_err - extract QMAN data on error
6983 *
6984 * @hdev: pointer to the habanalabs device structure
6985 * @qid_base: first QID of the QMAN (out of 4 streams)
6986 * @stream: the QMAN's stream
6987 * @qman_base: base address of QMAN registers block
6988 * @event_mask: mask of the last events occurred
6989 *
6990  * This function attempts to extract as much data as possible on a QMAN error.
6991  * On an upper CP, print the SW config stream data and the last 8 PQEs.
6992  * On the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
6993 */
6994 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6995 u32 stream, u64 qman_base, u64 event_mask)
6996 {
6997 u32 i;
6998
6999 if (stream != QMAN_STREAMS) {
7000 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7001 qman_base, event_mask, true);
7002 return;
7003 }
7004
7005 /* handle Lower-CP */
7006 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7007
7008 for (i = 0; i < QMAN_STREAMS; i++)
7009 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7010 qman_base, event_mask, false);
7011 }
7012
7013 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7014 const char *qm_name,
7015 u64 qman_base,
7016 u32 qid_base,
7017 u64 *event_mask)
7018 {
7019 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7020 u64 glbl_sts_addr, arb_err_addr;
7021 char reg_desc[32];
7022
7023 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7024 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7025
7026 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7027 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7028 glbl_sts_clr_val = 0;
7029 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7030
7031 if (!glbl_sts_val)
7032 continue;
7033
7034 if (i == QMAN_STREAMS)
7035 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7036 else
7037 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7038
7039 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7040 if (glbl_sts_val & BIT(j)) {
7041 dev_err_ratelimited(hdev->dev,
7042 "%s %s. err cause: %s\n",
7043 qm_name, reg_desc,
7044 gaudi_qman_error_cause[j]);
7045 glbl_sts_clr_val |= BIT(j);
7046 }
7047 }
7048 /* check for undefined opcode */
7049 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7050 hdev->captured_err_info.undef_opcode.write_enable) {
7051 memset(&hdev->captured_err_info.undef_opcode, 0,
7052 sizeof(hdev->captured_err_info.undef_opcode));
7053
7054 hdev->captured_err_info.undef_opcode.write_enable = false;
7055 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7056 }
7057
7058 		/* Write 1 to clear errors */
7059 if (!hdev->stop_on_err)
7060 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7061 else
7062 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7063 }
7064
7065 arb_err_val = RREG32(arb_err_addr);
7066
7067 if (!arb_err_val)
7068 return;
7069
7070 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7071 if (arb_err_val & BIT(j)) {
7072 dev_err_ratelimited(hdev->dev,
7073 "%s ARB_ERR. err cause: %s\n",
7074 qm_name,
7075 gaudi_qman_arb_error_cause[j]);
7076 }
7077 }
7078 }
7079
7080 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7081 struct hl_eq_sm_sei_data *sei_data)
7082 {
7083 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7084
7085 /* Flip the bits as the enum is ordered in the opposite way */
7086 index = (index ^ 0x3) & 0x3;
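	/*
	 * The XOR above swaps the index pairs (0 <-> 3, 1 <-> 2), e.g. an index
	 * of 0 selects gaudi_sync_manager_names[3].
	 */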
7087
7088 switch (sei_data->sei_cause) {
7089 case SM_SEI_SO_OVERFLOW:
7090 dev_err_ratelimited(hdev->dev,
7091 "%s SEI Error: SOB Group %u overflow/underflow",
7092 gaudi_sync_manager_names[index],
7093 le32_to_cpu(sei_data->sei_log));
7094 break;
7095 case SM_SEI_LBW_4B_UNALIGNED:
7096 dev_err_ratelimited(hdev->dev,
7097 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7098 gaudi_sync_manager_names[index],
7099 le32_to_cpu(sei_data->sei_log));
7100 break;
7101 case SM_SEI_AXI_RESPONSE_ERR:
7102 dev_err_ratelimited(hdev->dev,
7103 "%s SEI Error: AXI ID %u response error",
7104 gaudi_sync_manager_names[index],
7105 le32_to_cpu(sei_data->sei_log));
7106 break;
7107 default:
7108 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7109 le32_to_cpu(sei_data->sei_log));
7110 break;
7111 }
7112 }
7113
7114 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7115 struct hl_eq_ecc_data *ecc_data)
7116 {
7117 struct ecc_info_extract_params params;
7118 u64 ecc_address = 0, ecc_syndrom = 0;
7119 u8 index, memory_wrapper_idx = 0;
7120 bool extract_info_from_fw;
7121 int rc;
7122
7123 if (hdev->asic_prop.fw_security_enabled) {
7124 extract_info_from_fw = true;
7125 goto extract_ecc_info;
7126 }
7127
7128 switch (event_type) {
7129 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7130 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7131 extract_info_from_fw = true;
7132 break;
7133 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7134 index = event_type - GAUDI_EVENT_TPC0_SERR;
7135 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7136 params.num_memories = 90;
7137 params.derr = false;
7138 extract_info_from_fw = false;
7139 break;
7140 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7141 index = event_type - GAUDI_EVENT_TPC0_DERR;
7142 params.block_address =
7143 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7144 params.num_memories = 90;
7145 params.derr = true;
7146 extract_info_from_fw = false;
7147 break;
7148 case GAUDI_EVENT_MME0_ACC_SERR:
7149 case GAUDI_EVENT_MME1_ACC_SERR:
7150 case GAUDI_EVENT_MME2_ACC_SERR:
7151 case GAUDI_EVENT_MME3_ACC_SERR:
7152 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7153 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7154 params.num_memories = 128;
7155 params.derr = false;
7156 extract_info_from_fw = false;
7157 break;
7158 case GAUDI_EVENT_MME0_ACC_DERR:
7159 case GAUDI_EVENT_MME1_ACC_DERR:
7160 case GAUDI_EVENT_MME2_ACC_DERR:
7161 case GAUDI_EVENT_MME3_ACC_DERR:
7162 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7163 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7164 params.num_memories = 128;
7165 params.derr = true;
7166 extract_info_from_fw = false;
7167 break;
7168 case GAUDI_EVENT_MME0_SBAB_SERR:
7169 case GAUDI_EVENT_MME1_SBAB_SERR:
7170 case GAUDI_EVENT_MME2_SBAB_SERR:
7171 case GAUDI_EVENT_MME3_SBAB_SERR:
7172 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7173 params.block_address =
7174 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7175 params.num_memories = 33;
7176 params.derr = false;
7177 extract_info_from_fw = false;
7178 break;
7179 case GAUDI_EVENT_MME0_SBAB_DERR:
7180 case GAUDI_EVENT_MME1_SBAB_DERR:
7181 case GAUDI_EVENT_MME2_SBAB_DERR:
7182 case GAUDI_EVENT_MME3_SBAB_DERR:
7183 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7184 params.block_address =
7185 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7186 params.num_memories = 33;
7187 params.derr = true;
7188 extract_info_from_fw = false;
7189 break;
7190 default:
7191 return;
7192 }
7193
7194 extract_ecc_info:
7195 if (extract_info_from_fw) {
7196 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7197 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7198 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7199 } else {
7200 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7201 &ecc_syndrom, &memory_wrapper_idx);
7202 if (rc)
7203 return;
7204 }
7205
7206 dev_err(hdev->dev,
7207 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7208 ecc_address, ecc_syndrom, memory_wrapper_idx);
7209 }
7210
7211 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7212 {
7213 u64 qman_base;
7214 char desc[32];
7215 u32 qid_base;
7216 u8 index;
7217
7218 switch (event_type) {
7219 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7220 index = event_type - GAUDI_EVENT_TPC0_QM;
7221 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7222 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7223 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7224 break;
7225 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7226 if (event_type == GAUDI_EVENT_MME0_QM) {
7227 index = 0;
7228 qid_base = GAUDI_QUEUE_ID_MME_0_0;
7229 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7230 index = 2;
7231 qid_base = GAUDI_QUEUE_ID_MME_1_0;
7232 }
7233 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7234 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7235 break;
7236 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7237 index = event_type - GAUDI_EVENT_DMA0_QM;
7238 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7239 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7240 if (index > 1)
7241 qid_base++;
7242 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7243 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7244 break;
7245 case GAUDI_EVENT_NIC0_QM0:
7246 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7247 qman_base = mmNIC0_QM0_BASE;
7248 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7249 break;
7250 case GAUDI_EVENT_NIC0_QM1:
7251 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7252 qman_base = mmNIC0_QM1_BASE;
7253 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7254 break;
7255 case GAUDI_EVENT_NIC1_QM0:
7256 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7257 qman_base = mmNIC1_QM0_BASE;
7258 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7259 break;
7260 case GAUDI_EVENT_NIC1_QM1:
7261 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7262 qman_base = mmNIC1_QM1_BASE;
7263 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7264 break;
7265 case GAUDI_EVENT_NIC2_QM0:
7266 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7267 qman_base = mmNIC2_QM0_BASE;
7268 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7269 break;
7270 case GAUDI_EVENT_NIC2_QM1:
7271 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7272 qman_base = mmNIC2_QM1_BASE;
7273 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7274 break;
7275 case GAUDI_EVENT_NIC3_QM0:
7276 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7277 qman_base = mmNIC3_QM0_BASE;
7278 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7279 break;
7280 case GAUDI_EVENT_NIC3_QM1:
7281 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7282 qman_base = mmNIC3_QM1_BASE;
7283 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7284 break;
7285 case GAUDI_EVENT_NIC4_QM0:
7286 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7287 qman_base = mmNIC4_QM0_BASE;
7288 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7289 break;
7290 case GAUDI_EVENT_NIC4_QM1:
7291 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7292 qman_base = mmNIC4_QM1_BASE;
7293 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7294 break;
7295 default:
7296 return;
7297 }
7298
7299 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7300 }
7301
7302 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7303 bool razwi)
7304 {
7305 u32 engine_id_1, engine_id_2;
7306 char desc[64] = "";
7307 u64 razwi_addr = 0;
7308 u8 razwi_type;
7309 int rc;
7310
7311 /*
7312 	 * Init the engine ids as not valid by default; they get a valid value only if
7313 	 * the razwi was initiated by an engine that has an engine id.
7314 	 * Init the razwi type to its default; it is changed only if the razwi was
7315 	 * caused by a page fault or an MMU access error.
7316 */
7317 engine_id_1 = U16_MAX;
7318 engine_id_2 = U16_MAX;
7319 razwi_type = U8_MAX;
7320
7321 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7322 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7323 event_type, desc);
7324
7325 if (razwi) {
7326 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7327 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7328
7329 		/* In case it's the first razwi, save its parameters */
7330 rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
7331 if (rc) {
7332 hdev->captured_err_info.razwi.timestamp = ktime_get();
7333 hdev->captured_err_info.razwi.addr = razwi_addr;
7334 hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
7335 hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
7336 /*
7337 * If first engine id holds non valid value the razwi initiator
7338 * does not have engine id
7339 */
7340 hdev->captured_err_info.razwi.non_engine_initiator =
7341 (engine_id_1 == U16_MAX);
7342 hdev->captured_err_info.razwi.type = razwi_type;
7343
7344 }
7345 }
7346 }
7347
7348 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7349 struct cpucp_pkt_sync_err *sync_err)
7350 {
7351 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7352
7353 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7354 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7355 }
7356
7357 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7358 struct hl_eq_fw_alive *fw_alive)
7359 {
7360 dev_err(hdev->dev,
7361 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7362 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7363 "Minor" : "Critical", fw_alive->process_id,
7364 fw_alive->thread_id, fw_alive->uptime_seconds);
7365 }
7366
7367 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7368 void *data)
7369 {
7370 char desc[64] = "", *type;
7371 struct eq_nic_sei_event *eq_nic_sei = data;
7372 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7373
7374 switch (eq_nic_sei->axi_error_cause) {
7375 case RXB:
7376 type = "RXB";
7377 break;
7378 case RXE:
7379 type = "RXE";
7380 break;
7381 case TXS:
7382 type = "TXS";
7383 break;
7384 case TXE:
7385 type = "TXE";
7386 break;
7387 case QPC_RESP:
7388 type = "QPC_RESP";
7389 break;
7390 case NON_AXI_ERR:
7391 type = "NON_AXI_ERR";
7392 break;
7393 case TMR:
7394 type = "TMR";
7395 break;
7396 default:
7397 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7398 eq_nic_sei->axi_error_cause);
7399 type = "N/A";
7400 break;
7401 }
7402
7403 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7404 eq_nic_sei->id);
7405 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7406 event_type, desc);
7407 }
7408
7409 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7410 {
7411 /* GAUDI doesn't support any reset except hard-reset */
7412 return -EPERM;
7413 }
7414
7415 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7416 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7417 {
7418 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7419 int rc = 0;
7420
7421 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7422 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7423 if (!hbm_ecc_data) {
7424 dev_err(hdev->dev, "No FW ECC data");
7425 return 0;
7426 }
7427
7428 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7429 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7430 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7431 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7433 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7435 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7437 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7438 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7439 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7440 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7441 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7442
7443 dev_err(hdev->dev,
7444 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7445 device, ch, wr_par, rd_par, ca_par, serr, derr);
7446 dev_err(hdev->dev,
7447 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7448 device, ch, hbm_ecc_data->first_addr, type,
7449 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7450 hbm_ecc_data->dec_cnt);
7451 return 0;
7452 }
7453
7454 if (hdev->asic_prop.fw_security_enabled) {
7455 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7456 return 0;
7457 }
7458
7459 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7460 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7461 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7462 val = (val & 0xFF) | ((val >> 8) & 0xFF);
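		/* OR the two bytes of the interrupt info together; a non-zero result means an interrupt was reported for this channel */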
7463 if (val) {
7464 rc = -EIO;
7465 dev_err(hdev->dev,
7466 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7467 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7468 (val >> 2) & 0x1, (val >> 3) & 0x1,
7469 (val >> 4) & 0x1);
7470
7471 val2 = RREG32(base + ch * 0x1000 + 0x060);
7472 dev_err(hdev->dev,
7473 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7474 device, ch * 2,
7475 RREG32(base + ch * 0x1000 + 0x064),
7476 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7477 (val2 & 0xFF0000) >> 16,
7478 (val2 & 0xFF000000) >> 24);
7479 }
7480
7481 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7482 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7483 if (val) {
7484 rc = -EIO;
7485 dev_err(hdev->dev,
7486 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7487 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7488 (val >> 2) & 0x1, (val >> 3) & 0x1,
7489 (val >> 4) & 0x1);
7490
7491 val2 = RREG32(base + ch * 0x1000 + 0x070);
7492 dev_err(hdev->dev,
7493 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7494 device, ch * 2 + 1,
7495 RREG32(base + ch * 0x1000 + 0x074),
7496 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7497 (val2 & 0xFF0000) >> 16,
7498 (val2 & 0xFF000000) >> 24);
7499 }
7500
7501 /* Clear interrupts */
7502 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7503 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7504 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7505 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7506 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7507 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7508 }
7509
7510 val = RREG32(base + 0x8F30);
7511 val2 = RREG32(base + 0x8F34);
7512 if (val | val2) {
7513 rc = -EIO;
7514 dev_err(hdev->dev,
7515 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7516 device, val, val2);
7517 }
7518 val = RREG32(base + 0x8F40);
7519 val2 = RREG32(base + 0x8F44);
7520 if (val | val2) {
7521 rc = -EIO;
7522 dev_err(hdev->dev,
7523 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7524 device, val, val2);
7525 }
7526
7527 return rc;
7528 }
7529
7530 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7531 {
7532 switch (hbm_event_type) {
7533 case GAUDI_EVENT_HBM0_SPI_0:
7534 case GAUDI_EVENT_HBM0_SPI_1:
7535 return 0;
7536 case GAUDI_EVENT_HBM1_SPI_0:
7537 case GAUDI_EVENT_HBM1_SPI_1:
7538 return 1;
7539 case GAUDI_EVENT_HBM2_SPI_0:
7540 case GAUDI_EVENT_HBM2_SPI_1:
7541 return 2;
7542 case GAUDI_EVENT_HBM3_SPI_0:
7543 case GAUDI_EVENT_HBM3_SPI_1:
7544 return 3;
7545 default:
7546 break;
7547 }
7548
7549 /* Should never happen */
7550 return 0;
7551 }
7552
7553 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7554 char *interrupt_name)
7555 {
7556 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7557 bool soft_reset_required = false;
7558
7559 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7560 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7561
7562 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7563 if (tpc_interrupts_cause & BIT(i)) {
7564 dev_err_ratelimited(hdev->dev,
7565 "TPC%d_%s interrupt cause: %s\n",
7566 tpc_id, interrupt_name,
7567 gaudi_tpc_interrupts_cause[i]);
7568 /* If this is QM error, we need to soft-reset */
7569 if (i == 15)
7570 soft_reset_required = true;
7571 }
7572
7573 /* Clear interrupts */
7574 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7575
7576 return soft_reset_required;
7577 }
7578
7579 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7580 {
7581 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7582 }
7583
7584 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7585 {
7586 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7587 }
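/*
 * The divisors above assume the DEC events are spaced 2 entries apart and the
 * KRN_ERR events 6 entries apart per TPC in the event map; this is inferred
 * from the arithmetic, not restated from the event table itself.
 */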
7588
7589 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
7590 {
7591 ktime_t zero_time = ktime_set(0, 0);
7592
7593 mutex_lock(&hdev->clk_throttling.lock);
7594
7595 switch (event_type) {
7596 case GAUDI_EVENT_FIX_POWER_ENV_S:
7597 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7598 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7599 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7600 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7601 dev_info_ratelimited(hdev->dev,
7602 "Clock throttling due to power consumption\n");
7603 break;
7604
7605 case GAUDI_EVENT_FIX_POWER_ENV_E:
7606 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7607 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7608 dev_info_ratelimited(hdev->dev,
7609 "Power envelop is safe, back to optimal clock\n");
7610 break;
7611
7612 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7613 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7614 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7615 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7616 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7617 dev_info_ratelimited(hdev->dev,
7618 "Clock throttling due to overheating\n");
7619 break;
7620
7621 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7622 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7623 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7624 dev_info_ratelimited(hdev->dev,
7625 "Thermal envelop is safe, back to optimal clock\n");
7626 break;
7627
7628 default:
7629 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7630 event_type);
7631 break;
7632 }
7633
7634 mutex_unlock(&hdev->clk_throttling.lock);
7635 }
7636
7637 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7638 {
7639 struct gaudi_device *gaudi = hdev->asic_specific;
7640 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7641 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7642 u32 fw_fatal_err_flag = 0, flags = 0;
7643 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7644 >> EQ_CTL_EVENT_TYPE_SHIFT);
7645 bool reset_required, reset_direct = false;
7646 u8 cause;
7647 int rc;
7648
7649 if (event_type >= GAUDI_EVENT_SIZE) {
7650 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7651 event_type, GAUDI_EVENT_SIZE - 1);
7652 return;
7653 }
7654
7655 gaudi->events_stat[event_type]++;
7656 gaudi->events_stat_aggregate[event_type]++;
7657
7658 switch (event_type) {
7659 case GAUDI_EVENT_PCIE_CORE_DERR:
7660 case GAUDI_EVENT_PCIE_IF_DERR:
7661 case GAUDI_EVENT_PCIE_PHY_DERR:
7662 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7663 case GAUDI_EVENT_MME0_ACC_DERR:
7664 case GAUDI_EVENT_MME0_SBAB_DERR:
7665 case GAUDI_EVENT_MME1_ACC_DERR:
7666 case GAUDI_EVENT_MME1_SBAB_DERR:
7667 case GAUDI_EVENT_MME2_ACC_DERR:
7668 case GAUDI_EVENT_MME2_SBAB_DERR:
7669 case GAUDI_EVENT_MME3_ACC_DERR:
7670 case GAUDI_EVENT_MME3_SBAB_DERR:
7671 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7672 fallthrough;
7673 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7674 case GAUDI_EVENT_PSOC_MEM_DERR:
7675 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7676 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7677 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7678 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7679 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7680 case GAUDI_EVENT_MMU_DERR:
7681 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7682 gaudi_print_irq_info(hdev, event_type, true);
7683 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7684 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7685 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7686 goto reset_device;
7687
7688 case GAUDI_EVENT_GIC500:
7689 case GAUDI_EVENT_AXI_ECC:
7690 case GAUDI_EVENT_L2_RAM_ECC:
7691 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7692 gaudi_print_irq_info(hdev, event_type, false);
7693 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7694 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7695 goto reset_device;
7696
7697 case GAUDI_EVENT_HBM0_SPI_0:
7698 case GAUDI_EVENT_HBM1_SPI_0:
7699 case GAUDI_EVENT_HBM2_SPI_0:
7700 case GAUDI_EVENT_HBM3_SPI_0:
7701 gaudi_print_irq_info(hdev, event_type, false);
7702 gaudi_hbm_read_interrupts(hdev,
7703 gaudi_hbm_event_to_dev(event_type),
7704 &eq_entry->hbm_ecc_data);
7705 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7706 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7707 goto reset_device;
7708
7709 case GAUDI_EVENT_HBM0_SPI_1:
7710 case GAUDI_EVENT_HBM1_SPI_1:
7711 case GAUDI_EVENT_HBM2_SPI_1:
7712 case GAUDI_EVENT_HBM3_SPI_1:
7713 gaudi_print_irq_info(hdev, event_type, false);
7714 gaudi_hbm_read_interrupts(hdev,
7715 gaudi_hbm_event_to_dev(event_type),
7716 &eq_entry->hbm_ecc_data);
7717 hl_fw_unmask_irq(hdev, event_type);
7718 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7719 break;
7720
7721 case GAUDI_EVENT_TPC0_DEC:
7722 case GAUDI_EVENT_TPC1_DEC:
7723 case GAUDI_EVENT_TPC2_DEC:
7724 case GAUDI_EVENT_TPC3_DEC:
7725 case GAUDI_EVENT_TPC4_DEC:
7726 case GAUDI_EVENT_TPC5_DEC:
7727 case GAUDI_EVENT_TPC6_DEC:
7728 case GAUDI_EVENT_TPC7_DEC:
7729 		/* In a TPC DEC event, notify on TPC assertion. While there isn't
7730 		 * a specific event for assertion yet, the FW generates a TPC DEC event.
7731 		 * The SW upper layer will inspect an internal mapped area to indicate
7732 		 * whether the event is a TPC assertion or a "real" TPC DEC.
7733 		 */
7734 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7735 gaudi_print_irq_info(hdev, event_type, true);
7736 reset_required = gaudi_tpc_read_interrupts(hdev,
7737 tpc_dec_event_to_tpc_id(event_type),
7738 "AXI_SLV_DEC_Error");
7739 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7740 if (reset_required) {
7741 dev_err(hdev->dev, "reset required due to %s\n",
7742 gaudi_irq_map_table[event_type].name);
7743
7744 reset_direct = true;
7745 goto reset_device;
7746 } else {
7747 hl_fw_unmask_irq(hdev, event_type);
7748 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7749 }
7750 break;
7751
7752 case GAUDI_EVENT_TPC0_KRN_ERR:
7753 case GAUDI_EVENT_TPC1_KRN_ERR:
7754 case GAUDI_EVENT_TPC2_KRN_ERR:
7755 case GAUDI_EVENT_TPC3_KRN_ERR:
7756 case GAUDI_EVENT_TPC4_KRN_ERR:
7757 case GAUDI_EVENT_TPC5_KRN_ERR:
7758 case GAUDI_EVENT_TPC6_KRN_ERR:
7759 case GAUDI_EVENT_TPC7_KRN_ERR:
7760 gaudi_print_irq_info(hdev, event_type, true);
7761 reset_required = gaudi_tpc_read_interrupts(hdev,
7762 tpc_krn_event_to_tpc_id(event_type),
7763 "KRN_ERR");
7764 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7765 if (reset_required) {
7766 dev_err(hdev->dev, "reset required due to %s\n",
7767 gaudi_irq_map_table[event_type].name);
7768
7769 reset_direct = true;
7770 goto reset_device;
7771 } else {
7772 hl_fw_unmask_irq(hdev, event_type);
7773 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7774 }
7775 break;
7776
7777 case GAUDI_EVENT_PCIE_CORE_SERR:
7778 case GAUDI_EVENT_PCIE_IF_SERR:
7779 case GAUDI_EVENT_PCIE_PHY_SERR:
7780 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7781 case GAUDI_EVENT_MME0_ACC_SERR:
7782 case GAUDI_EVENT_MME0_SBAB_SERR:
7783 case GAUDI_EVENT_MME1_ACC_SERR:
7784 case GAUDI_EVENT_MME1_SBAB_SERR:
7785 case GAUDI_EVENT_MME2_ACC_SERR:
7786 case GAUDI_EVENT_MME2_SBAB_SERR:
7787 case GAUDI_EVENT_MME3_ACC_SERR:
7788 case GAUDI_EVENT_MME3_SBAB_SERR:
7789 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7790 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7791 case GAUDI_EVENT_PSOC_MEM_SERR:
7792 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7793 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7794 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7795 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7796 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7797 fallthrough;
7798 case GAUDI_EVENT_MMU_SERR:
7799 gaudi_print_irq_info(hdev, event_type, true);
7800 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7801 hl_fw_unmask_irq(hdev, event_type);
7802 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7803 break;
7804
7805 case GAUDI_EVENT_PCIE_DEC:
7806 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7807 case GAUDI_EVENT_PSOC_AXI_DEC:
7808 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7809 gaudi_print_irq_info(hdev, event_type, true);
7810 hl_fw_unmask_irq(hdev, event_type);
7811 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7812 break;
7813
7814 case GAUDI_EVENT_MMU_PAGE_FAULT:
7815 case GAUDI_EVENT_MMU_WR_PERM:
7816 gaudi_print_irq_info(hdev, event_type, true);
7817 hl_fw_unmask_irq(hdev, event_type);
7818 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7819 break;
7820
7821 case GAUDI_EVENT_MME0_WBC_RSP:
7822 case GAUDI_EVENT_MME0_SBAB0_RSP:
7823 case GAUDI_EVENT_MME1_WBC_RSP:
7824 case GAUDI_EVENT_MME1_SBAB0_RSP:
7825 case GAUDI_EVENT_MME2_WBC_RSP:
7826 case GAUDI_EVENT_MME2_SBAB0_RSP:
7827 case GAUDI_EVENT_MME3_WBC_RSP:
7828 case GAUDI_EVENT_MME3_SBAB0_RSP:
7829 case GAUDI_EVENT_RAZWI_OR_ADC:
7830 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7831 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7832 fallthrough;
7833 case GAUDI_EVENT_NIC0_QM0:
7834 case GAUDI_EVENT_NIC0_QM1:
7835 case GAUDI_EVENT_NIC1_QM0:
7836 case GAUDI_EVENT_NIC1_QM1:
7837 case GAUDI_EVENT_NIC2_QM0:
7838 case GAUDI_EVENT_NIC2_QM1:
7839 case GAUDI_EVENT_NIC3_QM0:
7840 case GAUDI_EVENT_NIC3_QM1:
7841 case GAUDI_EVENT_NIC4_QM0:
7842 case GAUDI_EVENT_NIC4_QM1:
7843 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7844 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7845 gaudi_print_irq_info(hdev, event_type, true);
7846 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7847 hl_fw_unmask_irq(hdev, event_type);
7848 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7849 break;
7850
7851 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7852 gaudi_print_irq_info(hdev, event_type, true);
7853 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7854 goto reset_device;
7855
7856 case GAUDI_EVENT_TPC0_BMON_SPMU:
7857 case GAUDI_EVENT_TPC1_BMON_SPMU:
7858 case GAUDI_EVENT_TPC2_BMON_SPMU:
7859 case GAUDI_EVENT_TPC3_BMON_SPMU:
7860 case GAUDI_EVENT_TPC4_BMON_SPMU:
7861 case GAUDI_EVENT_TPC5_BMON_SPMU:
7862 case GAUDI_EVENT_TPC6_BMON_SPMU:
7863 case GAUDI_EVENT_TPC7_BMON_SPMU:
7864 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7865 gaudi_print_irq_info(hdev, event_type, false);
7866 hl_fw_unmask_irq(hdev, event_type);
7867 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7868 break;
7869
7870 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7871 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7872 hl_fw_unmask_irq(hdev, event_type);
7873 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7874 break;
7875
7876 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7877 gaudi_print_irq_info(hdev, event_type, false);
7878 gaudi_print_sm_sei_info(hdev, event_type,
7879 &eq_entry->sm_sei_data);
7880 rc = hl_state_dump(hdev);
7881 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7882 if (rc)
7883 dev_err(hdev->dev,
7884 "Error during system state dump %d\n", rc);
7885 hl_fw_unmask_irq(hdev, event_type);
7886 break;
7887
7888 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7889 break;
7890
7891 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7892 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7893 gaudi_print_clk_change_info(hdev, event_type);
7894 hl_fw_unmask_irq(hdev, event_type);
7895 break;
7896
7897 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7898 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7899 dev_err(hdev->dev,
7900 "Received high temp H/W interrupt %d (cause %d)\n",
7901 event_type, cause);
7902 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7903 break;
7904
7905 case GAUDI_EVENT_DEV_RESET_REQ:
7906 gaudi_print_irq_info(hdev, event_type, false);
7907 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7908 goto reset_device;
7909
7910 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7911 gaudi_print_irq_info(hdev, event_type, false);
7912 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7913 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7914 goto reset_device;
7915
7916 case GAUDI_EVENT_FW_ALIVE_S:
7917 gaudi_print_irq_info(hdev, event_type, false);
7918 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7919 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7920 goto reset_device;
7921
7922 default:
7923 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7924 event_type);
7925 break;
7926 }
7927
7928 if (event_mask)
7929 hl_notifier_event_send_all(hdev, event_mask);
7930
7931 return;
7932
7933 reset_device:
7934 reset_required = true;
7935
7936 if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7937 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7938
7939 		/* notify on device unavailable while the reset is triggered by FW */
7940 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7941 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7942 } else if (hdev->hard_reset_on_fw_events) {
7943 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7944 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7945 } else {
7946 reset_required = false;
7947 }
7948
7949 	/* Even if the reset does not execute, a notification on the
7950 	 * occurred event still needs to be sent here
7951 */
7952 hl_notifier_event_send_all(hdev, event_mask);
7953 if (reset_required)
7954 hl_device_reset(hdev, flags);
7955 else
7956 hl_fw_unmask_irq(hdev, event_type);
7957 }
7958
7959 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7960 {
7961 struct gaudi_device *gaudi = hdev->asic_specific;
7962
7963 if (aggregate) {
7964 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7965 return gaudi->events_stat_aggregate;
7966 }
7967
7968 *size = (u32) sizeof(gaudi->events_stat);
7969 return gaudi->events_stat;
7970 }
7971
7972 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7973 {
7974 struct gaudi_device *gaudi = hdev->asic_specific;
7975 u32 status, timeout_usec;
7976 int rc;
7977
7978 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7979 hdev->reset_info.hard_reset_pending)
7980 return 0;
7981
7982 if (hdev->pldm)
7983 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7984 else
7985 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7986
7987 /* L0 & L1 invalidation */
7988 WREG32(mmSTLB_INV_PS, 3);
7989 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7990 WREG32(mmSTLB_INV_PS, 2);
7991
7992 rc = hl_poll_timeout(
7993 hdev,
7994 mmSTLB_INV_PS,
7995 status,
7996 !status,
7997 1000,
7998 timeout_usec);
7999
8000 WREG32(mmSTLB_INV_SET, 0);
8001
8002 return rc;
8003 }
8004
8005 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8006 bool is_hard, u32 flags,
8007 u32 asid, u64 va, u64 size)
8008 {
8009 /* Treat as invalidate all because there is no range invalidation
8010 * in Gaudi
8011 */
8012 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8013 }
8014
8015 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8016 {
8017 u32 status, timeout_usec;
8018 int rc;
8019
8020 if (hdev->pldm)
8021 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8022 else
8023 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8024
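	/*
	 * Program the new hop0 for this ASID: the physical address is split across
	 * the PA43_12 and PA49_44 registers, and writing bit 31 of MMU_BUSY kicks
	 * the update, which is then polled for completion below.
	 */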
8025 WREG32(MMU_ASID, asid);
8026 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8027 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8028 WREG32(MMU_BUSY, 0x80000000);
8029
8030 rc = hl_poll_timeout(
8031 hdev,
8032 MMU_BUSY,
8033 status,
8034 !(status & 0x80000000),
8035 1000,
8036 timeout_usec);
8037
8038 if (rc) {
8039 dev_err(hdev->dev,
8040 "Timeout during MMU hop0 config of asid %d\n", asid);
8041 return rc;
8042 }
8043
8044 return 0;
8045 }
8046
8047 static int gaudi_send_heartbeat(struct hl_device *hdev)
8048 {
8049 struct gaudi_device *gaudi = hdev->asic_specific;
8050
8051 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8052 return 0;
8053
8054 return hl_fw_send_heartbeat(hdev);
8055 }
8056
8057 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8058 {
8059 struct gaudi_device *gaudi = hdev->asic_specific;
8060 struct asic_fixed_properties *prop = &hdev->asic_prop;
8061 int rc;
8062
8063 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8064 return 0;
8065
8066 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8067 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8068 mmCPU_BOOT_ERR1);
8069 if (rc)
8070 return rc;
8071
8072 if (!strlen(prop->cpucp_info.card_name))
8073 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8074 CARD_NAME_MAX_LEN);
8075
8076 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8077
8078 set_default_power_values(hdev);
8079
8080 return 0;
8081 }
8082
8083 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8084 struct engines_data *e)
8085 {
8086 struct gaudi_device *gaudi = hdev->asic_specific;
8087 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8088 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8089 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8090 unsigned long *mask = (unsigned long *)mask_arr;
8091 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8092 bool is_idle = true, is_eng_idle, is_slave;
8093 u64 offset;
8094 int i, dma_id, port;
8095
8096 if (e)
8097 hl_engine_data_sprintf(e,
8098 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8099 "--- ------- ------------ ---------- -------------\n");
8100
8101 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8102 dma_id = gaudi_dma_assignment[i];
8103 offset = dma_id * DMA_QMAN_OFFSET;
8104
8105 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8106 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8107 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8108 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8109 IS_DMA_IDLE(dma_core_sts0);
8110 is_idle &= is_eng_idle;
8111
8112 if (mask && !is_eng_idle)
8113 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8114 if (e)
8115 hl_engine_data_sprintf(e, fmt, dma_id,
8116 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8117 qm_cgm_sts, dma_core_sts0);
8118 }
8119
8120 if (e)
8121 hl_engine_data_sprintf(e,
8122 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8123 "--- ------- ------------ ---------- ----------\n");
8124
8125 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8126 offset = i * TPC_QMAN_OFFSET;
8127 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8128 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8129 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8130 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8131 IS_TPC_IDLE(tpc_cfg_sts);
8132 is_idle &= is_eng_idle;
8133
8134 if (mask && !is_eng_idle)
8135 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8136 if (e)
8137 hl_engine_data_sprintf(e, fmt, i,
8138 is_eng_idle ? "Y" : "N",
8139 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8140 }
8141
8142 if (e)
8143 hl_engine_data_sprintf(e,
8144 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8145 "--- ------- ------------ ---------- -----------\n");
8146
8147 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8148 offset = i * MME_QMAN_OFFSET;
8149 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8150 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8151
8152 /* MME 1 & 3 are slaves, no need to check their QMANs */
8153 is_slave = i % 2;
8154 if (!is_slave) {
8155 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8156 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8157 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8158 }
8159
8160 is_idle &= is_eng_idle;
8161
8162 if (mask && !is_eng_idle)
8163 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8164 if (e) {
8165 if (!is_slave)
8166 hl_engine_data_sprintf(e, fmt, i,
8167 is_eng_idle ? "Y" : "N",
8168 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8169 else
8170 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8171 is_eng_idle ? "Y" : "N", "-",
8172 "-", mme_arch_sts);
8173 }
8174 }
8175
8176 if (e)
8177 hl_engine_data_sprintf(e,
8178 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8179 "--- ------- ------------ ----------\n");
8180
8181 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8182 offset = i * NIC_MACRO_QMAN_OFFSET;
8183 port = 2 * i;
8184 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8185 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8186 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8187 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8188 is_idle &= is_eng_idle;
8189
8190 if (mask && !is_eng_idle)
8191 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8192 if (e)
8193 hl_engine_data_sprintf(e, nic_fmt, port,
8194 is_eng_idle ? "Y" : "N",
8195 qm_glbl_sts0, qm_cgm_sts);
8196 }
8197
8198 port = 2 * i + 1;
8199 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8200 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8201 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8202 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8203 is_idle &= is_eng_idle;
8204
8205 if (mask && !is_eng_idle)
8206 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8207 if (e)
8208 hl_engine_data_sprintf(e, nic_fmt, port,
8209 is_eng_idle ? "Y" : "N",
8210 qm_glbl_sts0, qm_cgm_sts);
8211 }
8212 }
8213
8214 if (e)
8215 hl_engine_data_sprintf(e, "\n");
8216
8217 return is_idle;
8218 }
8219
8220 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8221 __acquires(&gaudi->hw_queues_lock)
8222 {
8223 struct gaudi_device *gaudi = hdev->asic_specific;
8224
8225 spin_lock(&gaudi->hw_queues_lock);
8226 }
8227
8228 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8229 __releases(&gaudi->hw_queues_lock)
8230 {
8231 struct gaudi_device *gaudi = hdev->asic_specific;
8232
8233 spin_unlock(&gaudi->hw_queues_lock);
8234 }
8235
8236 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8237 {
8238 return hdev->pdev->device;
8239 }
8240
8241 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8242 size_t max_size)
8243 {
8244 struct gaudi_device *gaudi = hdev->asic_specific;
8245
8246 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8247 return 0;
8248
8249 return hl_fw_get_eeprom_data(hdev, data, max_size);
8250 }
8251
8252 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8253 {
8254 struct gaudi_device *gaudi = hdev->asic_specific;
8255
8256 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8257 return 0;
8258
8259 return hl_fw_get_monitor_dump(hdev, data);
8260 }
8261
8262 /*
8263 * this function should be used only during initialization and/or after reset,
8264 * when there are no active users.
8265 */
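/*
 * Rough sequence, as coded below: program the kernel base, icache base and a
 * dummy LUT pointer with the kernel address, invalidate + prefetch the icache
 * and wait for the vector pipe to report empty, then issue TPC_EXECUTE and
 * wait for the vector pipe to empty again and for the WQ in-flight counter to
 * drop to zero.
 */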
8266 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8267 {
8268 u64 kernel_timeout;
8269 u32 status, offset;
8270 int rc;
8271
8272 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8273
8274 if (hdev->pldm)
8275 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8276 else
8277 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8278
8279 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8280 lower_32_bits(tpc_kernel));
8281 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8282 upper_32_bits(tpc_kernel));
8283
8284 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8285 lower_32_bits(tpc_kernel));
8286 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8287 upper_32_bits(tpc_kernel));
8288 /* set a valid LUT pointer, content is of no significance */
8289 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8290 lower_32_bits(tpc_kernel));
8291 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8292 upper_32_bits(tpc_kernel));
8293
8294 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8295 lower_32_bits(CFG_BASE +
8296 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8297
8298 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8299 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8300 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8301 /* wait a bit for the engine to start executing */
8302 usleep_range(1000, 1500);
8303
8304 /* wait until engine has finished executing */
8305 rc = hl_poll_timeout(
8306 hdev,
8307 mmTPC0_CFG_STATUS + offset,
8308 status,
8309 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8310 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8311 1000,
8312 kernel_timeout);
8313
8314 if (rc) {
8315 dev_err(hdev->dev,
8316 "Timeout while waiting for TPC%d icache prefetch\n",
8317 tpc_id);
8318 return -EIO;
8319 }
8320
8321 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8322 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8323
8324 /* wait a bit for the engine to start executing */
8325 usleep_range(1000, 1500);
8326
8327 /* wait until engine has finished executing */
8328 rc = hl_poll_timeout(
8329 hdev,
8330 mmTPC0_CFG_STATUS + offset,
8331 status,
8332 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8333 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8334 1000,
8335 kernel_timeout);
8336
8337 if (rc) {
8338 dev_err(hdev->dev,
8339 "Timeout while waiting for TPC%d vector pipe\n",
8340 tpc_id);
8341 return -EIO;
8342 }
8343
8344 rc = hl_poll_timeout(
8345 hdev,
8346 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8347 status,
8348 (status == 0),
8349 1000,
8350 kernel_timeout);
8351
8352 if (rc) {
8353 dev_err(hdev->dev,
8354 "Timeout while waiting for TPC%d kernel to execute\n",
8355 tpc_id);
8356 return -EIO;
8357 }
8358
8359 return 0;
8360 }
8361
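/*
 * Internal CB pool setup, roughly: allocate a coherent host buffer of
 * HOST_SPACE_INTERNAL_CB_SZ bytes, back a gen_pool with it (allocation order
 * derived from the size of one collective CB), then reserve a host VA block
 * and MMU-map it to the buffer so the queues can fetch these CBs via the MMU.
 */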
8362 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8363 struct hl_ctx *ctx)
8364 {
8365 struct gaudi_device *gaudi = hdev->asic_specific;
8366 int min_alloc_order, rc, collective_cb_size;
8367
8368 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8369 return 0;
8370
8371 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8372 HOST_SPACE_INTERNAL_CB_SZ,
8373 &hdev->internal_cb_pool_dma_addr,
8374 GFP_KERNEL | __GFP_ZERO);
8375
8376 if (!hdev->internal_cb_pool_virt_addr)
8377 return -ENOMEM;
8378
8379 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8380 sizeof(struct packet_fence);
8381 min_alloc_order = ilog2(collective_cb_size);
8382
8383 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8384 if (!hdev->internal_cb_pool) {
8385 dev_err(hdev->dev,
8386 "Failed to create internal CB pool\n");
8387 rc = -ENOMEM;
8388 goto free_internal_cb_pool;
8389 }
8390
8391 rc = gen_pool_add(hdev->internal_cb_pool,
8392 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8393 HOST_SPACE_INTERNAL_CB_SZ, -1);
8394 if (rc) {
8395 dev_err(hdev->dev,
8396 "Failed to add memory to internal CB pool\n");
8397 rc = -EFAULT;
8398 goto destroy_internal_cb_pool;
8399 }
8400
8401 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8402 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8403 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8404
8405 if (!hdev->internal_cb_va_base) {
8406 rc = -ENOMEM;
8407 goto destroy_internal_cb_pool;
8408 }
8409
8410 mutex_lock(&hdev->mmu_lock);
8411 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8412 hdev->internal_cb_pool_dma_addr,
8413 HOST_SPACE_INTERNAL_CB_SZ);
8414
8415 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8416 mutex_unlock(&hdev->mmu_lock);
8417
8418 if (rc)
8419 goto unreserve_internal_cb_pool;
8420
8421 return 0;
8422
8423 unreserve_internal_cb_pool:
8424 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8425 HOST_SPACE_INTERNAL_CB_SZ);
8426 destroy_internal_cb_pool:
8427 gen_pool_destroy(hdev->internal_cb_pool);
8428 free_internal_cb_pool:
8429 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8430 hdev->internal_cb_pool_dma_addr);
8431
8432 return rc;
8433 }
8434
8435 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8436 struct hl_ctx *ctx)
8437 {
8438 struct gaudi_device *gaudi = hdev->asic_specific;
8439
8440 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8441 return;
8442
8443 mutex_lock(&hdev->mmu_lock);
8444 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8445 HOST_SPACE_INTERNAL_CB_SZ);
8446 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8447 HOST_SPACE_INTERNAL_CB_SZ);
8448 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8449 mutex_unlock(&hdev->mmu_lock);
8450
8451 gen_pool_destroy(hdev->internal_cb_pool);
8452
8453 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8454 hdev->internal_cb_pool_dma_addr);
8455 }
8456
8457 static int gaudi_ctx_init(struct hl_ctx *ctx)
8458 {
8459 int rc;
8460
8461 if (ctx->asid == HL_KERNEL_ASID_ID)
8462 return 0;
8463
8464 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8465 if (rc)
8466 return rc;
8467
8468 rc = gaudi_restore_user_registers(ctx->hdev);
8469 if (rc)
8470 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8471
8472 return rc;
8473 }
8474
8475 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8476 {
8477 if (ctx->asid == HL_KERNEL_ASID_ID)
8478 return;
8479
8480 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8481 }
8482
8483 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8484 {
8485 return 0;
8486 }
8487
8488 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8489 {
8490 return gaudi_cq_assignment[cq_idx];
8491 }
8492
8493 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8494 {
8495 return sizeof(struct packet_msg_short) +
8496 sizeof(struct packet_msg_prot) * 2;
8497 }
8498
8499 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8500 {
8501 return sizeof(struct packet_msg_short) * 4 +
8502 sizeof(struct packet_fence) +
8503 sizeof(struct packet_msg_prot) * 2;
8504 }
8505
8506 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8507 {
8508 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8509 }
8510
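/*
 * Signal CB: a single MSG_SHORT packet that adds 1 to the given sync object
 * in the W_S SOB block (CTL_BASE 3). The two MSG_PROT packets accounted for
 * by gaudi_get_signal_cb_size() are not written by this function.
 */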
8511 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8512 u32 size, bool eb)
8513 {
8514 struct hl_cb *cb = (struct hl_cb *) data;
8515 struct packet_msg_short *pkt;
8516 u32 value, ctl, pkt_size = sizeof(*pkt);
8517
8518 pkt = cb->kernel_address + size;
8519 memset(pkt, 0, pkt_size);
8520
8521 /* Inc by 1, Mode ADD */
8522 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8523 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8524
8525 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8526 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8527 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8528 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8529 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8530 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8531 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8532
8533 pkt->value = cpu_to_le32(value);
8534 pkt->ctl = cpu_to_le32(ctl);
8535
8536 return size + pkt_size;
8537 }
8538
8539 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8540 u16 addr)
8541 {
8542 u32 ctl, pkt_size = sizeof(*pkt);
8543
8544 memset(pkt, 0, pkt_size);
8545
8546 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8547 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8549 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8550 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8551 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8552
8553 pkt->value = cpu_to_le32(value);
8554 pkt->ctl = cpu_to_le32(ctl);
8555
8556 return pkt_size;
8557 }
8558
8559 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8560 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8561 u16 sob_val, u16 mon_id)
8562 {
8563 u64 monitor_base;
8564 u32 ctl, value, pkt_size = sizeof(*pkt);
8565 u16 msg_addr_offset;
8566 u8 mask;
8567
8568 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8569 dev_err(hdev->dev,
8570 "sob_base %u (mask %#x) is not valid\n",
8571 sob_base, sob_mask);
8572 return 0;
8573 }
8574
8575 /*
8576 * monitor_base should be the content of the base0 address registers,
8577 * so it will be added to the msg short offsets
8578 */
8579 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8580
8581 msg_addr_offset =
8582 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8583 monitor_base;
8584
8585 memset(pkt, 0, pkt_size);
8586
8587 /* Monitor config packet: bind the monitor to a sync object */
8588 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8589 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8590 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8591 0); /* GREATER OR EQUAL*/
8592 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8593
8594 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8595 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8596 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8597 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8598 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8599 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8600 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8601
8602 pkt->value = cpu_to_le32(value);
8603 pkt->ctl = cpu_to_le32(ctl);
8604
8605 return pkt_size;
8606 }
8607
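/*
 * Fence packet used in the wait CB: fence ID 2, target value 1, decrement 1.
 * This pairs with the monitor payload built in gaudi_add_mon_pkts() below,
 * which writes 1 to the queue's CP_FENCE2_RDATA register as resolved by
 * gaudi_get_fence_addr().
 */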
8608 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8609 {
8610 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8611
8612 memset(pkt, 0, pkt_size);
8613
8614 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8615 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8616 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8617
8618 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8619 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8620 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8621 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8622
8623 pkt->cfg = cpu_to_le32(cfg);
8624 pkt->ctl = cpu_to_le32(ctl);
8625
8626 return pkt_size;
8627 }
8628
8629 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8630 {
8631 u32 offset, nic_index;
8632
8633 switch (queue_id) {
8634 case GAUDI_QUEUE_ID_DMA_0_0:
8635 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8636 break;
8637 case GAUDI_QUEUE_ID_DMA_0_1:
8638 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8639 break;
8640 case GAUDI_QUEUE_ID_DMA_0_2:
8641 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8642 break;
8643 case GAUDI_QUEUE_ID_DMA_0_3:
8644 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8645 break;
8646 case GAUDI_QUEUE_ID_DMA_1_0:
8647 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8648 break;
8649 case GAUDI_QUEUE_ID_DMA_1_1:
8650 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8651 break;
8652 case GAUDI_QUEUE_ID_DMA_1_2:
8653 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8654 break;
8655 case GAUDI_QUEUE_ID_DMA_1_3:
8656 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8657 break;
8658 case GAUDI_QUEUE_ID_DMA_5_0:
8659 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8660 break;
8661 case GAUDI_QUEUE_ID_DMA_5_1:
8662 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8663 break;
8664 case GAUDI_QUEUE_ID_DMA_5_2:
8665 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8666 break;
8667 case GAUDI_QUEUE_ID_DMA_5_3:
8668 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8669 break;
8670 case GAUDI_QUEUE_ID_TPC_7_0:
8671 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8672 break;
8673 case GAUDI_QUEUE_ID_TPC_7_1:
8674 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8675 break;
8676 case GAUDI_QUEUE_ID_TPC_7_2:
8677 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8678 break;
8679 case GAUDI_QUEUE_ID_TPC_7_3:
8680 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8681 break;
8682 case GAUDI_QUEUE_ID_NIC_0_0:
8683 case GAUDI_QUEUE_ID_NIC_1_0:
8684 case GAUDI_QUEUE_ID_NIC_2_0:
8685 case GAUDI_QUEUE_ID_NIC_3_0:
8686 case GAUDI_QUEUE_ID_NIC_4_0:
8687 case GAUDI_QUEUE_ID_NIC_5_0:
8688 case GAUDI_QUEUE_ID_NIC_6_0:
8689 case GAUDI_QUEUE_ID_NIC_7_0:
8690 case GAUDI_QUEUE_ID_NIC_8_0:
8691 case GAUDI_QUEUE_ID_NIC_9_0:
8692 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8693 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8694 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8695 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8696 break;
8697 case GAUDI_QUEUE_ID_NIC_0_1:
8698 case GAUDI_QUEUE_ID_NIC_1_1:
8699 case GAUDI_QUEUE_ID_NIC_2_1:
8700 case GAUDI_QUEUE_ID_NIC_3_1:
8701 case GAUDI_QUEUE_ID_NIC_4_1:
8702 case GAUDI_QUEUE_ID_NIC_5_1:
8703 case GAUDI_QUEUE_ID_NIC_6_1:
8704 case GAUDI_QUEUE_ID_NIC_7_1:
8705 case GAUDI_QUEUE_ID_NIC_8_1:
8706 case GAUDI_QUEUE_ID_NIC_9_1:
8707 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8708 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8709 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8710 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8711 break;
8712 case GAUDI_QUEUE_ID_NIC_0_2:
8713 case GAUDI_QUEUE_ID_NIC_1_2:
8714 case GAUDI_QUEUE_ID_NIC_2_2:
8715 case GAUDI_QUEUE_ID_NIC_3_2:
8716 case GAUDI_QUEUE_ID_NIC_4_2:
8717 case GAUDI_QUEUE_ID_NIC_5_2:
8718 case GAUDI_QUEUE_ID_NIC_6_2:
8719 case GAUDI_QUEUE_ID_NIC_7_2:
8720 case GAUDI_QUEUE_ID_NIC_8_2:
8721 case GAUDI_QUEUE_ID_NIC_9_2:
8722 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8723 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8724 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8725 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8726 break;
8727 case GAUDI_QUEUE_ID_NIC_0_3:
8728 case GAUDI_QUEUE_ID_NIC_1_3:
8729 case GAUDI_QUEUE_ID_NIC_2_3:
8730 case GAUDI_QUEUE_ID_NIC_3_3:
8731 case GAUDI_QUEUE_ID_NIC_4_3:
8732 case GAUDI_QUEUE_ID_NIC_5_3:
8733 case GAUDI_QUEUE_ID_NIC_6_3:
8734 case GAUDI_QUEUE_ID_NIC_7_3:
8735 case GAUDI_QUEUE_ID_NIC_8_3:
8736 case GAUDI_QUEUE_ID_NIC_9_3:
8737 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8738 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8739 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8740 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8741 break;
8742 default:
8743 return -EINVAL;
8744 }
8745
8746 *addr = CFG_BASE + offset;
8747
8748 return 0;
8749 }
8750
8751 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8752 {
8753 u64 monitor_base;
8754 u32 size = 0;
8755 u16 msg_addr_offset;
8756
8757 /*
8758 * monitor_base should be the content of the base0 address registers,
8759 * so it will be added to the msg short offsets
8760 */
8761 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8762
8763 /* First monitor config packet: low address of the sync */
8764 msg_addr_offset =
8765 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8766 monitor_base;
8767
8768 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8769 msg_addr_offset);
8770
8771 /* Second monitor config packet: high address of the sync */
8772 msg_addr_offset =
8773 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8774 monitor_base;
8775
8776 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8777 msg_addr_offset);
8778
8779 /*
8780 * Third monitor config packet: the payload, i.e. what to write when the
8781 * sync triggers
8782 */
8783 msg_addr_offset =
8784 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8785 monitor_base;
8786
8787 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8788
8789 return size;
8790 }
8791
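/*
 * Wait CB layout, as assembled below: three MSG_SHORT packets configuring the
 * monitor payload (address low/high and the data value 1), one MSG_SHORT
 * arming the monitor on the requested SOB group/value, and a FENCE packet on
 * fence 2. gaudi_get_wait_cb_size() additionally accounts for two MSG_PROT
 * completion packets that are not generated here.
 */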
8792 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8793 struct hl_gen_wait_properties *prop)
8794 {
8795 struct hl_cb *cb = (struct hl_cb *) prop->data;
8796 void *buf = cb->kernel_address;
8797 u64 fence_addr = 0;
8798 u32 size = prop->size;
8799
8800 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8801 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8802 prop->q_idx);
8803 return 0;
8804 }
8805
8806 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8807 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8808 prop->sob_mask, prop->sob_val, prop->mon_id);
8809 size += gaudi_add_fence_pkt(buf + size);
8810
8811 return size;
8812 }
8813
8814 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8815 {
8816 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8817
8818 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8819 hw_sob->sob_id);
8820
8821 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8822 hw_sob->sob_id * 4, 0);
8823
8824 kref_init(&hw_sob->kref);
8825 }
8826
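/*
 * The device time is composed from two 32-bit PSOC timestamp reads: CNTCVU
 * supplies the upper word and CNTCVL the lower word. Note this is a plain
 * high-then-low read pair with no re-read, so a carry between the two reads
 * is not handled here.
 */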
8827 static u64 gaudi_get_device_time(struct hl_device *hdev)
8828 {
8829 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8830
8831 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8832 }
8833
8834 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8835 u32 *block_size, u32 *block_id)
8836 {
8837 return -EPERM;
8838 }
8839
8840 static int gaudi_block_mmap(struct hl_device *hdev,
8841 struct vm_area_struct *vma,
8842 u32 block_id, u32 block_size)
8843 {
8844 return -EPERM;
8845 }
8846
8847 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8848 {
8849 struct cpu_dyn_regs *dyn_regs =
8850 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8851 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8852 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8853 le32_to_cpu(dyn_regs->gic_host_ints_irq);
8854
8855 WREG32(irq_handler_offset,
8856 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8857 }
8858
8859 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8860 {
8861 return -EINVAL;
8862 }
8863
8864 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8865 {
8866 switch (pll_idx) {
8867 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8868 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8869 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8870 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8871 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8872 case HL_GAUDI_MME_PLL: return MME_PLL;
8873 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8874 case HL_GAUDI_IF_PLL: return IF_PLL;
8875 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8876 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8877 default: return -EINVAL;
8878 }
8879 }
8880
8881 static int gaudi_add_sync_to_engine_map_entry(
8882 struct hl_sync_to_engine_map *map, u32 reg_value,
8883 enum hl_sync_engine_type engine_type, u32 engine_id)
8884 {
8885 struct hl_sync_to_engine_map_entry *entry;
8886
8887 /* Reg value represents a partial address of sync object,
8888 * it is used as unique identifier. For this we need to
8889 * clear the cutoff cfg base bits from the value.
8890 */
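/*
 * E.g. if the register holds lower_32_bits(CFG_BASE) + <sync object offset>,
 * the subtraction below leaves only the sync-object offset, which is then
 * used as the hash key (entry->sync_id).
 */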
8891 if (reg_value == 0 || reg_value == 0xffffffff)
8892 return 0;
8893 reg_value -= lower_32_bits(CFG_BASE);
8894
8895 /* create a new hash entry */
8896 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8897 if (!entry)
8898 return -ENOMEM;
8899 entry->engine_type = engine_type;
8900 entry->engine_id = engine_id;
8901 entry->sync_id = reg_value;
8902 hash_add(map->tb, &entry->node, reg_value);
8903
8904 return 0;
8905 }
8906
8907 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8908 struct hl_sync_to_engine_map *map)
8909 {
8910 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8911 int i, j, rc;
8912 u32 reg_value;
8913
8914 /* Iterate over TPC engines */
8915 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8916
8917 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8918 sds->props[SP_NEXT_TPC] * i);
8919
8920 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8921 ENGINE_TPC, i);
8922 if (rc)
8923 goto free_sync_to_engine_map;
8924 }
8925
8926 /* Iterate over MME engines */
8927 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8928 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8929
8930 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8931 sds->props[SP_NEXT_MME] * i +
8932 j * sizeof(u32));
8933
8934 rc = gaudi_add_sync_to_engine_map_entry(
8935 map, reg_value, ENGINE_MME,
8936 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8937 if (rc)
8938 goto free_sync_to_engine_map;
8939 }
8940 }
8941
8942 /* Iterate over DMA engines */
8943 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8944 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8945 sds->props[SP_DMA_QUEUES_OFFSET] * i);
8946 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8947 ENGINE_DMA, i);
8948 if (rc)
8949 goto free_sync_to_engine_map;
8950 }
8951
8952 return 0;
8953
8954 free_sync_to_engine_map:
8955 hl_state_dump_free_sync_to_engine_map(map);
8956
8957 return rc;
8958 }
8959
8960 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8961 {
8962 return FIELD_GET(
8963 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8964 mon->status);
8965 }
8966
8967 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8968 {
8969 const size_t max_write = 10;
8970 u32 gid, mask, sob;
8971 int i, offset;
8972
8973 /* Sync object ID is calculated as follows:
8974 * (8 * group_id + cleared bits in mask)
8975 */
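/*
 * E.g. with gid = 2 and mask = 0b11111100, bits 0 and 1 are cleared, so
 * (assuming MONITOR_MAX_SOBS is 8) sync objects 16 and 17 end up in the
 * output string.
 */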
8976 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8977 mon->arm_data);
8978 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8979 mon->arm_data);
8980
8981 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8982 max_write; mask >>= 1, i++) {
8983 if (!(mask & 1)) {
8984 sob = gid * MONITOR_MAX_SOBS + i;
8985
8986 if (offset > 0)
8987 offset += snprintf(sobs + offset, max_write,
8988 ", ");
8989
8990 offset += snprintf(sobs + offset, max_write, "%u", sob);
8991 }
8992 }
8993 }
8994
8995 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8996 struct hl_device *hdev,
8997 struct hl_mon_state_dump *mon)
8998 {
8999 const char *name;
9000 char scratch_buf1[BIN_REG_STRING_SIZE],
9001 scratch_buf2[BIN_REG_STRING_SIZE];
9002 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9003
9004 name = hl_state_dump_get_monitor_name(hdev, mon);
9005 if (!name)
9006 name = "";
9007
9008 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9009
9010 return hl_snprintf_resize(
9011 buf, size, offset,
9012 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9013 mon->id, name,
9014 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9015 mon->arm_data),
9016 hl_format_as_binary(
9017 scratch_buf1, sizeof(scratch_buf1),
9018 FIELD_GET(
9019 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9020 mon->arm_data)),
9021 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9022 mon->arm_data),
9023 mon->wr_data,
9024 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9025 hl_format_as_binary(
9026 scratch_buf2, sizeof(scratch_buf2),
9027 FIELD_GET(
9028 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9029 mon->status)),
9030 monitored_sobs);
9031 }
9032
9033
9034 static int gaudi_print_fences_single_engine(
9035 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9036 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9037 size_t *size, size_t *offset)
9038 {
9039 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9040 int rc = -ENOMEM, i;
9041 u32 *statuses, *fences;
9042
9043 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9044 sizeof(*statuses), GFP_KERNEL);
9045 if (!statuses)
9046 goto out;
9047
9048 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9049 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9050 sizeof(*fences), GFP_KERNEL);
9051 if (!fences)
9052 goto free_status;
9053
9054 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9055 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9056
9057 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9058 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9059 fences[i] = RREG32(base_offset + i * sizeof(u32));
9060
9061 /* The actual print */
9062 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9063 u32 fence_id;
9064 u64 fence_cnt, fence_rdata;
9065 const char *engine_name;
9066
9067 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9068 statuses[i]))
9069 continue;
9070
9071 fence_id =
9072 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9073 fence_cnt = base_offset + CFG_BASE +
9074 sizeof(u32) *
9075 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9076 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9077 sds->props[SP_FENCE0_RDATA_OFFSET];
9078 engine_name = hl_sync_engine_to_string(engine_type);
9079
9080 rc = hl_snprintf_resize(
9081 buf, size, offset,
9082 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9083 engine_name, engine_id,
9084 i, fence_id,
9085 fence_cnt, engine_name, engine_id, fence_id, i,
9086 fence_rdata, engine_name, engine_id, fence_id, i,
9087 fences[fence_id],
9088 statuses[i]);
9089 if (rc)
9090 goto free_fences;
9091 }
9092
9093 rc = 0;
9094
9095 free_fences:
9096 kfree(fences);
9097 free_status:
9098 kfree(statuses);
9099 out:
9100 return rc;
9101 }
9102
9103
9104 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9105 .monitor_valid = gaudi_monitor_valid,
9106 .print_single_monitor = gaudi_print_single_monitor,
9107 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9108 .print_fences_single_engine = gaudi_print_fences_single_engine,
9109 };
9110
9111 static void gaudi_state_dump_init(struct hl_device *hdev)
9112 {
9113 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9114 int i;
9115
9116 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9117 hash_add(sds->so_id_to_str_tb,
9118 &gaudi_so_id_to_str[i].node,
9119 gaudi_so_id_to_str[i].id);
9120
9121 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9122 hash_add(sds->monitor_id_to_str_tb,
9123 &gaudi_monitor_id_to_str[i].node,
9124 gaudi_monitor_id_to_str[i].id);
9125
9126 sds->props = gaudi_state_dump_specs_props;
9127
9128 sds->sync_namager_names = gaudi_sync_manager_names;
9129
9130 sds->funcs = gaudi_state_dump_funcs;
9131 }
9132
9133 static u32 *gaudi_get_stream_master_qid_arr(void)
9134 {
9135 return gaudi_stream_master;
9136 }
9137
9138 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9139 {
9140 }
9141
9142 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9143 {
9144 struct hl_device *hdev = dev_get_drvdata(dev);
9145 struct cpucp_info *cpucp_info;
9146
9147 cpucp_info = &hdev->asic_prop.cpucp_info;
9148
9149 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9150 }
9151
9152 static DEVICE_ATTR_RO(infineon_ver);
9153
9154 static struct attribute *gaudi_vrm_dev_attrs[] = {
9155 &dev_attr_infineon_ver.attr,
9156 NULL,
9157 };
9158
9159 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9160 struct attribute_group *dev_vrm_attr_grp)
9161 {
9162 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9163 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9164 }
9165
9166 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9167 {
9168 return 0;
9169 }
9170
9171 static const struct hl_asic_funcs gaudi_funcs = {
9172 .early_init = gaudi_early_init,
9173 .early_fini = gaudi_early_fini,
9174 .late_init = gaudi_late_init,
9175 .late_fini = gaudi_late_fini,
9176 .sw_init = gaudi_sw_init,
9177 .sw_fini = gaudi_sw_fini,
9178 .hw_init = gaudi_hw_init,
9179 .hw_fini = gaudi_hw_fini,
9180 .halt_engines = gaudi_halt_engines,
9181 .suspend = gaudi_suspend,
9182 .resume = gaudi_resume,
9183 .mmap = gaudi_mmap,
9184 .ring_doorbell = gaudi_ring_doorbell,
9185 .pqe_write = gaudi_pqe_write,
9186 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9187 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9188 .scrub_device_mem = gaudi_scrub_device_mem,
9189 .scrub_device_dram = gaudi_scrub_device_dram,
9190 .get_int_queue_base = gaudi_get_int_queue_base,
9191 .test_queues = gaudi_test_queues,
9192 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9193 .asic_dma_pool_free = gaudi_dma_pool_free,
9194 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9195 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9196 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9197 .cs_parser = gaudi_cs_parser,
9198 .asic_dma_map_sgtable = hl_dma_map_sgtable,
9199 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9200 .update_eq_ci = gaudi_update_eq_ci,
9201 .context_switch = gaudi_context_switch,
9202 .restore_phase_topology = gaudi_restore_phase_topology,
9203 .debugfs_read_dma = gaudi_debugfs_read_dma,
9204 .add_device_attr = gaudi_add_device_attr,
9205 .handle_eqe = gaudi_handle_eqe,
9206 .get_events_stat = gaudi_get_events_stat,
9207 .read_pte = gaudi_read_pte,
9208 .write_pte = gaudi_write_pte,
9209 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9210 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9211 .mmu_prefetch_cache_range = NULL,
9212 .send_heartbeat = gaudi_send_heartbeat,
9213 .debug_coresight = gaudi_debug_coresight,
9214 .is_device_idle = gaudi_is_device_idle,
9215 .compute_reset_late_init = gaudi_compute_reset_late_init,
9216 .hw_queues_lock = gaudi_hw_queues_lock,
9217 .hw_queues_unlock = gaudi_hw_queues_unlock,
9218 .get_pci_id = gaudi_get_pci_id,
9219 .get_eeprom_data = gaudi_get_eeprom_data,
9220 .get_monitor_dump = gaudi_get_monitor_dump,
9221 .send_cpu_message = gaudi_send_cpu_message,
9222 .pci_bars_map = gaudi_pci_bars_map,
9223 .init_iatu = gaudi_init_iatu,
9224 .rreg = hl_rreg,
9225 .wreg = hl_wreg,
9226 .halt_coresight = gaudi_halt_coresight,
9227 .ctx_init = gaudi_ctx_init,
9228 .ctx_fini = gaudi_ctx_fini,
9229 .pre_schedule_cs = gaudi_pre_schedule_cs,
9230 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9231 .load_firmware_to_device = gaudi_load_firmware_to_device,
9232 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9233 .get_signal_cb_size = gaudi_get_signal_cb_size,
9234 .get_wait_cb_size = gaudi_get_wait_cb_size,
9235 .gen_signal_cb = gaudi_gen_signal_cb,
9236 .gen_wait_cb = gaudi_gen_wait_cb,
9237 .reset_sob = gaudi_reset_sob,
9238 .reset_sob_group = gaudi_reset_sob_group,
9239 .get_device_time = gaudi_get_device_time,
9240 .pb_print_security_errors = NULL,
9241 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9242 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9243 .get_dec_base_addr = NULL,
9244 .scramble_addr = hl_mmu_scramble_addr,
9245 .descramble_addr = hl_mmu_descramble_addr,
9246 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9247 .get_hw_block_id = gaudi_get_hw_block_id,
9248 .hw_block_mmap = gaudi_block_mmap,
9249 .enable_events_from_fw = gaudi_enable_events_from_fw,
9250 .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9251 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9252 .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9253 .init_firmware_loader = gaudi_init_firmware_loader,
9254 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9255 .state_dump_init = gaudi_state_dump_init,
9256 .get_sob_addr = gaudi_get_sob_addr,
9257 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9258 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9259 .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9260 .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9261 .access_dev_mem = hl_access_dev_mem,
9262 .set_dram_bar_base = gaudi_set_hbm_bar_base,
9263 .send_device_activity = gaudi_send_device_activity,
9264 };
9265
9266 /**
9267 * gaudi_set_asic_funcs - set GAUDI function pointers
9268 *
9269 * @hdev: pointer to hl_device structure
9270 *
9271 */
9272 void gaudi_set_asic_funcs(struct hl_device *hdev)
9273 {
9274 hdev->asic_funcs = &gaudi_funcs;
9275 }
9276