1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMAN):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
98 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
99 
100 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
101 
102 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
103 
104 #define MONITOR_SOB_STRING_SIZE		256
105 
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 		"gaudi cpu eq"
122 };
123 
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134 
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
137 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
138 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
139 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
140 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
141 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
142 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
143 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145 
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
148 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
149 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
150 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
151 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
152 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
153 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
154 	[PACKET_FENCE]		= sizeof(struct packet_fence),
155 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
156 	[PACKET_NOP]		= sizeof(struct packet_nop),
157 	[PACKET_STOP]		= sizeof(struct packet_stop),
158 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
159 	[PACKET_WAIT]		= sizeof(struct packet_wait),
160 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
161 };
162 
validate_packet_id(enum packet_id id)163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 	switch (id) {
166 	case PACKET_WREG_32:
167 	case PACKET_WREG_BULK:
168 	case PACKET_MSG_LONG:
169 	case PACKET_MSG_SHORT:
170 	case PACKET_CP_DMA:
171 	case PACKET_REPEAT:
172 	case PACKET_MSG_PROT:
173 	case PACKET_FENCE:
174 	case PACKET_LIN_DMA:
175 	case PACKET_NOP:
176 	case PACKET_STOP:
177 	case PACKET_ARB_POINT:
178 	case PACKET_WAIT:
179 	case PACKET_LOAD_AND_EXE:
180 		return true;
181 	default:
182 		return false;
183 	}
184 }
185 
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 	"tpc_address_exceed_slm",
189 	"tpc_div_by_0",
190 	"tpc_spu_mac_overflow",
191 	"tpc_spu_addsub_overflow",
192 	"tpc_spu_abs_overflow",
193 	"tpc_spu_fp_dst_nan_inf",
194 	"tpc_spu_fp_dst_denorm",
195 	"tpc_vpu_mac_overflow",
196 	"tpc_vpu_addsub_overflow",
197 	"tpc_vpu_abs_overflow",
198 	"tpc_vpu_fp_dst_nan_inf",
199 	"tpc_vpu_fp_dst_denorm",
200 	"tpc_assertions",
201 	"tpc_illegal_instruction",
202 	"tpc_pc_wrap_around",
203 	"tpc_qm_sw_err",
204 	"tpc_hbw_rresp_err",
205 	"tpc_hbw_bresp_err",
206 	"tpc_lbw_rresp_err",
207 	"tpc_lbw_bresp_err"
208 };
209 
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 	"PQ AXI HBW error",
213 	"CQ AXI HBW error",
214 	"CP AXI HBW error",
215 	"CP error due to undefined OPCODE",
216 	"CP encountered STOP OPCODE",
217 	"CP AXI LBW error",
218 	"CP WRREG32 or WRBULK returned error",
219 	"N/A",
220 	"FENCE 0 inc over max value and clipped",
221 	"FENCE 1 inc over max value and clipped",
222 	"FENCE 2 inc over max value and clipped",
223 	"FENCE 3 inc over max value and clipped",
224 	"FENCE 0 dec under min value and clipped",
225 	"FENCE 1 dec under min value and clipped",
226 	"FENCE 2 dec under min value and clipped",
227 	"FENCE 3 dec under min value and clipped"
228 };
229 
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 	"Choice push while full error",
233 	"Choice Q watchdog error",
234 	"MSG AXI LBW returned with error"
235 };
236 
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352 
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382 
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396 
397 static s64 gaudi_state_dump_specs_props[] = {
398 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 	[SP_MON_OBJ_WR_ADDR_LOW] =
402 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 	[SP_MON_OBJ_WR_ADDR_HIGH] =
404 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 	[SP_FENCE0_CNT_OFFSET] =
426 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_FENCE0_RDATA_OFFSET] =
428 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 	[SP_NUM_CORES] = 1,
431 };
432 
433 static const int gaudi_queue_id_to_engine_id[] = {
434 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464 
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469 	"SYNC_MGR_E_N",
470 	"SYNC_MGR_W_N",
471 	"SYNC_MGR_E_S",
472 	"SYNC_MGR_W_S",
473 	NULL
474 };
475 
476 struct ecc_info_extract_params {
477 	u64 block_address;
478 	u32 num_memories;
479 	bool derr;
480 };
481 
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 								u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 					struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 					u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 					u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 				u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 				u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 				struct hl_gen_wait_properties *prop);
500 static inline enum hl_collective_mode
get_collective_mode(struct hl_device * hdev,u32 queue_id)501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 		return HL_COLLECTIVE_MASTER;
505 
506 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 		return HL_COLLECTIVE_SLAVE;
509 
510 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 		return HL_COLLECTIVE_SLAVE;
513 
514 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 		return HL_COLLECTIVE_SLAVE;
517 
518 	return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520 
set_default_power_values(struct hl_device * hdev)521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 	struct asic_fixed_properties *prop = &hdev->asic_prop;
524 
525 	if (hdev->card_type == cpucp_card_type_pmc) {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527 
528 		if (prop->fw_security_enabled)
529 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 		else
531 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 	} else {
533 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 	}
536 }
537 
gaudi_set_fixed_properties(struct hl_device * hdev)538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 	struct asic_fixed_properties *prop = &hdev->asic_prop;
541 	u32 num_sync_stream_queues = 0;
542 	int i;
543 
544 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 	prop->hw_queues_props = kcalloc(prop->max_queues,
546 			sizeof(struct hw_queue_properties),
547 			GFP_KERNEL);
548 
549 	if (!prop->hw_queues_props)
550 		return -ENOMEM;
551 
552 	for (i = 0 ; i < prop->max_queues ; i++) {
553 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 			prop->hw_queues_props[i].driver_only = 0;
556 			prop->hw_queues_props[i].supports_sync_stream = 1;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 			num_sync_stream_queues++;
560 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 			prop->hw_queues_props[i].driver_only = 1;
563 			prop->hw_queues_props[i].supports_sync_stream = 0;
564 			prop->hw_queues_props[i].cb_alloc_flags =
565 				CB_ALLOC_KERNEL;
566 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 			prop->hw_queues_props[i].driver_only = 0;
569 			prop->hw_queues_props[i].supports_sync_stream = 0;
570 			prop->hw_queues_props[i].cb_alloc_flags =
571 				CB_ALLOC_USER;
572 
573 		}
574 		prop->hw_queues_props[i].collective_mode =
575 						get_collective_mode(hdev, i);
576 	}
577 
578 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 	prop->cfg_base_address = CFG_BASE;
580 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 	prop->host_base_address = HOST_PHYS_BASE;
582 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 	prop->collective_first_sob = 0;
586 	prop->collective_first_mon = 0;
587 
588 	/* 2 SOBs per internal queue stream are reserved for collective */
589 	prop->sync_stream_first_sob =
590 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 			* QMAN_STREAMS * HL_RSVD_SOBS;
592 
593 	/* 1 monitor per internal queue stream are reserved for collective
594 	 * 2 monitors per external queue stream are reserved for collective
595 	 */
596 	prop->sync_stream_first_mon =
597 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 			(NUMBER_OF_EXT_HW_QUEUES * 2);
599 
600 	prop->dram_base_address = DRAM_PHYS_BASE;
601 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604 
605 	prop->sram_base_address = SRAM_BASE_ADDR;
606 	prop->sram_size = SRAM_SIZE;
607 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 	prop->sram_user_base_address =
609 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610 
611 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613 
614 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 	if (hdev->pldm)
616 		prop->mmu_pgt_size = 0x800000; /* 8MB */
617 	else
618 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 	prop->mmu_pte_size = HL_PTE_SIZE;
620 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 	prop->dram_page_size = PAGE_SIZE_2MB;
623 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 	prop->dram_supports_virtual_memory = false;
625 
626 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 	prop->pmmu.end_addr =
638 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 	prop->pmmu.page_size = PAGE_SIZE_4KB;
640 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 	prop->pmmu.last_mask = LAST_MASK;
642 	/* TODO: will be duplicated until implementing per-MMU props */
643 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645 
646 	/* PMMU and HPMMU are the same except of page size */
647 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649 
650 	/* shifts and masks are the same in PMMU and DMMU */
651 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 	prop->dmmu.page_size = PAGE_SIZE_2MB;
655 
656 	prop->cfg_size = CFG_SIZE;
657 	prop->max_asid = MAX_ASID;
658 	prop->num_of_events = GAUDI_EVENT_SIZE;
659 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660 
661 	set_default_power_values(hdev);
662 
663 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665 
666 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668 
669 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670 					CARD_NAME_MAX_LEN);
671 
672 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673 
674 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675 			prop->sync_stream_first_sob +
676 			(num_sync_stream_queues * HL_RSVD_SOBS);
677 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678 			prop->sync_stream_first_mon +
679 			(num_sync_stream_queues * HL_RSVD_MONS);
680 
681 	prop->first_available_user_interrupt = USHRT_MAX;
682 
683 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
684 		prop->first_available_cq[i] = USHRT_MAX;
685 
686 	prop->fw_cpu_boot_dev_sts0_valid = false;
687 	prop->fw_cpu_boot_dev_sts1_valid = false;
688 	prop->hard_reset_done_by_fw = false;
689 	prop->gic_interrupts_enable = true;
690 
691 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692 
693 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
694 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695 
696 	prop->use_get_power_for_reset_history = true;
697 
698 	prop->configurable_stop_on_err = true;
699 
700 	prop->set_max_power_on_device_init = true;
701 
702 	prop->dma_mask = 48;
703 
704 	return 0;
705 }
706 
gaudi_pci_bars_map(struct hl_device * hdev)707 static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
710 	bool is_wc[3] = {false, false, true};
711 	int rc;
712 
713 	rc = hl_pci_bars_map(hdev, name, is_wc);
714 	if (rc)
715 		return rc;
716 
717 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
719 
720 	return 0;
721 }
722 
gaudi_set_hbm_bar_base(struct hl_device * hdev,u64 addr)723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725 	struct gaudi_device *gaudi = hdev->asic_specific;
726 	struct hl_inbound_pci_region pci_region;
727 	u64 old_addr = addr;
728 	int rc;
729 
730 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731 		return old_addr;
732 
733 	if (hdev->asic_prop.iatu_done_by_fw)
734 		return U64_MAX;
735 
736 	/* Inbound Region 2 - Bar 4 - Point to HBM */
737 	pci_region.mode = PCI_BAR_MATCH_MODE;
738 	pci_region.bar = HBM_BAR_ID;
739 	pci_region.addr = addr;
740 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741 	if (rc)
742 		return U64_MAX;
743 
744 	if (gaudi) {
745 		old_addr = gaudi->hbm_bar_cur_addr;
746 		gaudi->hbm_bar_cur_addr = addr;
747 	}
748 
749 	return old_addr;
750 }
751 
gaudi_init_iatu(struct hl_device * hdev)752 static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754 	struct hl_inbound_pci_region inbound_region;
755 	struct hl_outbound_pci_region outbound_region;
756 	int rc;
757 
758 	if (hdev->asic_prop.iatu_done_by_fw)
759 		return 0;
760 
761 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762 	inbound_region.mode = PCI_BAR_MATCH_MODE;
763 	inbound_region.bar = SRAM_BAR_ID;
764 	inbound_region.addr = SRAM_BASE_ADDR;
765 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766 	if (rc)
767 		goto done;
768 
769 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770 	inbound_region.mode = PCI_BAR_MATCH_MODE;
771 	inbound_region.bar = CFG_BAR_ID;
772 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
773 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774 	if (rc)
775 		goto done;
776 
777 	/* Inbound Region 2 - Bar 4 - Point to HBM */
778 	inbound_region.mode = PCI_BAR_MATCH_MODE;
779 	inbound_region.bar = HBM_BAR_ID;
780 	inbound_region.addr = DRAM_PHYS_BASE;
781 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782 	if (rc)
783 		goto done;
784 
785 	/* Outbound Region 0 - Point to Host */
786 	outbound_region.addr = HOST_PHYS_BASE;
787 	outbound_region.size = HOST_PHYS_SIZE;
788 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789 
790 done:
791 	return rc;
792 }
793 
gaudi_get_hw_state(struct hl_device * hdev)794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796 	return RREG32(mmHW_STATE);
797 }
798 
gaudi_early_init(struct hl_device * hdev)799 static int gaudi_early_init(struct hl_device *hdev)
800 {
801 	struct asic_fixed_properties *prop = &hdev->asic_prop;
802 	struct pci_dev *pdev = hdev->pdev;
803 	resource_size_t pci_bar_size;
804 	u32 fw_boot_status;
805 	int rc;
806 
807 	rc = gaudi_set_fixed_properties(hdev);
808 	if (rc) {
809 		dev_err(hdev->dev, "Failed setting fixed properties\n");
810 		return rc;
811 	}
812 
813 	/* Check BAR sizes */
814 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815 
816 	if (pci_bar_size != SRAM_BAR_SIZE) {
817 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819 		rc = -ENODEV;
820 		goto free_queue_props;
821 	}
822 
823 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824 
825 	if (pci_bar_size != CFG_BAR_SIZE) {
826 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828 		rc = -ENODEV;
829 		goto free_queue_props;
830 	}
831 
832 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834 
835 	/* If FW security is enabled at this point it means no access to ELBI */
836 	if (hdev->asic_prop.fw_security_enabled) {
837 		hdev->asic_prop.iatu_done_by_fw = true;
838 
839 		/*
840 		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
841 		 * decision can only be taken based on PCI ID security.
842 		 */
843 		hdev->asic_prop.gic_interrupts_enable = false;
844 		goto pci_init;
845 	}
846 
847 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848 				&fw_boot_status);
849 	if (rc)
850 		goto free_queue_props;
851 
852 	/* Check whether FW is configuring iATU */
853 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855 		hdev->asic_prop.iatu_done_by_fw = true;
856 
857 pci_init:
858 	rc = hl_pci_init(hdev);
859 	if (rc)
860 		goto free_queue_props;
861 
862 	/* Before continuing in the initialization, we need to read the preboot
863 	 * version to determine whether we run with a security-enabled firmware
864 	 */
865 	rc = hl_fw_read_preboot_status(hdev);
866 	if (rc) {
867 		if (hdev->reset_on_preboot_fail)
868 			hdev->asic_funcs->hw_fini(hdev, true, false);
869 		goto pci_fini;
870 	}
871 
872 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
873 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
874 		hdev->asic_funcs->hw_fini(hdev, true, false);
875 	}
876 
877 	return 0;
878 
879 pci_fini:
880 	hl_pci_fini(hdev);
881 free_queue_props:
882 	kfree(hdev->asic_prop.hw_queues_props);
883 	return rc;
884 }
885 
gaudi_early_fini(struct hl_device * hdev)886 static int gaudi_early_fini(struct hl_device *hdev)
887 {
888 	kfree(hdev->asic_prop.hw_queues_props);
889 	hl_pci_fini(hdev);
890 
891 	return 0;
892 }
893 
894 /**
895  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
896  *
897  * @hdev: pointer to hl_device structure
898  *
899  */
gaudi_fetch_psoc_frequency(struct hl_device * hdev)900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
901 {
902 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
903 	struct asic_fixed_properties *prop = &hdev->asic_prop;
904 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
905 	int rc;
906 
907 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
908 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
909 		struct gaudi_device *gaudi = hdev->asic_specific;
910 
911 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
912 			return 0;
913 
914 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
915 
916 		if (rc)
917 			return rc;
918 
919 		freq = pll_freq_arr[2];
920 	} else {
921 		/* Backward compatibility */
922 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
923 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
924 		nr = RREG32(mmPSOC_CPU_PLL_NR);
925 		nf = RREG32(mmPSOC_CPU_PLL_NF);
926 		od = RREG32(mmPSOC_CPU_PLL_OD);
927 
928 		if (div_sel == DIV_SEL_REF_CLK ||
929 				div_sel == DIV_SEL_DIVIDED_REF) {
930 			if (div_sel == DIV_SEL_REF_CLK)
931 				freq = PLL_REF_CLK;
932 			else
933 				freq = PLL_REF_CLK / (div_fctr + 1);
934 		} else if (div_sel == DIV_SEL_PLL_CLK ||
935 			div_sel == DIV_SEL_DIVIDED_PLL) {
936 			pll_clk = PLL_REF_CLK * (nf + 1) /
937 					((nr + 1) * (od + 1));
938 			if (div_sel == DIV_SEL_PLL_CLK)
939 				freq = pll_clk;
940 			else
941 				freq = pll_clk / (div_fctr + 1);
942 		} else {
943 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
944 			freq = 0;
945 		}
946 	}
947 
948 	prop->psoc_timestamp_frequency = freq;
949 	prop->psoc_pci_pll_nr = nr;
950 	prop->psoc_pci_pll_nf = nf;
951 	prop->psoc_pci_pll_od = od;
952 	prop->psoc_pci_pll_div_factor = div_fctr;
953 
954 	return 0;
955 }
956 
_gaudi_init_tpc_mem(struct hl_device * hdev,dma_addr_t tpc_kernel_src_addr,u32 tpc_kernel_size)957 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
958 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
959 {
960 	struct asic_fixed_properties *prop = &hdev->asic_prop;
961 	struct packet_lin_dma *init_tpc_mem_pkt;
962 	struct hl_cs_job *job;
963 	struct hl_cb *cb;
964 	u64 dst_addr;
965 	u32 cb_size, ctl;
966 	u8 tpc_id;
967 	int rc;
968 
969 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
970 	if (!cb)
971 		return -EFAULT;
972 
973 	init_tpc_mem_pkt = cb->kernel_address;
974 	cb_size = sizeof(*init_tpc_mem_pkt);
975 	memset(init_tpc_mem_pkt, 0, cb_size);
976 
977 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
978 
979 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
980 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
981 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
982 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
983 
984 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
985 
986 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
987 
988 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
989 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
990 				round_up(prop->sram_user_base_address, SZ_8K));
991 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
992 
993 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
994 	if (!job) {
995 		dev_err(hdev->dev, "Failed to allocate a new job\n");
996 		rc = -ENOMEM;
997 		goto release_cb;
998 	}
999 
1000 	job->id = 0;
1001 	job->user_cb = cb;
1002 	atomic_inc(&job->user_cb->cs_cnt);
1003 	job->user_cb_size = cb_size;
1004 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1005 	job->patched_cb = job->user_cb;
1006 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1007 
1008 	hl_debugfs_add_job(hdev, job);
1009 
1010 	rc = gaudi_send_job_on_qman0(hdev, job);
1011 
1012 	if (rc)
1013 		goto free_job;
1014 
1015 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1016 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1017 		if (rc)
1018 			break;
1019 	}
1020 
1021 free_job:
1022 	hl_userptr_delete_list(hdev, &job->userptr_list);
1023 	hl_debugfs_remove_job(hdev, job);
1024 	kfree(job);
1025 	atomic_dec(&cb->cs_cnt);
1026 
1027 release_cb:
1028 	hl_cb_put(cb);
1029 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1030 
1031 	return rc;
1032 }
1033 
1034 /*
1035  * gaudi_init_tpc_mem() - Initialize TPC memories.
1036  * @hdev: Pointer to hl_device structure.
1037  *
1038  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1039  *
1040  * Return: 0 for success, negative value for error.
1041  */
gaudi_init_tpc_mem(struct hl_device * hdev)1042 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1043 {
1044 	const struct firmware *fw;
1045 	size_t fw_size;
1046 	void *cpu_addr;
1047 	dma_addr_t dma_handle;
1048 	int rc, count = 5;
1049 
1050 again:
1051 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1052 	if (rc == -EINTR && count-- > 0) {
1053 		msleep(50);
1054 		goto again;
1055 	}
1056 
1057 	if (rc) {
1058 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1059 				GAUDI_TPC_FW_FILE);
1060 		goto out;
1061 	}
1062 
1063 	fw_size = fw->size;
1064 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1065 	if (!cpu_addr) {
1066 		dev_err(hdev->dev,
1067 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1068 			fw_size);
1069 		rc = -ENOMEM;
1070 		goto out;
1071 	}
1072 
1073 	memcpy(cpu_addr, fw->data, fw_size);
1074 
1075 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1076 
1077 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1078 
1079 out:
1080 	release_firmware(fw);
1081 	return rc;
1082 }
1083 
gaudi_collective_map_sobs(struct hl_device * hdev,u32 stream)1084 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1085 {
1086 	struct gaudi_device *gaudi = hdev->asic_specific;
1087 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1088 	struct hl_hw_queue *q;
1089 	u32 i, sob_id, sob_group_id, queue_id;
1090 
1091 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1092 	sob_group_id =
1093 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1094 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1095 
1096 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1097 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1098 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1099 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1100 	}
1101 
1102 	/* Both DMA5 and TPC7 use the same resources since only a single
1103 	 * engine need to participate in the reduction process
1104 	 */
1105 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1106 	q = &hdev->kernel_queues[queue_id];
1107 	q->sync_stream_prop.collective_sob_id =
1108 			sob_id + NIC_NUMBER_OF_ENGINES;
1109 
1110 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1111 	q = &hdev->kernel_queues[queue_id];
1112 	q->sync_stream_prop.collective_sob_id =
1113 			sob_id + NIC_NUMBER_OF_ENGINES;
1114 }
1115 
gaudi_sob_group_hw_reset(struct kref * ref)1116 static void gaudi_sob_group_hw_reset(struct kref *ref)
1117 {
1118 	struct gaudi_hw_sob_group *hw_sob_group =
1119 		container_of(ref, struct gaudi_hw_sob_group, kref);
1120 	struct hl_device *hdev = hw_sob_group->hdev;
1121 	int i;
1122 
1123 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1124 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1125 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1126 
1127 	kref_init(&hw_sob_group->kref);
1128 }
1129 
gaudi_sob_group_reset_error(struct kref * ref)1130 static void gaudi_sob_group_reset_error(struct kref *ref)
1131 {
1132 	struct gaudi_hw_sob_group *hw_sob_group =
1133 		container_of(ref, struct gaudi_hw_sob_group, kref);
1134 	struct hl_device *hdev = hw_sob_group->hdev;
1135 
1136 	dev_crit(hdev->dev,
1137 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1138 		hw_sob_group->base_sob_id);
1139 }
1140 
gaudi_collective_mstr_sob_mask_set(struct gaudi_device * gaudi)1141 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1142 {
1143 	struct gaudi_collective_properties *prop;
1144 	int i;
1145 
1146 	prop = &gaudi->collective_props;
1147 
1148 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1149 
1150 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1151 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1152 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1153 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1154 	/* Set collective engine bit */
1155 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1156 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1157 }
1158 
gaudi_collective_init(struct hl_device * hdev)1159 static int gaudi_collective_init(struct hl_device *hdev)
1160 {
1161 	u32 i, sob_id, reserved_sobs_per_group;
1162 	struct gaudi_collective_properties *prop;
1163 	struct gaudi_device *gaudi;
1164 
1165 	gaudi = hdev->asic_specific;
1166 	prop = &gaudi->collective_props;
1167 	sob_id = hdev->asic_prop.collective_first_sob;
1168 
1169 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1170 	reserved_sobs_per_group =
1171 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1172 
1173 	/* Init SOB groups */
1174 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1175 		prop->hw_sob_group[i].hdev = hdev;
1176 		prop->hw_sob_group[i].base_sob_id = sob_id;
1177 		sob_id += reserved_sobs_per_group;
1178 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1179 	}
1180 
1181 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1182 		prop->next_sob_group_val[i] = 1;
1183 		prop->curr_sob_group_idx[i] = 0;
1184 		gaudi_collective_map_sobs(hdev, i);
1185 	}
1186 
1187 	gaudi_collective_mstr_sob_mask_set(gaudi);
1188 
1189 	return 0;
1190 }
1191 
gaudi_reset_sob_group(struct hl_device * hdev,u16 sob_group)1192 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1193 {
1194 	struct gaudi_device *gaudi = hdev->asic_specific;
1195 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1196 
1197 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1198 					gaudi_sob_group_hw_reset);
1199 }
1200 
gaudi_collective_master_init_job(struct hl_device * hdev,struct hl_cs_job * job,u32 stream,u32 sob_group_offset)1201 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1202 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1203 {
1204 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1205 	struct gaudi_collective_properties *cprop;
1206 	struct hl_gen_wait_properties wait_prop;
1207 	struct hl_sync_stream_properties *prop;
1208 	struct gaudi_device *gaudi;
1209 
1210 	gaudi = hdev->asic_specific;
1211 	cprop = &gaudi->collective_props;
1212 	queue_id = job->hw_queue_id;
1213 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1214 
1215 	master_sob_base =
1216 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1217 	master_monitor = prop->collective_mstr_mon_id[0];
1218 
1219 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1220 
1221 	dev_dbg(hdev->dev,
1222 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1223 		master_sob_base, cprop->mstr_sob_mask[0],
1224 		cprop->next_sob_group_val[stream],
1225 		master_monitor, queue_id);
1226 
1227 	wait_prop.data = (void *) job->patched_cb;
1228 	wait_prop.sob_base = master_sob_base;
1229 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1230 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1231 	wait_prop.mon_id = master_monitor;
1232 	wait_prop.q_idx = queue_id;
1233 	wait_prop.size = cb_size;
1234 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1235 
1236 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1237 	master_monitor = prop->collective_mstr_mon_id[1];
1238 
1239 	dev_dbg(hdev->dev,
1240 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1241 		master_sob_base, cprop->mstr_sob_mask[1],
1242 		cprop->next_sob_group_val[stream],
1243 		master_monitor, queue_id);
1244 
1245 	wait_prop.sob_base = master_sob_base;
1246 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1247 	wait_prop.mon_id = master_monitor;
1248 	wait_prop.size = cb_size;
1249 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1250 }
1251 
gaudi_collective_slave_init_job(struct hl_device * hdev,struct hl_cs_job * job,struct hl_cs_compl * cs_cmpl)1252 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1253 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1254 {
1255 	struct hl_gen_wait_properties wait_prop;
1256 	struct hl_sync_stream_properties *prop;
1257 	u32 queue_id, cb_size = 0;
1258 
1259 	queue_id = job->hw_queue_id;
1260 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1261 
1262 	if (job->cs->encaps_signals) {
1263 		/* use the encaps signal handle store earlier in the flow
1264 		 * and set the SOB information from the encaps
1265 		 * signals handle
1266 		 */
1267 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1268 						cs_cmpl);
1269 
1270 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1271 				job->cs->sequence,
1272 				cs_cmpl->hw_sob->sob_id,
1273 				cs_cmpl->sob_val);
1274 	}
1275 
1276 	/* Add to wait CBs using slave monitor */
1277 	wait_prop.data = (void *) job->user_cb;
1278 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1279 	wait_prop.sob_mask = 0x1;
1280 	wait_prop.sob_val = cs_cmpl->sob_val;
1281 	wait_prop.mon_id = prop->collective_slave_mon_id;
1282 	wait_prop.q_idx = queue_id;
1283 	wait_prop.size = cb_size;
1284 
1285 	dev_dbg(hdev->dev,
1286 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1287 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1288 		prop->collective_slave_mon_id, queue_id);
1289 
1290 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1291 
1292 	dev_dbg(hdev->dev,
1293 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1294 		prop->collective_sob_id, queue_id);
1295 
1296 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1297 			prop->collective_sob_id, cb_size, false);
1298 }
1299 
gaudi_collective_wait_init_cs(struct hl_cs * cs)1300 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1301 {
1302 	struct hl_cs_compl *signal_cs_cmpl =
1303 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1304 	struct hl_cs_compl *cs_cmpl =
1305 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1306 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1307 	struct gaudi_collective_properties *cprop;
1308 	u32 stream, queue_id, sob_group_offset;
1309 	struct gaudi_device *gaudi;
1310 	struct hl_device *hdev;
1311 	struct hl_cs_job *job;
1312 	struct hl_ctx *ctx;
1313 
1314 	ctx = cs->ctx;
1315 	hdev = ctx->hdev;
1316 	gaudi = hdev->asic_specific;
1317 	cprop = &gaudi->collective_props;
1318 
1319 	if (cs->encaps_signals) {
1320 		cs_cmpl->hw_sob = handle->hw_sob;
1321 		/* at this checkpoint we only need the hw_sob pointer
1322 		 * for the completion check before start going over the jobs
1323 		 * of the master/slaves, the sob_value will be taken later on
1324 		 * in gaudi_collective_slave_init_job depends on each
1325 		 * job wait offset value.
1326 		 */
1327 		cs_cmpl->sob_val = 0;
1328 	} else {
1329 		/* copy the SOB id and value of the signal CS */
1330 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1331 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1332 	}
1333 
1334 	/* check again if the signal cs already completed.
1335 	 * if yes then don't send any wait cs since the hw_sob
1336 	 * could be in reset already. if signal is not completed
1337 	 * then get refcount to hw_sob to prevent resetting the sob
1338 	 * while wait cs is not submitted.
1339 	 * note that this check is protected by two locks,
1340 	 * hw queue lock and completion object lock,
1341 	 * and the same completion object lock also protects
1342 	 * the hw_sob reset handler function.
1343 	 * The hw_queue lock prevent out of sync of hw_sob
1344 	 * refcount value, changed by signal/wait flows.
1345 	 */
1346 	spin_lock(&signal_cs_cmpl->lock);
1347 
1348 	if (completion_done(&cs->signal_fence->completion)) {
1349 		spin_unlock(&signal_cs_cmpl->lock);
1350 		return -EINVAL;
1351 	}
1352 	/* Increment kref since all slave queues are now waiting on it */
1353 	kref_get(&cs_cmpl->hw_sob->kref);
1354 
1355 	spin_unlock(&signal_cs_cmpl->lock);
1356 
1357 	/* Calculate the stream from collective master queue (1st job) */
1358 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1359 	stream = job->hw_queue_id % 4;
1360 	sob_group_offset =
1361 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1362 
1363 	list_for_each_entry(job, &cs->job_list, cs_node) {
1364 		queue_id = job->hw_queue_id;
1365 
1366 		if (hdev->kernel_queues[queue_id].collective_mode ==
1367 				HL_COLLECTIVE_MASTER)
1368 			gaudi_collective_master_init_job(hdev, job, stream,
1369 						sob_group_offset);
1370 		else
1371 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1372 	}
1373 
1374 	cs_cmpl->sob_group = sob_group_offset;
1375 
1376 	/* Handle sob group kref and wraparound */
1377 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1378 	cprop->next_sob_group_val[stream]++;
1379 
1380 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1381 		/*
1382 		 * Decrement as we reached the max value.
1383 		 * The release function won't be called here as we've
1384 		 * just incremented the refcount.
1385 		 */
1386 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1387 				gaudi_sob_group_reset_error);
1388 		cprop->next_sob_group_val[stream] = 1;
1389 		/* only two SOBs are currently in use */
1390 		cprop->curr_sob_group_idx[stream] =
1391 			(cprop->curr_sob_group_idx[stream] + 1) &
1392 							(HL_RSVD_SOBS - 1);
1393 
1394 		gaudi_collective_map_sobs(hdev, stream);
1395 
1396 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1397 				cprop->curr_sob_group_idx[stream], stream);
1398 	}
1399 
1400 	mb();
1401 	hl_fence_put(cs->signal_fence);
1402 	cs->signal_fence = NULL;
1403 
1404 	return 0;
1405 }
1406 
gaudi_get_patched_cb_extra_size(u32 user_cb_size)1407 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1408 {
1409 	u32 cacheline_end, additional_commands;
1410 
1411 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1412 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1413 
1414 	if (user_cb_size + additional_commands > cacheline_end)
1415 		return cacheline_end - user_cb_size + additional_commands;
1416 	else
1417 		return additional_commands;
1418 }
1419 
gaudi_collective_wait_create_job(struct hl_device * hdev,struct hl_ctx * ctx,struct hl_cs * cs,enum hl_collective_mode mode,u32 queue_id,u32 wait_queue_id,u32 encaps_signal_offset)1420 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1421 		struct hl_ctx *ctx, struct hl_cs *cs,
1422 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1423 		u32 encaps_signal_offset)
1424 {
1425 	struct hw_queue_properties *hw_queue_prop;
1426 	struct hl_cs_counters_atomic *cntr;
1427 	struct hl_cs_job *job;
1428 	struct hl_cb *cb;
1429 	u32 cb_size;
1430 	bool patched_cb;
1431 
1432 	cntr = &hdev->aggregated_cs_counters;
1433 
1434 	if (mode == HL_COLLECTIVE_MASTER) {
1435 		/* CB size of collective master queue contains
1436 		 * 4 msg short packets for monitor 1 configuration
1437 		 * 1 fence packet
1438 		 * 4 msg short packets for monitor 2 configuration
1439 		 * 1 fence packet
1440 		 * 2 msg prot packets for completion and MSI
1441 		 */
1442 		cb_size = sizeof(struct packet_msg_short) * 8 +
1443 				sizeof(struct packet_fence) * 2 +
1444 				sizeof(struct packet_msg_prot) * 2;
1445 		patched_cb = true;
1446 	} else {
1447 		/* CB size of collective slave queues contains
1448 		 * 4 msg short packets for monitor configuration
1449 		 * 1 fence packet
1450 		 * 1 additional msg short packet for sob signal
1451 		 */
1452 		cb_size = sizeof(struct packet_msg_short) * 5 +
1453 				sizeof(struct packet_fence);
1454 		patched_cb = false;
1455 	}
1456 
1457 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1458 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1459 	if (!job) {
1460 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1461 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1462 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1463 		return -ENOMEM;
1464 	}
1465 
1466 	/* Allocate internal mapped CB for non patched CBs */
1467 	cb = hl_cb_kernel_create(hdev, cb_size,
1468 			hdev->mmu_enable && !patched_cb);
1469 	if (!cb) {
1470 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1471 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1472 		kfree(job);
1473 		return -EFAULT;
1474 	}
1475 
1476 	job->id = 0;
1477 	job->cs = cs;
1478 	job->user_cb = cb;
1479 	atomic_inc(&job->user_cb->cs_cnt);
1480 	job->user_cb_size = cb_size;
1481 	job->hw_queue_id = queue_id;
1482 
1483 	/* since its guaranteed to have only one chunk in the collective wait
1484 	 * cs, we can use this chunk to set the encapsulated signal offset
1485 	 * in the jobs.
1486 	 */
1487 	if (cs->encaps_signals)
1488 		job->encaps_sig_wait_offset = encaps_signal_offset;
1489 
1490 	/*
1491 	 * No need for parsing, the user CB is the patched CB.
1492 	 * We call hl_cb_destroy() for two reasons: we don't need the CB in
1493 	 * the CB idr anymore, and we must decrement its refcount, as it was
1494 	 * incremented inside hl_cb_kernel_create().
1495 	 */
1496 	if (patched_cb)
1497 		job->patched_cb = job->user_cb;
1498 	else
1499 		job->patched_cb = NULL;
1500 
1501 	job->job_cb_size = job->user_cb_size;
1502 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1503 
1504 	/* Increment refcount, as we get a completion for external queues */
1505 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1506 		cs_get(cs);
1507 
1508 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1509 
1510 	list_add_tail(&job->cs_node, &cs->job_list);
1511 
1512 	hl_debugfs_add_job(hdev, job);
1513 
1514 	return 0;
1515 }
1516 
1517 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1518 		struct hl_ctx *ctx, struct hl_cs *cs,
1519 		u32 wait_queue_id, u32 collective_engine_id,
1520 		u32 encaps_signal_offset)
1521 {
1522 	struct gaudi_device *gaudi = hdev->asic_specific;
1523 	struct hw_queue_properties *hw_queue_prop;
1524 	u32 queue_id, collective_queue, num_jobs;
1525 	u32 stream, nic_queue, nic_idx = 0;
1526 	bool skip;
1527 	int i, rc = 0;
1528 
1529 	/* Verify wait queue id is configured as master */
1530 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1531 	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1532 		dev_err(hdev->dev,
1533 			"Queue %d is not configured as collective master\n",
1534 			wait_queue_id);
1535 		return -EINVAL;
1536 	}
1537 
1538 	/* Verify engine id is supported */
1539 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1540 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1541 		dev_err(hdev->dev,
1542 			"Collective wait does not support engine %u\n",
1543 			collective_engine_id);
1544 		return -EINVAL;
1545 	}
1546 
1547 	stream = wait_queue_id % 4;
1548 
1549 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1550 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1551 	else
1552 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1553 
1554 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1555 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1556 
1557 	/* The first job goes to the collective master queue; it will wait for
1558 	 * the collective slave queues to finish execution.
1559 	 * The synchronization is done using two monitors:
1560 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1561 	 * reduction engine (DMA5/TPC7).
1562 	 *
1563 	 * The rest of the jobs go to the collective slave queues, which will
1564 	 * all wait for the user to signal SOB 'cs_cmpl->sob_val'.
1565 	 */
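	/* Illustrative resulting job list (assuming all NIC engines are
	 * enabled): job 0 goes to the collective master queue, the following
	 * jobs go to the per-NIC slave queues of this stream (disabled NICs
	 * are skipped), and the final job goes to the DMA5/TPC7 reduction
	 * queue.
	 */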
1566 	for (i = 0 ; i < num_jobs ; i++) {
1567 		if (i == 0) {
1568 			queue_id = wait_queue_id;
1569 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1570 				HL_COLLECTIVE_MASTER, queue_id,
1571 				wait_queue_id, encaps_signal_offset);
1572 		} else {
1573 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1574 				skip = !(gaudi->hw_cap_initialized &
1575 						BIT(HW_CAP_NIC_SHIFT + nic_idx));
1579 
1580 				queue_id = nic_queue;
1581 				nic_queue += 4;
1582 				nic_idx++;
1583 
1584 				if (skip)
1585 					continue;
1586 			} else {
1587 				queue_id = collective_queue;
1588 			}
1589 
1590 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1591 				HL_COLLECTIVE_SLAVE, queue_id,
1592 				wait_queue_id, encaps_signal_offset);
1593 		}
1594 
1595 		if (rc)
1596 			return rc;
1597 	}
1598 
1599 	return rc;
1600 }
1601 
1602 static int gaudi_late_init(struct hl_device *hdev)
1603 {
1604 	struct gaudi_device *gaudi = hdev->asic_specific;
1605 	int rc;
1606 
1607 	rc = gaudi->cpucp_info_get(hdev);
1608 	if (rc) {
1609 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1610 		return rc;
1611 	}
1612 
1613 	if ((hdev->card_type == cpucp_card_type_pci) &&
1614 			(hdev->nic_ports_mask & 0x3)) {
1615 		dev_info(hdev->dev,
1616 			"PCI card detected, only 8 ports are enabled\n");
1617 		hdev->nic_ports_mask &= ~0x3;
1618 
1619 		/* Stop and disable unused NIC QMANs */
1620 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1621 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1622 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1623 
1624 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1625 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1626 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1627 
1628 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1629 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1630 
1631 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1632 	}
1633 
1634 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1635 	if (rc) {
1636 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1637 		return rc;
1638 	}
1639 
1640 	/* Scrub both SRAM and DRAM */
1641 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1642 	if (rc)
1643 		goto disable_pci_access;
1644 
1645 	rc = gaudi_fetch_psoc_frequency(hdev);
1646 	if (rc) {
1647 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1648 		goto disable_pci_access;
1649 	}
1650 
1651 	rc = gaudi_mmu_clear_pgt_range(hdev);
1652 	if (rc) {
1653 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1654 		goto disable_pci_access;
1655 	}
1656 
1657 	rc = gaudi_init_tpc_mem(hdev);
1658 	if (rc) {
1659 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1660 		goto disable_pci_access;
1661 	}
1662 
1663 	rc = gaudi_collective_init(hdev);
1664 	if (rc) {
1665 		dev_err(hdev->dev, "Failed to init collective\n");
1666 		goto disable_pci_access;
1667 	}
1668 
1669 	/* We support only a single user ASID, so as an optimization, initialize
1670 	 * the ASID once during device initialization with the fixed value of 1
1671 	 */
1672 	gaudi_mmu_prepare(hdev, 1);
1673 
1674 	hl_fw_set_pll_profile(hdev);
1675 
1676 	return 0;
1677 
1678 disable_pci_access:
1679 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1680 
1681 	return rc;
1682 }
1683 
1684 static void gaudi_late_fini(struct hl_device *hdev)
1685 {
1686 	hl_hwmon_release_resources(hdev);
1687 }
1688 
1689 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1690 {
1691 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1692 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1693 	int i, j, rc = 0;
1694 
1695 	/*
1696 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1697 	 * to '1' when accessing the host.
1698 	 * Bits 49:39 of the full host address are saved for a later
1699 	 * configuration of the HW that extends the address to 50 bits.
1700 	 * Because a single HW register holds the extension bits, these bits
1701 	 * must be identical across the entire allocated range.
1702 	 */
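	/* Example (hypothetical addresses): if an allocation happens to start
	 * just below a 512GB-aligned boundary (bit 39) and its last byte
	 * crosses it, bits 49:39 of the start and end addresses differ, so the
	 * buffer cannot be described by the single extension register; the
	 * loop below then retries the allocation and frees the unusable
	 * buffers at the end.
	 */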
1703 
1704 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1705 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1706 								&dma_addr_arr[i],
1707 								GFP_KERNEL | __GFP_ZERO);
1708 		if (!virt_addr_arr[i]) {
1709 			rc = -ENOMEM;
1710 			goto free_dma_mem_arr;
1711 		}
1712 
1713 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1714 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1715 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1716 			break;
1717 	}
1718 
1719 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1720 		dev_err(hdev->dev,
1721 			"MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1722 		rc = -EFAULT;
1723 		goto free_dma_mem_arr;
1724 	}
1725 
1726 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1727 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1728 	hdev->cpu_pci_msb_addr =
1729 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1730 
1731 	if (!hdev->asic_prop.fw_security_enabled)
1732 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1733 
1734 free_dma_mem_arr:
1735 	for (j = 0 ; j < i ; j++)
1736 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1737 						dma_addr_arr[j]);
1738 
1739 	return rc;
1740 }
1741 
1742 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1743 {
1744 	struct gaudi_device *gaudi = hdev->asic_specific;
1745 	struct gaudi_internal_qman_info *q;
1746 	u32 i;
1747 
1748 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1749 		q = &gaudi->internal_qmans[i];
1750 		if (!q->pq_kernel_addr)
1751 			continue;
1752 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1753 	}
1754 }
1755 
1756 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1757 {
1758 	struct gaudi_device *gaudi = hdev->asic_specific;
1759 	struct gaudi_internal_qman_info *q;
1760 	int rc, i;
1761 
1762 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1763 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1764 			continue;
1765 
1766 		q = &gaudi->internal_qmans[i];
1767 
1768 		switch (i) {
1769 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1770 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1771 			break;
1772 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1773 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1774 			break;
1775 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1776 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1777 			break;
1778 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1779 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1780 			break;
1781 		default:
1782 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1783 			rc = -EINVAL;
1784 			goto free_internal_qmans_pq_mem;
1785 		}
1786 
1787 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1788 								GFP_KERNEL | __GFP_ZERO);
1789 		if (!q->pq_kernel_addr) {
1790 			rc = -ENOMEM;
1791 			goto free_internal_qmans_pq_mem;
1792 		}
1793 	}
1794 
1795 	return 0;
1796 
1797 free_internal_qmans_pq_mem:
1798 	gaudi_free_internal_qmans_pq_mem(hdev);
1799 	return rc;
1800 }
1801 
1802 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1803 {
1804 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1805 	struct pci_mem_region *region;
1806 
1807 	/* CFG */
1808 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1809 	region->region_base = CFG_BASE;
1810 	region->region_size = CFG_SIZE;
1811 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1812 	region->bar_size = CFG_BAR_SIZE;
1813 	region->bar_id = CFG_BAR_ID;
1814 	region->used = 1;
1815 
1816 	/* SRAM */
1817 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1818 	region->region_base = SRAM_BASE_ADDR;
1819 	region->region_size = SRAM_SIZE;
1820 	region->offset_in_bar = 0;
1821 	region->bar_size = SRAM_BAR_SIZE;
1822 	region->bar_id = SRAM_BAR_ID;
1823 	region->used = 1;
1824 
1825 	/* DRAM */
1826 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1827 	region->region_base = DRAM_PHYS_BASE;
1828 	region->region_size = hdev->asic_prop.dram_size;
1829 	region->offset_in_bar = 0;
1830 	region->bar_size = prop->dram_pci_bar_size;
1831 	region->bar_id = HBM_BAR_ID;
1832 	region->used = 1;
1833 
1834 	/* SP SRAM */
1835 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1836 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1837 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1838 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1839 	region->bar_size = CFG_BAR_SIZE;
1840 	region->bar_id = CFG_BAR_ID;
1841 	region->used = 1;
1842 }
1843 
1844 static int gaudi_sw_init(struct hl_device *hdev)
1845 {
1846 	struct gaudi_device *gaudi;
1847 	u32 i, event_id = 0;
1848 	int rc;
1849 
1850 	/* Allocate device structure */
1851 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1852 	if (!gaudi)
1853 		return -ENOMEM;
1854 
1855 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1856 		if (gaudi_irq_map_table[i].valid) {
1857 			if (event_id == GAUDI_EVENT_SIZE) {
1858 				dev_err(hdev->dev,
1859 					"Event array exceeds the limit of %u events\n",
1860 					GAUDI_EVENT_SIZE);
1861 				rc = -EINVAL;
1862 				goto free_gaudi_device;
1863 			}
1864 
1865 			gaudi->events[event_id++] =
1866 					gaudi_irq_map_table[i].fc_id;
1867 		}
1868 	}
1869 
1870 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1871 
1872 	hdev->asic_specific = gaudi;
1873 
1874 	/* Create DMA pool for small allocations */
1875 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1876 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1877 	if (!hdev->dma_pool) {
1878 		dev_err(hdev->dev, "failed to create DMA pool\n");
1879 		rc = -ENOMEM;
1880 		goto free_gaudi_device;
1881 	}
1882 
1883 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1884 	if (rc)
1885 		goto free_dma_pool;
1886 
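	/* Pool with 32-byte allocation granularity (ilog2(32) == order 5);
	 * the -1 nid means no NUMA node preference
	 */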
1887 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1888 	if (!hdev->cpu_accessible_dma_pool) {
1889 		dev_err(hdev->dev,
1890 			"Failed to create CPU accessible DMA pool\n");
1891 		rc = -ENOMEM;
1892 		goto free_cpu_dma_mem;
1893 	}
1894 
1895 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1896 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1897 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1898 	if (rc) {
1899 		dev_err(hdev->dev,
1900 			"Failed to add memory to CPU accessible DMA pool\n");
1901 		rc = -EFAULT;
1902 		goto free_cpu_accessible_dma_pool;
1903 	}
1904 
1905 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1906 	if (rc)
1907 		goto free_cpu_accessible_dma_pool;
1908 
1909 	spin_lock_init(&gaudi->hw_queues_lock);
1910 
1911 	hdev->supports_sync_stream = true;
1912 	hdev->supports_coresight = true;
1913 	hdev->supports_staged_submission = true;
1914 	hdev->supports_wait_for_multi_cs = true;
1915 
1916 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1917 	hdev->stream_master_qid_arr =
1918 				hdev->asic_funcs->get_stream_master_qid_arr();
1919 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1920 
1921 	return 0;
1922 
1923 free_cpu_accessible_dma_pool:
1924 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1925 free_cpu_dma_mem:
1926 	if (!hdev->asic_prop.fw_security_enabled)
1927 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1928 					hdev->cpu_pci_msb_addr);
1929 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1930 					hdev->cpu_accessible_dma_address);
1931 free_dma_pool:
1932 	dma_pool_destroy(hdev->dma_pool);
1933 free_gaudi_device:
1934 	kfree(gaudi);
1935 	return rc;
1936 }
1937 
1938 static int gaudi_sw_fini(struct hl_device *hdev)
1939 {
1940 	struct gaudi_device *gaudi = hdev->asic_specific;
1941 
1942 	gaudi_free_internal_qmans_pq_mem(hdev);
1943 
1944 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1945 
1946 	if (!hdev->asic_prop.fw_security_enabled)
1947 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1948 					hdev->cpu_pci_msb_addr);
1949 
1950 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1951 					hdev->cpu_accessible_dma_address);
1952 
1953 	dma_pool_destroy(hdev->dma_pool);
1954 
1955 	kfree(gaudi);
1956 
1957 	return 0;
1958 }
1959 
1960 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1961 {
1962 	struct hl_device *hdev = arg;
1963 	int i;
1964 
1965 	if (hdev->disabled)
1966 		return IRQ_HANDLED;
1967 
1968 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1969 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1970 
1971 	hl_irq_handler_eq(irq, &hdev->event_queue);
1972 
1973 	return IRQ_HANDLED;
1974 }
1975 
1976 /*
1977  * For backward compatibility, new MSI interrupts should be set after the
1978  * existing CPU and NIC interrupts.
1979  */
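/*
 * Illustrative vector layout derived from gaudi_pci_irq_vector() below:
 * vectors 0..GAUDI_EVENT_QUEUE_MSI_IDX-1 serve the completion queues, vector
 * GAUDI_EVENT_QUEUE_MSI_IDX serves the CPU event queue, the next
 * NIC_NUMBER_OF_ENGINES vectors are reserved for the NICs, and a new MSI
 * index nr therefore maps to vector nr + NIC_NUMBER_OF_ENGINES + 1.
 */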
1980 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1981 				bool cpu_eq)
1982 {
1983 	int msi_vec;
1984 
1985 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1986 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1987 				GAUDI_EVENT_QUEUE_MSI_IDX);
1988 
1989 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1990 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1991 
1992 	return pci_irq_vector(hdev->pdev, msi_vec);
1993 }
1994 
1995 static int gaudi_enable_msi_single(struct hl_device *hdev)
1996 {
1997 	int rc, irq;
1998 
1999 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2000 
2001 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2002 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2003 			"gaudi single msi", hdev);
2004 	if (rc)
2005 		dev_err(hdev->dev,
2006 			"Failed to request single MSI IRQ\n");
2007 
2008 	return rc;
2009 }
2010 
2011 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2012 {
2013 	int cq_cnt = hdev->asic_prop.completion_queues_count;
2014 	int rc, i, irq_cnt_init, irq;
2015 
2016 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2017 		irq = gaudi_pci_irq_vector(hdev, i, false);
2018 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2019 				&hdev->completion_queue[i]);
2020 		if (rc) {
2021 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2022 			goto free_irqs;
2023 		}
2024 	}
2025 
2026 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2027 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2028 				&hdev->event_queue);
2029 	if (rc) {
2030 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2031 		goto free_irqs;
2032 	}
2033 
2034 	return 0;
2035 
2036 free_irqs:
2037 	for (i = 0 ; i < irq_cnt_init ; i++)
2038 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2039 				&hdev->completion_queue[i]);
2040 	return rc;
2041 }
2042 
2043 static int gaudi_enable_msi(struct hl_device *hdev)
2044 {
2045 	struct gaudi_device *gaudi = hdev->asic_specific;
2046 	int rc;
2047 
2048 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2049 		return 0;
2050 
2051 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2052 	if (rc < 0) {
2053 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2054 		return rc;
2055 	}
2056 
2057 	if (rc < NUMBER_OF_INTERRUPTS) {
2058 		gaudi->multi_msi_mode = false;
2059 		rc = gaudi_enable_msi_single(hdev);
2060 	} else {
2061 		gaudi->multi_msi_mode = true;
2062 		rc = gaudi_enable_msi_multi(hdev);
2063 	}
2064 
2065 	if (rc)
2066 		goto free_pci_irq_vectors;
2067 
2068 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2069 
2070 	return 0;
2071 
2072 free_pci_irq_vectors:
2073 	pci_free_irq_vectors(hdev->pdev);
2074 	return rc;
2075 }
2076 
2077 static void gaudi_sync_irqs(struct hl_device *hdev)
2078 {
2079 	struct gaudi_device *gaudi = hdev->asic_specific;
2080 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2081 
2082 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2083 		return;
2084 
2085 	/* Wait for all pending IRQs to be finished */
2086 	if (gaudi->multi_msi_mode) {
2087 		for (i = 0 ; i < cq_cnt ; i++)
2088 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2089 
2090 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2091 						GAUDI_EVENT_QUEUE_MSI_IDX,
2092 						true));
2093 	} else {
2094 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2095 	}
2096 }
2097 
2098 static void gaudi_disable_msi(struct hl_device *hdev)
2099 {
2100 	struct gaudi_device *gaudi = hdev->asic_specific;
2101 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2102 
2103 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2104 		return;
2105 
2106 	gaudi_sync_irqs(hdev);
2107 
2108 	if (gaudi->multi_msi_mode) {
2109 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2110 						true);
2111 		free_irq(irq, &hdev->event_queue);
2112 
2113 		for (i = 0 ; i < cq_cnt ; i++) {
2114 			irq = gaudi_pci_irq_vector(hdev, i, false);
2115 			free_irq(irq, &hdev->completion_queue[i]);
2116 		}
2117 	} else {
2118 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2119 	}
2120 
2121 	pci_free_irq_vectors(hdev->pdev);
2122 
2123 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2124 }
2125 
2126 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2127 {
2128 	struct gaudi_device *gaudi = hdev->asic_specific;
2129 
2130 	if (hdev->asic_prop.fw_security_enabled)
2131 		return;
2132 
2133 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2134 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2135 		return;
2136 
2137 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2138 		return;
2139 
2140 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2141 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2142 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2143 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2145 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2147 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2149 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2151 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2153 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2155 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156 
2157 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2158 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2166 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173 
2174 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2175 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2176 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2177 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2179 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2181 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2183 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2185 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2187 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2189 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190 
2191 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2192 }
2193 
2194 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2195 {
2196 	struct gaudi_device *gaudi = hdev->asic_specific;
2197 
2198 	if (hdev->asic_prop.fw_security_enabled)
2199 		return;
2200 
2201 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2202 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2203 		return;
2204 
2205 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2206 		return;
2207 
2208 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 
2225 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2226 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2228 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2230 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2232 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2234 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2236 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2238 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2240 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241 
2242 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2243 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2244 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2245 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2247 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2249 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2251 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2253 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2255 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2257 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258 
2259 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2260 }
2261 
2262 static void gaudi_init_e2e(struct hl_device *hdev)
2263 {
2264 	if (hdev->asic_prop.fw_security_enabled)
2265 		return;
2266 
2267 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2268 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2269 		return;
2270 
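	/* The HBM WR/RD size values below appear to be expressed in 8-byte
	 * units, hence the ">> 3" on the raw tuning numbers.
	 */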
2271 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2272 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2273 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2274 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2275 
2276 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2277 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2278 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2279 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2280 
2281 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2282 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2283 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2284 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2285 
2286 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2287 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2288 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2289 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2290 
2291 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2292 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2293 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2294 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2295 
2296 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2297 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2298 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2299 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2300 
2301 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2302 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2303 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2304 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2305 
2306 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2307 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2308 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2309 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2310 
2311 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2312 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2313 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2314 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2315 
2316 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2317 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2318 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2319 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2320 
2321 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2322 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2323 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2324 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2325 
2326 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2327 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2328 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2329 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2330 
2331 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2332 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2333 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2334 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2335 
2336 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2337 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2338 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2339 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2340 
2341 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2342 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2343 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2344 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2345 
2346 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2347 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2348 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2349 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2350 
2351 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2352 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2353 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2354 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2355 
2356 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2357 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2358 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2359 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2360 
2361 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2362 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2363 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2364 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2365 
2366 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2367 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2368 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2369 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2370 
2371 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2372 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2373 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2374 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2375 
2376 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2377 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2378 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2379 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2380 
2381 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2382 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2383 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2384 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2385 
2386 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2387 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2388 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2389 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2390 
2391 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2392 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2393 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2394 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2395 
2396 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2397 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2398 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2399 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2400 
2401 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2402 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2403 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2404 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2405 
2406 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2407 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2408 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2409 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2410 
2411 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2412 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2413 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2414 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2415 
2416 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2417 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2418 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2419 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2420 
2421 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2422 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2423 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2424 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2425 
2426 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2427 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2428 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2429 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2430 
2431 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2432 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2433 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2434 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2435 
2436 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2437 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2438 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2439 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2440 
2441 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2442 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2443 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2444 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2445 
2446 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2447 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2448 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2449 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2450 
2451 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2452 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2453 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2454 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2455 
2456 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2457 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2458 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2459 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2460 
2461 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2462 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2463 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2464 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2465 
2466 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2467 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2468 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2469 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2470 
2471 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2472 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2473 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2474 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2475 
2476 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2477 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2478 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2479 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2480 
2481 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2482 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2483 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2484 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2485 
2486 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2487 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2488 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2489 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2490 
2491 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2492 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2493 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2494 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2495 
2496 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2497 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2498 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2499 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2500 
2501 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2502 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2503 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2504 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2505 
2506 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2507 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2508 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2509 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2510 }
2511 
2512 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2513 {
2514 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2515 
2516 	if (hdev->asic_prop.fw_security_enabled)
2517 		return;
2518 
2519 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2520 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2521 		return;
2522 
2523 	hbm0_wr = 0x33333333;
2524 	hbm0_rd = 0x77777777;
2525 	hbm1_wr = 0x55555555;
2526 	hbm1_rd = 0xDDDDDDDD;
2527 
2528 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2529 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2530 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2531 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2532 
2533 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2534 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2535 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2536 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2537 
2538 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2539 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2540 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2541 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2542 
2543 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2544 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2545 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2546 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2547 
2548 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2549 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2550 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2551 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2552 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2553 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2554 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2555 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2556 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2557 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2558 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2559 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2560 
2561 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2562 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2563 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2564 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2565 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2566 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2567 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2568 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2569 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2570 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2571 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2572 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2573 }
2574 
2575 static void gaudi_init_golden_registers(struct hl_device *hdev)
2576 {
2577 	u32 tpc_offset;
2578 	int tpc_id, i;
2579 
2580 	gaudi_init_e2e(hdev);
2581 	gaudi_init_hbm_cred(hdev);
2582 
2583 	for (tpc_id = 0, tpc_offset = 0;
2584 				tpc_id < TPC_NUMBER_OF_ENGINES;
2585 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2586 		/* Mask all arithmetic interrupts from TPC */
2587 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2588 		/* Set 16 cache lines */
2589 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2590 				ICACHE_FETCH_LINE_NUM, 2);
2591 	}
2592 
2593 	/* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2594 	for (i = 0 ; i < 128 ; i += 8)
2595 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2596 
2597 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2598 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2599 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2600 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2601 }
2602 
2603 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2604 					int qman_id, dma_addr_t qman_pq_addr)
2605 {
2606 	struct cpu_dyn_regs *dyn_regs =
2607 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2608 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2609 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2610 	u32 q_off, dma_qm_offset;
2611 	u32 dma_qm_err_cfg, irq_handler_offset;
2612 
2613 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2614 
2615 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2616 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2617 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2618 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2619 	so_base_en_lo = lower_32_bits(CFG_BASE +
2620 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2621 	so_base_en_hi = upper_32_bits(CFG_BASE +
2622 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2623 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2624 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2625 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2626 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2627 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2628 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2629 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2630 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2631 
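	/* The per-stream copies of the QMAN registers (PQ_BASE, PQ_PI, ...)
	 * are consecutive 32-bit registers, so qman_id * 4 selects the copy
	 * belonging to this stream.
	 */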
2632 	q_off = dma_qm_offset + qman_id * 4;
2633 
2634 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2635 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2636 
2637 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2638 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2639 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2640 
2641 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2642 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2643 							QMAN_LDMA_SRC_OFFSET);
2644 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2645 							QMAN_LDMA_DST_OFFSET);
2646 
2647 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2648 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2649 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2650 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2651 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2652 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2653 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2654 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2655 
2656 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2657 
2658 	/* The following configuration is needed only once per QMAN */
2659 	if (qman_id == 0) {
2660 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2661 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2662 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2663 
2664 		/* Configure RAZWI IRQ */
2665 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2666 		if (hdev->stop_on_err)
2667 			dma_qm_err_cfg |=
2668 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2669 
2670 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2671 
2672 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2673 			lower_32_bits(CFG_BASE + irq_handler_offset));
2674 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2675 			upper_32_bits(CFG_BASE + irq_handler_offset));
2676 
2677 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2678 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2679 									dma_id);
2680 
2681 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2682 				QM_ARB_ERR_MSG_EN_MASK);
2683 
2684 		/* Set timeout to maximum */
2685 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2686 
2687 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2688 				QMAN_EXTERNAL_MAKE_TRUSTED);
2689 
2690 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2691 	}
2692 }
2693 
2694 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2695 {
2696 	struct cpu_dyn_regs *dyn_regs =
2697 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2698 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2699 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2700 	u32 irq_handler_offset;
2701 
2702 	/* Set to maximum possible according to physical size */
2703 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2704 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2705 
2706 	/* WA for H/W bug H3-2116 */
2707 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2708 
2709 	/* STOP_ON means the operation gets no completion in case of a RAZWI */
2710 	if (hdev->stop_on_err)
2711 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2712 
2713 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2714 
2715 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2716 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2717 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2718 
2719 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2720 		lower_32_bits(CFG_BASE + irq_handler_offset));
2721 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2722 		upper_32_bits(CFG_BASE + irq_handler_offset));
2723 
2724 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2725 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2726 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2727 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2728 	/* If the channel is secured, it should be in MMU bypass mode */
2729 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2730 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2731 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2732 }
2733 
2734 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2735 				u32 enable_mask)
2736 {
2737 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738 
2739 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2740 }
2741 
2742 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2743 {
2744 	struct gaudi_device *gaudi = hdev->asic_specific;
2745 	struct hl_hw_queue *q;
2746 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2747 
2748 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2749 		return;
2750 
2751 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2752 		dma_id = gaudi_dma_assignment[i];
2753 		/*
2754 		 * For queues after the CPU queue, we need to add 1 to get the
2755 		 * correct queue index. In addition, we need to account for the
2756 		 * CPU EQ and NIC IRQs in order to get the correct MSI register.
2757 		 */
2758 		if (dma_id > 1) {
2759 			cpu_skip = 1;
2760 			nic_skip = NIC_NUMBER_OF_ENGINES;
2761 		} else {
2762 			cpu_skip = 0;
2763 			nic_skip = 0;
2764 		}
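		/* Illustrative indexing: for dma_id 0/1 (queues before the CPU
		 * queue) q_idx is simply 4 * dma_id + j, while for dma_id > 1
		 * the CPU queue shifts q_idx by one and the CPU EQ + NIC IRQs
		 * shift the MSI vector accordingly.
		 */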
2765 
2766 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2767 			q_idx = 4 * dma_id + j + cpu_skip;
2768 			q = &hdev->kernel_queues[q_idx];
2769 			q->cq_id = cq_id++;
2770 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2771 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2772 						q->bus_address);
2773 		}
2774 
2775 		gaudi_init_dma_core(hdev, dma_id);
2776 
2777 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2778 	}
2779 
2780 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2781 }
2782 
2783 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2784 					int qman_id, u64 qman_base_addr)
2785 {
2786 	struct cpu_dyn_regs *dyn_regs =
2787 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2788 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2789 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2790 	u32 dma_qm_err_cfg, irq_handler_offset;
2791 	u32 q_off, dma_qm_offset;
2792 
2793 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2794 
2795 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2796 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2797 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2798 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2799 	so_base_en_lo = lower_32_bits(CFG_BASE +
2800 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2801 	so_base_en_hi = upper_32_bits(CFG_BASE +
2802 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2803 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2804 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2805 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2806 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2807 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2808 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2809 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2810 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2811 
2812 	q_off = dma_qm_offset + qman_id * 4;
2813 
2814 	if (qman_id < 4) {
2815 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2816 					lower_32_bits(qman_base_addr));
2817 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2818 					upper_32_bits(qman_base_addr));
2819 
2820 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2821 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2822 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2823 
2824 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2825 							QMAN_CPDMA_SIZE_OFFSET);
2826 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2827 							QMAN_CPDMA_SRC_OFFSET);
2828 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2829 							QMAN_CPDMA_DST_OFFSET);
2830 	} else {
2831 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2832 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2833 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2834 
2835 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2836 							QMAN_LDMA_SIZE_OFFSET);
2837 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2838 							QMAN_LDMA_SRC_OFFSET);
2839 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2840 							QMAN_LDMA_DST_OFFSET);
2841 
2842 		/* Configure RAZWI IRQ */
2843 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2844 		if (hdev->stop_on_err)
2845 			dma_qm_err_cfg |=
2846 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2847 
2848 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2849 
2850 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2851 			lower_32_bits(CFG_BASE + irq_handler_offset));
2852 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2853 			upper_32_bits(CFG_BASE + irq_handler_offset));
2854 
2855 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2856 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2857 									dma_id);
2858 
2859 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2860 				QM_ARB_ERR_MSG_EN_MASK);
2861 
2862 		/* Set timeout to maximum */
2863 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2864 
2865 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2866 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2867 				QMAN_INTERNAL_MAKE_TRUSTED);
2868 	}
2869 
2870 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2871 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2872 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2873 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2874 
2875 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2876 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2877 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2878 				mtr_base_ws_lo);
2879 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2880 				mtr_base_ws_hi);
2881 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2882 				so_base_ws_lo);
2883 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2884 				so_base_ws_hi);
2885 	}
2886 }
2887 
2888 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2889 {
2890 	struct gaudi_device *gaudi = hdev->asic_specific;
2891 	struct gaudi_internal_qman_info *q;
2892 	u64 qman_base_addr;
2893 	int i, j, dma_id, internal_q_index;
2894 
2895 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2896 		return;
2897 
2898 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2899 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2900 
2901 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2902 			 /*
2903 			  * Add the CPU queue in order to get the correct queue
2904 			  * number, as all internal queues are placed after it
2905 			  */
2906 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2907 
2908 			q = &gaudi->internal_qmans[internal_q_index];
2909 			qman_base_addr = (u64) q->pq_dma_addr;
2910 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2911 						qman_base_addr);
2912 		}
2913 
2914 		/* Initializing lower CP for HBM DMA QMAN */
2915 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2916 
2917 		gaudi_init_dma_core(hdev, dma_id);
2918 
2919 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2920 	}
2921 
2922 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2923 }
2924 
2925 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2926 					int qman_id, u64 qman_base_addr)
2927 {
2928 	struct cpu_dyn_regs *dyn_regs =
2929 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2930 	u32 mtr_base_lo, mtr_base_hi;
2931 	u32 so_base_lo, so_base_hi;
2932 	u32 irq_handler_offset;
2933 	u32 q_off, mme_id;
2934 	u32 mme_qm_err_cfg;
2935 
2936 	mtr_base_lo = lower_32_bits(CFG_BASE +
2937 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2938 	mtr_base_hi = upper_32_bits(CFG_BASE +
2939 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2940 	so_base_lo = lower_32_bits(CFG_BASE +
2941 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2942 	so_base_hi = upper_32_bits(CFG_BASE +
2943 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2944 
2945 	q_off = mme_offset + qman_id * 4;
2946 
2947 	if (qman_id < 4) {
2948 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2949 					lower_32_bits(qman_base_addr));
2950 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2951 					upper_32_bits(qman_base_addr));
2952 
2953 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2954 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2955 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2956 
2957 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2958 							QMAN_CPDMA_SIZE_OFFSET);
2959 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2960 							QMAN_CPDMA_SRC_OFFSET);
2961 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2962 							QMAN_CPDMA_DST_OFFSET);
2963 	} else {
2964 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2965 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2966 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2967 
2968 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2969 							QMAN_LDMA_SIZE_OFFSET);
2970 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2971 							QMAN_LDMA_SRC_OFFSET);
2972 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2973 							QMAN_LDMA_DST_OFFSET);
2974 
2975 		/* Configure RAZWI IRQ */
2976 		mme_id = mme_offset /
2977 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2978 
2979 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2980 		if (hdev->stop_on_err)
2981 			mme_qm_err_cfg |=
2982 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2983 
2984 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2985 
2986 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2987 			lower_32_bits(CFG_BASE + irq_handler_offset));
2988 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2989 			upper_32_bits(CFG_BASE + irq_handler_offset));
2990 
2991 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2992 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2993 									mme_id);
2994 
2995 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2996 				QM_ARB_ERR_MSG_EN_MASK);
2997 
2998 		/* Set timeout to maximum */
2999 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3000 
3001 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3002 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3003 				QMAN_INTERNAL_MAKE_TRUSTED);
3004 	}
3005 
3006 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3007 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3008 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3009 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3010 }
3011 
3012 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3013 {
3014 	struct gaudi_device *gaudi = hdev->asic_specific;
3015 	struct gaudi_internal_qman_info *q;
3016 	u64 qman_base_addr;
3017 	u32 mme_offset;
3018 	int i, internal_q_index;
3019 
3020 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3021 		return;
3022 
3023 	/*
3024 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3025 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3026 	 */
3027 
3028 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3029 
3030 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3031 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3032 		q = &gaudi->internal_qmans[internal_q_index];
3033 		qman_base_addr = (u64) q->pq_dma_addr;
3034 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3035 					qman_base_addr);
3036 		if (i == 3)
3037 			mme_offset = 0;
3038 	}
3039 
3040 	/* Initializing lower CP for MME QMANs */
3041 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3042 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3043 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3044 
3045 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3046 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3047 
3048 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3049 }
3050 
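/*
 * Configure a single TPC QMAN stream, following the same scheme as the MME:
 * streams 0-3 get a host-resident PQ and CPDMA offsets, while stream 4 (the
 * lower CP) gets LDMA offsets and the error/arbitration configuration.
 * MSG_BASE 0/1 point at the east-north sync manager; on the TPC used for the
 * sync stream collective (tpc_id 6), MSG_BASE 2/3 point at the west-south
 * sync manager as well.
 */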
3051 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3052 				int qman_id, u64 qman_base_addr)
3053 {
3054 	struct cpu_dyn_regs *dyn_regs =
3055 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3056 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3057 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3058 	u32 tpc_qm_err_cfg, irq_handler_offset;
3059 	u32 q_off, tpc_id;
3060 
3061 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3062 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3063 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3064 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065 	so_base_en_lo = lower_32_bits(CFG_BASE +
3066 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3067 	so_base_en_hi = upper_32_bits(CFG_BASE +
3068 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3069 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3070 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3071 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3072 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3073 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3074 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3075 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3076 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3077 
3078 	q_off = tpc_offset + qman_id * 4;
3079 
3080 	tpc_id = tpc_offset /
3081 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3082 
3083 	if (qman_id < 4) {
3084 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3085 					lower_32_bits(qman_base_addr));
3086 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3087 					upper_32_bits(qman_base_addr));
3088 
3089 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3090 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3091 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3092 
3093 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3094 							QMAN_CPDMA_SIZE_OFFSET);
3095 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3096 							QMAN_CPDMA_SRC_OFFSET);
3097 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3098 							QMAN_CPDMA_DST_OFFSET);
3099 	} else {
3100 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3101 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3102 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3103 
3104 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3105 							QMAN_LDMA_SIZE_OFFSET);
3106 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3107 							QMAN_LDMA_SRC_OFFSET);
3108 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3109 							QMAN_LDMA_DST_OFFSET);
3110 
3111 		/* Configure RAZWI IRQ */
3112 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3113 		if (hdev->stop_on_err)
3114 			tpc_qm_err_cfg |=
3115 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3116 
3117 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3118 
3119 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3120 			lower_32_bits(CFG_BASE + irq_handler_offset));
3121 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3122 			upper_32_bits(CFG_BASE + irq_handler_offset));
3123 
3124 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3125 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3126 									tpc_id);
3127 
3128 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3129 				QM_ARB_ERR_MSG_EN_MASK);
3130 
3131 		/* Set timeout to maximum */
3132 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3133 
3134 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3135 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3136 				QMAN_INTERNAL_MAKE_TRUSTED);
3137 	}
3138 
3139 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3140 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3141 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3142 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3143 
3144 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3145 	if (tpc_id == 6) {
3146 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3147 				mtr_base_ws_lo);
3148 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3149 				mtr_base_ws_hi);
3150 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3151 				so_base_ws_lo);
3152 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3153 				so_base_ws_hi);
3154 	}
3155 }
3156 
3157 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3158 {
3159 	struct gaudi_device *gaudi = hdev->asic_specific;
3160 	struct gaudi_internal_qman_info *q;
3161 	u64 qman_base_addr;
3162 	u32 so_base_hi, tpc_offset = 0;
3163 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3164 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3165 	int i, tpc_id, internal_q_index;
3166 
3167 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3168 		return;
3169 
3170 	so_base_hi = upper_32_bits(CFG_BASE +
3171 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3172 
3173 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3174 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3175 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3176 						tpc_id * QMAN_STREAMS + i;
3177 			q = &gaudi->internal_qmans[internal_q_index];
3178 			qman_base_addr = (u64) q->pq_dma_addr;
3179 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3180 						qman_base_addr);
3181 
3182 			if (i == 3) {
3183 				/* Initializing lower CP for TPC QMAN */
3184 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3185 
3186 				/* Enable the QMAN and TPC channel */
3187 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3188 						QMAN_TPC_ENABLE);
3189 			}
3190 		}
3191 
3192 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3193 				so_base_hi);
3194 
3195 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3196 
3197 		gaudi->hw_cap_initialized |=
3198 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3199 	}
3200 }
3201 
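/*
 * Configure a single NIC QMAN stream. Every stream gets a host-resident PQ,
 * LDMA offsets and MSG_BASE 0-3 pointing at the east-north and west-south
 * sync managers. The per-QMAN error, arbitration and protection
 * configuration is written only once, when stream 0 is initialized.
 */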
3202 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3203 				int qman_id, u64 qman_base_addr, int nic_id)
3204 {
3205 	struct cpu_dyn_regs *dyn_regs =
3206 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3207 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3208 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3209 	u32 nic_qm_err_cfg, irq_handler_offset;
3210 	u32 q_off;
3211 
3212 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3213 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3214 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3215 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3216 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3217 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3218 	so_base_en_hi = upper_32_bits(CFG_BASE +
3219 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3220 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3221 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3222 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3223 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3224 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3225 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3226 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3227 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228 
3229 	q_off = nic_offset + qman_id * 4;
3230 
3231 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3232 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3233 
3234 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3235 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3236 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3237 
3238 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3239 							QMAN_LDMA_SIZE_OFFSET);
3240 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3241 							QMAN_LDMA_SRC_OFFSET);
3242 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3243 							QMAN_LDMA_DST_OFFSET);
3244 
3245 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3246 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3247 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3248 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3249 
3250 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3251 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3252 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3253 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3254 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3255 
3256 	if (qman_id == 0) {
3257 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3258 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3259 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3260 
3261 		/* Configure RAZWI IRQ */
3262 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3263 		if (hdev->stop_on_err)
3264 			nic_qm_err_cfg |=
3265 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3266 
3267 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3268 
3269 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3270 			lower_32_bits(CFG_BASE + irq_handler_offset));
3271 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3272 			upper_32_bits(CFG_BASE + irq_handler_offset));
3273 
3274 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3275 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3276 									nic_id);
3277 
3278 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3279 				QM_ARB_ERR_MSG_EN_MASK);
3280 
3281 		/* Set timeout to maximum */
3282 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3283 
3284 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3285 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3286 				QMAN_INTERNAL_MAKE_TRUSTED);
3287 	}
3288 }
3289 
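/*
 * Initialize the QMANs of all NIC ports enabled in nic_ports_mask. The
 * register offset advances by one QMAN stride per port; after every odd port
 * it is rewound by two QMAN strides and advanced by one NIC stride, because
 * each NIC macro holds two QMANs.
 */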
3290 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3291 {
3292 	struct gaudi_device *gaudi = hdev->asic_specific;
3293 	struct gaudi_internal_qman_info *q;
3294 	u64 qman_base_addr;
3295 	u32 nic_offset = 0;
3296 	u32 nic_delta_between_qmans =
3297 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3298 	u32 nic_delta_between_nics =
3299 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3300 	int i, nic_id, internal_q_index;
3301 
3302 	if (!hdev->nic_ports_mask)
3303 		return;
3304 
3305 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3306 		return;
3307 
3308 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3309 
3310 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3311 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3312 			nic_offset += nic_delta_between_qmans;
3313 			if (nic_id & 1) {
3314 				nic_offset -= (nic_delta_between_qmans * 2);
3315 				nic_offset += nic_delta_between_nics;
3316 			}
3317 			continue;
3318 		}
3319 
3320 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3321 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3322 						nic_id * QMAN_STREAMS + i;
3323 			q = &gaudi->internal_qmans[internal_q_index];
3324 			qman_base_addr = (u64) q->pq_dma_addr;
3325 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3326 						qman_base_addr, nic_id);
3327 		}
3328 
3329 		/* Enable the QMAN */
3330 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3331 
3332 		nic_offset += nic_delta_between_qmans;
3333 		if (nic_id & 1) {
3334 			nic_offset -= (nic_delta_between_qmans * 2);
3335 			nic_offset += nic_delta_between_nics;
3336 		}
3337 
3338 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3339 	}
3340 }
3341 
3342 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3343 {
3344 	struct gaudi_device *gaudi = hdev->asic_specific;
3345 
3346 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3347 		return;
3348 
3349 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3350 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3351 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3352 }
3353 
3354 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3355 {
3356 	struct gaudi_device *gaudi = hdev->asic_specific;
3357 
3358 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3359 		return;
3360 
3361 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3362 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3363 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3364 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3365 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3366 }
3367 
3368 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3369 {
3370 	struct gaudi_device *gaudi = hdev->asic_specific;
3371 
3372 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3373 		return;
3374 
3375 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3376 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3377 }
3378 
3379 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3380 {
3381 	struct gaudi_device *gaudi = hdev->asic_specific;
3382 	u32 tpc_offset = 0;
3383 	int tpc_id;
3384 
3385 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3386 		return;
3387 
3388 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3389 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3390 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3391 	}
3392 }
3393 
3394 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3395 {
3396 	struct gaudi_device *gaudi = hdev->asic_specific;
3397 	u32 nic_mask, nic_offset = 0;
3398 	u32 nic_delta_between_qmans =
3399 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3400 	u32 nic_delta_between_nics =
3401 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3402 	int nic_id;
3403 
3404 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3405 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3406 
3407 		if (gaudi->hw_cap_initialized & nic_mask)
3408 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3409 
3410 		nic_offset += nic_delta_between_qmans;
3411 		if (nic_id & 1) {
3412 			nic_offset -= (nic_delta_between_qmans * 2);
3413 			nic_offset += nic_delta_between_nics;
3414 		}
3415 	}
3416 }
3417 
3418 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3419 {
3420 	struct gaudi_device *gaudi = hdev->asic_specific;
3421 
3422 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3423 		return;
3424 
3425 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3426 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3427 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3428 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3429 }
3430 
3431 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3432 {
3433 	struct gaudi_device *gaudi = hdev->asic_specific;
3434 
3435 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3436 		return;
3437 
3438 	/* Stop CPs of HBM DMA QMANs */
3439 
3440 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3441 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3442 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3445 }
3446 
3447 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3448 {
3449 	struct gaudi_device *gaudi = hdev->asic_specific;
3450 
3451 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3452 		return;
3453 
3454 	/* Stop CPs of MME QMANs */
3455 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3456 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3457 }
3458 
3459 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3460 {
3461 	struct gaudi_device *gaudi = hdev->asic_specific;
3462 
3463 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3464 		return;
3465 
3466 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3467 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3468 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3469 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474 }
3475 
3476 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3477 {
3478 	struct gaudi_device *gaudi = hdev->asic_specific;
3479 
3480 	/* Stop upper CPs of QMANs */
3481 
3482 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3483 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3484 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3485 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3486 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3487 
3488 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3489 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3490 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3491 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3492 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3493 
3494 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3495 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3496 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3497 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3498 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3499 
3500 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3501 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3502 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3503 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3504 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3505 
3506 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3507 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3508 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3509 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3510 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3511 
3512 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3513 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3514 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3515 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3516 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3517 
3518 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3519 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3520 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3521 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3522 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3523 
3524 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3525 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3526 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3527 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3528 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3529 
3530 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3531 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3532 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3533 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3534 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3535 
3536 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3537 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3538 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3539 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3540 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3541 }
3542 
3543 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3544 {
3545 	struct gaudi_device *gaudi = hdev->asic_specific;
3546 
3547 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3548 		return;
3549 
3550 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3551 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3552 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3553 }
3554 
3555 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3556 {
3557 	struct gaudi_device *gaudi = hdev->asic_specific;
3558 
3559 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3560 		return;
3561 
3562 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3563 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3564 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567 }
3568 
3569 static void gaudi_mme_stall(struct hl_device *hdev)
3570 {
3571 	struct gaudi_device *gaudi = hdev->asic_specific;
3572 
3573 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3574 		return;
3575 
3576 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3577 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3578 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3579 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3580 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3581 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3582 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3583 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3584 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3585 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3586 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3587 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3588 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3589 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3590 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3591 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3592 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3593 }
3594 
3595 static void gaudi_tpc_stall(struct hl_device *hdev)
3596 {
3597 	struct gaudi_device *gaudi = hdev->asic_specific;
3598 
3599 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3600 		return;
3601 
3602 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3603 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3604 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3605 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3606 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3607 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3608 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3609 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3610 }
3611 
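/*
 * Disable clock gating on all DMA, MME and TPC QMANs by clearing their CGM
 * registers. When FW security is enabled these registers are not accessible
 * to the driver, so clock gating is left to the firmware.
 */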
3612 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3613 {
3614 	u32 qman_offset;
3615 	int i;
3616 
3617 	if (hdev->asic_prop.fw_security_enabled)
3618 		return;
3619 
3620 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3621 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3622 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3623 
3624 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3625 	}
3626 
3627 	WREG32(mmMME0_QM_CGM_CFG, 0);
3628 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3629 	WREG32(mmMME2_QM_CGM_CFG, 0);
3630 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3631 
3632 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3633 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3634 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3635 
3636 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3637 	}
3638 }
3639 
3640 static void gaudi_enable_timestamp(struct hl_device *hdev)
3641 {
3642 	/* Disable the timestamp counter */
3643 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3644 
3645 	/* Zero the lower/upper parts of the 64-bit counter */
3646 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3647 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3648 
3649 	/* Enable the counter */
3650 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3651 }
3652 
3653 static void gaudi_disable_timestamp(struct hl_device *hdev)
3654 {
3655 	/* Disable the timestamp counter */
3656 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3657 }
3658 
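/*
 * Halt all compute and DMA engines before reset: first stop the QMAN CPs,
 * wait, then stall the engine cores themselves, wait again, and finally
 * disable the QMANs, the timestamp counter and MSI. When the firmware
 * performs the reset, only MSI is disabled.
 */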
3659 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3660 {
3661 	u32 wait_timeout_ms;
3662 
3663 	if (hdev->pldm)
3664 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3665 	else
3666 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3667 
3668 	if (fw_reset)
3669 		goto skip_engines;
3670 
3671 	gaudi_stop_nic_qmans(hdev);
3672 	gaudi_stop_mme_qmans(hdev);
3673 	gaudi_stop_tpc_qmans(hdev);
3674 	gaudi_stop_hbm_dma_qmans(hdev);
3675 	gaudi_stop_pci_dma_qmans(hdev);
3676 
3677 	msleep(wait_timeout_ms);
3678 
3679 	gaudi_pci_dma_stall(hdev);
3680 	gaudi_hbm_dma_stall(hdev);
3681 	gaudi_tpc_stall(hdev);
3682 	gaudi_mme_stall(hdev);
3683 
3684 	msleep(wait_timeout_ms);
3685 
3686 	gaudi_disable_nic_qmans(hdev);
3687 	gaudi_disable_mme_qmans(hdev);
3688 	gaudi_disable_tpc_qmans(hdev);
3689 	gaudi_disable_hbm_dma_qmans(hdev);
3690 	gaudi_disable_pci_dma_qmans(hdev);
3691 
3692 	gaudi_disable_timestamp(hdev);
3693 
3694 skip_engines:
3695 	gaudi_disable_msi(hdev);
3696 }
3697 
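/*
 * Initialize the device MMU: program the hop-0 table address for every ASID,
 * set the STLB cache-management page, invalidate the MMU cache and enable
 * address translation.
 */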
3698 static int gaudi_mmu_init(struct hl_device *hdev)
3699 {
3700 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3701 	struct gaudi_device *gaudi = hdev->asic_specific;
3702 	u64 hop0_addr;
3703 	int rc, i;
3704 
3705 	if (!hdev->mmu_enable)
3706 		return 0;
3707 
3708 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3709 		return 0;
3710 
3711 	for (i = 0 ; i < prop->max_asid ; i++) {
3712 		hop0_addr = prop->mmu_pgt_addr +
3713 				(i * prop->mmu_hop_table_size);
3714 
3715 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3716 		if (rc) {
3717 			dev_err(hdev->dev,
3718 				"failed to set hop0 addr for asid %d\n", i);
3719 			goto err;
3720 		}
3721 	}
3722 
3723 	/* init MMU cache manage page */
3724 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3725 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3726 
3727 	/* mem cache invalidation */
3728 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3729 
3730 	hl_mmu_invalidate_cache(hdev, true, 0);
3731 
3732 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3733 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3734 
3735 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3736 
3737 	/*
3738 	 * The H/W expects the first PI after init to be 1. After wraparound
3739 	 * we'll write 0.
3740 	 */
3741 	gaudi->mmu_cache_inv_pi = 1;
3742 
3743 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3744 
3745 	return 0;
3746 
3747 err:
3748 	return rc;
3749 }
3750 
3751 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3752 {
3753 	void __iomem *dst;
3754 
3755 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3756 
3757 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3758 }
3759 
3760 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3761 {
3762 	void __iomem *dst;
3763 
3764 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3765 
3766 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3767 }
3768 
3769 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3770 {
3771 	struct dynamic_fw_load_mgr *dynamic_loader;
3772 	struct cpu_dyn_regs *dyn_regs;
3773 
3774 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3775 
3776 	/*
3777 	 * Here we set initial values for a few specific dynamic regs (before
3778 	 * reading the first descriptor from FW, those values have to be
3779 	 * hard-coded). In later stages of the protocol these values will be
3780 	 * updated automatically by reading the FW descriptor, so the data
3781 	 * there will always be up-to-date.
3782 	 */
3783 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3784 	dyn_regs->kmd_msg_to_cpu =
3785 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3786 	dyn_regs->cpu_cmd_status_to_host =
3787 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3788 
3789 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3790 }
3791 
3792 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3793 {
3794 	struct static_fw_load_mgr *static_loader;
3795 
3796 	static_loader = &hdev->fw_loader.static_loader;
3797 
3798 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3799 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3800 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3801 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3802 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3803 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3804 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3805 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3806 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3807 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3808 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3809 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3810 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3811 			GAUDI_PLDM_RESET_WAIT_MSEC :
3812 			GAUDI_CPU_RESET_WAIT_MSEC;
3813 }
3814 
3815 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3816 {
3817 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3818 
3819 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3820 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3821 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3822 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3823 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3824 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3825 }
3826 
3827 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3828 {
3829 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3830 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3831 
3832 	/* fill common fields */
3833 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3834 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3835 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3836 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3837 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3838 	fw_loader->skip_bmc = !hdev->bmc_enable;
3839 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3840 	fw_loader->dram_bar_id = HBM_BAR_ID;
3841 
3842 	if (prop->dynamic_fw_load)
3843 		gaudi_init_dynamic_firmware_loader(hdev);
3844 	else
3845 		gaudi_init_static_firmware_loader(hdev);
3846 }
3847 
3848 static int gaudi_init_cpu(struct hl_device *hdev)
3849 {
3850 	struct gaudi_device *gaudi = hdev->asic_specific;
3851 	int rc;
3852 
3853 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3854 		return 0;
3855 
3856 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3857 		return 0;
3858 
3859 	/*
3860 	 * The device CPU works with 40-bit addresses.
3861 	 * This register sets the extension to 50 bits.
3862 	 */
3863 	if (!hdev->asic_prop.fw_security_enabled)
3864 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3865 
3866 	rc = hl_fw_init_cpu(hdev);
3867 
3868 	if (rc)
3869 		return rc;
3870 
3871 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3872 
3873 	return 0;
3874 }
3875 
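/*
 * Hand the host-side PQ, EQ and CQ buffers to the device CPU through the
 * CPU_IF registers, kick it via the PI-update interrupt and wait for it to
 * report PQ_INIT_STATUS_READY_FOR_HOST.
 */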
3876 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3877 {
3878 	struct cpu_dyn_regs *dyn_regs =
3879 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3880 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3881 	struct gaudi_device *gaudi = hdev->asic_specific;
3882 	u32 status, irq_handler_offset;
3883 	struct hl_eq *eq;
3884 	struct hl_hw_queue *cpu_pq =
3885 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3886 	int err;
3887 
3888 	if (!hdev->cpu_queues_enable)
3889 		return 0;
3890 
3891 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3892 		return 0;
3893 
3894 	eq = &hdev->event_queue;
3895 
3896 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3897 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3898 
3899 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3900 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3901 
3902 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3903 			lower_32_bits(hdev->cpu_accessible_dma_address));
3904 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3905 			upper_32_bits(hdev->cpu_accessible_dma_address));
3906 
3907 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3908 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3909 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3910 
3911 	/* Used for EQ CI */
3912 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3913 
3914 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3915 
3916 	if (gaudi->multi_msi_mode)
3917 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3918 	else
3919 		WREG32(mmCPU_IF_QUEUE_INIT,
3920 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3921 
3922 	irq_handler_offset = prop->gic_interrupts_enable ?
3923 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3924 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3925 
3926 	WREG32(irq_handler_offset,
3927 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3928 
3929 	err = hl_poll_timeout(
3930 		hdev,
3931 		mmCPU_IF_QUEUE_INIT,
3932 		status,
3933 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3934 		1000,
3935 		cpu_timeout);
3936 
3937 	if (err) {
3938 		dev_err(hdev->dev,
3939 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3940 		return -EIO;
3941 	}
3942 
3943 	/* update FW application security bits */
3944 	if (prop->fw_cpu_boot_dev_sts0_valid)
3945 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3946 	if (prop->fw_cpu_boot_dev_sts1_valid)
3947 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3948 
3949 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3950 	return 0;
3951 }
3952 
3953 static void gaudi_pre_hw_init(struct hl_device *hdev)
3954 {
3955 	/* Perform read from the device to make sure device is up */
3956 	RREG32(mmHW_STATE);
3957 
3958 	if (!hdev->asic_prop.fw_security_enabled) {
3959 		/* Set the access through PCI bars (Linux driver only) as
3960 		 * secured
3961 		 */
3962 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3963 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3964 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3965 
3966 		/* Perform read to flush the waiting writes to ensure
3967 		 * configuration was set in the device
3968 		 */
3969 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3970 	}
3971 
3972 	/*
3973 	 * Let's mark in the H/W that we have reached this point. We check
3974 	 * this value in the reset_before_init function to understand whether
3975 	 * we need to reset the chip before doing H/W init. This register is
3976 	 * cleared by the H/W upon H/W reset
3977 	 */
3978 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3979 }
3980 
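/*
 * Full H/W initialization flow: map the HBM BAR, bring up the device CPU,
 * then initialize the scramblers, golden registers, MMU, security and all
 * QMANs, and finally enable the timestamp counter, MSI and the CPU queues.
 */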
3981 static int gaudi_hw_init(struct hl_device *hdev)
3982 {
3983 	struct gaudi_device *gaudi = hdev->asic_specific;
3984 	int rc;
3985 
3986 	gaudi_pre_hw_init(hdev);
3987 
3988 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3989 	 * So we set it here and if anyone tries to move it later to
3990 	 * a different address, there will be an error
3991 	 */
3992 	if (hdev->asic_prop.iatu_done_by_fw)
3993 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3994 
3995 	/*
3996 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3997 	 * base address of dram
3998 	 */
3999 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4000 		dev_err(hdev->dev,
4001 			"failed to map HBM bar to DRAM base address\n");
4002 		return -EIO;
4003 	}
4004 
4005 	rc = gaudi_init_cpu(hdev);
4006 	if (rc) {
4007 		dev_err(hdev->dev, "failed to initialize CPU\n");
4008 		return rc;
4009 	}
4010 
4011 	/* In case the clock gating was enabled in preboot we need to disable
4012 	 * it here before touching the MME/TPC registers.
4013 	 */
4014 	gaudi_disable_clock_gating(hdev);
4015 
4016 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4017 	gaudi_init_scrambler_sram(hdev);
4018 
4019 	/* This is here just in case we are working without CPU */
4020 	gaudi_init_scrambler_hbm(hdev);
4021 
4022 	gaudi_init_golden_registers(hdev);
4023 
4024 	rc = gaudi_mmu_init(hdev);
4025 	if (rc)
4026 		return rc;
4027 
4028 	gaudi_init_security(hdev);
4029 
4030 	gaudi_init_pci_dma_qmans(hdev);
4031 
4032 	gaudi_init_hbm_dma_qmans(hdev);
4033 
4034 	gaudi_init_mme_qmans(hdev);
4035 
4036 	gaudi_init_tpc_qmans(hdev);
4037 
4038 	gaudi_init_nic_qmans(hdev);
4039 
4040 	gaudi_enable_timestamp(hdev);
4041 
4042 	/* MSI must be enabled before CPU queues and NIC are initialized */
4043 	rc = gaudi_enable_msi(hdev);
4044 	if (rc)
4045 		goto disable_queues;
4046 
4047 	/* must be called after MSI was enabled */
4048 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4049 	if (rc) {
4050 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4051 			rc);
4052 		goto disable_msi;
4053 	}
4054 
4055 	/* Perform read from the device to flush all configuration */
4056 	RREG32(mmHW_STATE);
4057 
4058 	return 0;
4059 
4060 disable_msi:
4061 	gaudi_disable_msi(hdev);
4062 disable_queues:
4063 	gaudi_disable_mme_qmans(hdev);
4064 	gaudi_disable_pci_dma_qmans(hdev);
4065 
4066 	return rc;
4067 }
4068 
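/*
 * Hard-reset flow. Depending on the security configuration, the reset is
 * either requested from the firmware or driven directly by the driver
 * through the PSOC reset registers, after the device CPU has been halted.
 */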
4069 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4070 {
4071 	struct cpu_dyn_regs *dyn_regs =
4072 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4073 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4074 	struct gaudi_device *gaudi = hdev->asic_specific;
4075 	bool driver_performs_reset;
4076 
4077 	if (!hard_reset) {
4078 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4079 		return;
4080 	}
4081 
4082 	if (hdev->pldm) {
4083 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4084 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4085 	} else {
4086 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4087 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4088 	}
4089 
4090 	if (fw_reset) {
4091 		dev_dbg(hdev->dev,
4092 			"Firmware performs HARD reset, going to wait %dms\n",
4093 			reset_timeout_ms);
4094 
4095 		goto skip_reset;
4096 	}
4097 
4098 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4099 					!hdev->asic_prop.hard_reset_done_by_fw);
4100 
4101 	/* Set device to handle FLR by H/W as we will put the device CPU to
4102 	 * halt mode
4103 	 */
4104 	if (driver_performs_reset)
4105 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4106 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4107 
4108 	/* If linux is loaded in the device CPU we need to communicate with it
4109 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4110 	 * registers in case of old F/Ws
4111 	 */
4112 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4113 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4114 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4115 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4116 
4117 		WREG32(irq_handler_offset,
4118 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4119 
4120 		/* This is a hail-mary attempt to revive the card in the small chance that the
4121 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4122 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4123 		 * reset as if Linux wasn't loaded.
4124 		 *
4125 		 * We do it only if the reset cause was HB, because that would be the indication
4126 		 * of such an event.
4127 		 *
4128 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4129 		 * damage.
4130 		 */
4131 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4132 			if (hdev->asic_prop.hard_reset_done_by_fw)
4133 				hl_fw_ask_hard_reset_without_linux(hdev);
4134 			else
4135 				hl_fw_ask_halt_machine_without_linux(hdev);
4136 		}
4137 	} else {
4138 		if (hdev->asic_prop.hard_reset_done_by_fw)
4139 			hl_fw_ask_hard_reset_without_linux(hdev);
4140 		else
4141 			hl_fw_ask_halt_machine_without_linux(hdev);
4142 	}
4143 
4144 	if (driver_performs_reset) {
4145 
4146 		/* Configure the reset registers. Must be done as early as
4147 		 * possible in case we fail during H/W initialization
4148 		 */
4149 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4150 						(CFG_RST_H_DMA_MASK |
4151 						CFG_RST_H_MME_MASK |
4152 						CFG_RST_H_SM_MASK |
4153 						CFG_RST_H_TPC_7_MASK));
4154 
4155 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4156 
4157 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4158 						(CFG_RST_H_HBM_MASK |
4159 						CFG_RST_H_TPC_7_MASK |
4160 						CFG_RST_H_NIC_MASK |
4161 						CFG_RST_H_SM_MASK |
4162 						CFG_RST_H_DMA_MASK |
4163 						CFG_RST_H_MME_MASK |
4164 						CFG_RST_H_CPU_MASK |
4165 						CFG_RST_H_MMU_MASK));
4166 
4167 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4168 						(CFG_RST_L_IF_MASK |
4169 						CFG_RST_L_PSOC_MASK |
4170 						CFG_RST_L_TPC_MASK));
4171 
4172 		msleep(cpu_timeout_ms);
4173 
4174 		/* Tell ASIC not to re-initialize PCIe */
4175 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4176 
4177 		/* Restart BTL/BLR upon hard-reset */
4178 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4179 
4180 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4181 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4182 
4183 		dev_dbg(hdev->dev,
4184 			"Issued HARD reset command, going to wait %dms\n",
4185 			reset_timeout_ms);
4186 	} else {
4187 		dev_dbg(hdev->dev,
4188 			"Firmware performs HARD reset, going to wait %dms\n",
4189 			reset_timeout_ms);
4190 	}
4191 
4192 skip_reset:
4193 	/*
4194 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4195 	 * itself is in reset. Need to wait until the reset is deasserted
4196 	 */
4197 	msleep(reset_timeout_ms);
4198 
4199 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4200 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4201 		dev_err(hdev->dev,
4202 			"Timeout while waiting for device to reset 0x%x\n",
4203 			status);
4204 
4205 	if (gaudi) {
4206 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4207 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4208 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4209 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4210 						HW_CAP_HBM_SCRAMBLER);
4211 
4212 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4213 
4214 		hdev->device_cpu_is_halted = false;
4215 	}
4216 }
4217 
4218 static int gaudi_suspend(struct hl_device *hdev)
4219 {
4220 	int rc;
4221 
4222 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4223 	if (rc)
4224 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4225 
4226 	return rc;
4227 }
4228 
4229 static int gaudi_resume(struct hl_device *hdev)
4230 {
4231 	return gaudi_init_iatu(hdev);
4232 }
4233 
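/*
 * Map a coherent DMA allocation to user space. The dma_addr carries the
 * device-side HOST_PHYS_BASE offset, which must be removed before calling
 * dma_mmap_coherent().
 */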
4234 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4235 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4236 {
4237 	int rc;
4238 
4239 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4240 			VM_DONTCOPY | VM_NORESERVE;
4241 
4242 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4243 				(dma_addr - HOST_PHYS_BASE), size);
4244 	if (rc)
4245 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4246 
4247 	return rc;
4248 }
4249 
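/*
 * Write the new PQ producer index to the doorbell register that matches
 * hw_queue_id. For the CPU PQ, also raise the PI-update interrupt so the
 * device CPU reads the new descriptor.
 */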
4250 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4251 {
4252 	struct cpu_dyn_regs *dyn_regs =
4253 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4254 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4255 	struct gaudi_device *gaudi = hdev->asic_specific;
4256 	bool invalid_queue = false;
4257 	int dma_id;
4258 
4259 	switch (hw_queue_id) {
4260 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4261 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4262 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4263 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4264 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4265 		break;
4266 
4267 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4268 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4269 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4270 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4271 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4272 		break;
4273 
4274 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4275 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4276 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4277 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4278 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4279 		break;
4280 
4281 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4282 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4283 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4284 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4285 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4286 		break;
4287 
4288 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4289 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4290 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4291 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4292 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4293 		break;
4294 
4295 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4296 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4297 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4298 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4299 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4300 		break;
4301 
4302 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4303 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4304 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4305 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4306 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4307 		break;
4308 
4309 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4310 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4311 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4312 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4313 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4314 		break;
4315 
4316 	case GAUDI_QUEUE_ID_CPU_PQ:
4317 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4318 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4319 		else
4320 			invalid_queue = true;
4321 		break;
4322 
4323 	case GAUDI_QUEUE_ID_MME_0_0:
4324 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4325 		break;
4326 
4327 	case GAUDI_QUEUE_ID_MME_0_1:
4328 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4329 		break;
4330 
4331 	case GAUDI_QUEUE_ID_MME_0_2:
4332 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4333 		break;
4334 
4335 	case GAUDI_QUEUE_ID_MME_0_3:
4336 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4337 		break;
4338 
4339 	case GAUDI_QUEUE_ID_MME_1_0:
4340 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4341 		break;
4342 
4343 	case GAUDI_QUEUE_ID_MME_1_1:
4344 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4345 		break;
4346 
4347 	case GAUDI_QUEUE_ID_MME_1_2:
4348 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4349 		break;
4350 
4351 	case GAUDI_QUEUE_ID_MME_1_3:
4352 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4353 		break;
4354 
4355 	case GAUDI_QUEUE_ID_TPC_0_0:
4356 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4357 		break;
4358 
4359 	case GAUDI_QUEUE_ID_TPC_0_1:
4360 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4361 		break;
4362 
4363 	case GAUDI_QUEUE_ID_TPC_0_2:
4364 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4365 		break;
4366 
4367 	case GAUDI_QUEUE_ID_TPC_0_3:
4368 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4369 		break;
4370 
4371 	case GAUDI_QUEUE_ID_TPC_1_0:
4372 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4373 		break;
4374 
4375 	case GAUDI_QUEUE_ID_TPC_1_1:
4376 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4377 		break;
4378 
4379 	case GAUDI_QUEUE_ID_TPC_1_2:
4380 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4381 		break;
4382 
4383 	case GAUDI_QUEUE_ID_TPC_1_3:
4384 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4385 		break;
4386 
4387 	case GAUDI_QUEUE_ID_TPC_2_0:
4388 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4389 		break;
4390 
4391 	case GAUDI_QUEUE_ID_TPC_2_1:
4392 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4393 		break;
4394 
4395 	case GAUDI_QUEUE_ID_TPC_2_2:
4396 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4397 		break;
4398 
4399 	case GAUDI_QUEUE_ID_TPC_2_3:
4400 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4401 		break;
4402 
4403 	case GAUDI_QUEUE_ID_TPC_3_0:
4404 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4405 		break;
4406 
4407 	case GAUDI_QUEUE_ID_TPC_3_1:
4408 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4409 		break;
4410 
4411 	case GAUDI_QUEUE_ID_TPC_3_2:
4412 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4413 		break;
4414 
4415 	case GAUDI_QUEUE_ID_TPC_3_3:
4416 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4417 		break;
4418 
4419 	case GAUDI_QUEUE_ID_TPC_4_0:
4420 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4421 		break;
4422 
4423 	case GAUDI_QUEUE_ID_TPC_4_1:
4424 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4425 		break;
4426 
4427 	case GAUDI_QUEUE_ID_TPC_4_2:
4428 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4429 		break;
4430 
4431 	case GAUDI_QUEUE_ID_TPC_4_3:
4432 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4433 		break;
4434 
4435 	case GAUDI_QUEUE_ID_TPC_5_0:
4436 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4437 		break;
4438 
4439 	case GAUDI_QUEUE_ID_TPC_5_1:
4440 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4441 		break;
4442 
4443 	case GAUDI_QUEUE_ID_TPC_5_2:
4444 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4445 		break;
4446 
4447 	case GAUDI_QUEUE_ID_TPC_5_3:
4448 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4449 		break;
4450 
4451 	case GAUDI_QUEUE_ID_TPC_6_0:
4452 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4453 		break;
4454 
4455 	case GAUDI_QUEUE_ID_TPC_6_1:
4456 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4457 		break;
4458 
4459 	case GAUDI_QUEUE_ID_TPC_6_2:
4460 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4461 		break;
4462 
4463 	case GAUDI_QUEUE_ID_TPC_6_3:
4464 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4465 		break;
4466 
4467 	case GAUDI_QUEUE_ID_TPC_7_0:
4468 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4469 		break;
4470 
4471 	case GAUDI_QUEUE_ID_TPC_7_1:
4472 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4473 		break;
4474 
4475 	case GAUDI_QUEUE_ID_TPC_7_2:
4476 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4477 		break;
4478 
4479 	case GAUDI_QUEUE_ID_TPC_7_3:
4480 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4481 		break;
4482 
4483 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4484 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4485 			invalid_queue = true;
4486 
4487 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4488 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4489 		break;
4490 
4491 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4492 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4493 			invalid_queue = true;
4494 
4495 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4496 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4497 		break;
4498 
4499 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4500 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4501 			invalid_queue = true;
4502 
4503 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4504 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4505 		break;
4506 
4507 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4508 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4509 			invalid_queue = true;
4510 
4511 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4512 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4513 		break;
4514 
4515 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4516 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4517 			invalid_queue = true;
4518 
4519 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4520 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4521 		break;
4522 
4523 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4524 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4525 			invalid_queue = true;
4526 
4527 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4528 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4529 		break;
4530 
4531 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4532 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4533 			invalid_queue = true;
4534 
4535 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4536 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4537 		break;
4538 
4539 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4540 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4541 			invalid_queue = true;
4542 
4543 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4544 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4545 		break;
4546 
4547 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4548 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4549 			invalid_queue = true;
4550 
4551 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4552 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4553 		break;
4554 
4555 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4556 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4557 			invalid_queue = true;
4558 
4559 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4560 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4561 		break;
4562 
4563 	default:
4564 		invalid_queue = true;
4565 	}
4566 
4567 	if (invalid_queue) {
4568 		/* Should never get here */
4569 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4570 			hw_queue_id);
4571 		return;
4572 	}
4573 
4574 	db_value = pi;
4575 
4576 	/* ring the doorbell */
4577 	WREG32(db_reg_offset, db_value);
4578 
4579 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4580 		/* make sure device CPU will read latest data from host */
4581 		mb();
4582 
4583 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4584 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4585 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4586 
4587 		WREG32(irq_handler_offset,
4588 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4589 	}
4590 }
4591 
4592 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4593 				struct hl_bd *bd)
4594 {
4595 	__le64 *pbd = (__le64 *) bd;
4596 
4597 	/* The QMANs are in host memory so a simple copy suffices */
4598 	pqe[0] = pbd[0];
4599 	pqe[1] = pbd[1];
4600 }
4601 
4602 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4603 					dma_addr_t *dma_handle, gfp_t flags)
4604 {
4605 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4606 						dma_handle, flags);
4607 
4608 	/* Shift to the device's base physical address of host memory */
4609 	if (kernel_addr)
4610 		*dma_handle += HOST_PHYS_BASE;
4611 
4612 	return kernel_addr;
4613 }
4614 
4615 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4616 		void *cpu_addr, dma_addr_t dma_handle)
4617 {
4618 	/* Cancel the device's base physical address of host memory */
4619 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4620 
4621 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4622 }
4623 
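/*
 * Scrub the device DRAM (HBM) by programming all DMA cores in memset mode,
 * each filling a chunk of up to 2GB with the given value, and then polling
 * every core's status register until it is no longer busy.
 */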
4624 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4625 {
4626 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4627 	u64 cur_addr = prop->dram_user_base_address;
4628 	u32 chunk_size, busy;
4629 	int rc, dma_id;
4630 
4631 	while (cur_addr < prop->dram_end_address) {
4632 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4633 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4634 
4635 			chunk_size =
4636 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4637 
4638 			dev_dbg(hdev->dev,
4639 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4640 				cur_addr, cur_addr + chunk_size);
4641 
4642 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4643 					lower_32_bits(val));
4644 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4645 					upper_32_bits(val));
4646 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4647 						lower_32_bits(cur_addr));
4648 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4649 						upper_32_bits(cur_addr));
4650 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4651 					chunk_size);
4652 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4653 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4654 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4655 
4656 			cur_addr += chunk_size;
4657 
4658 			if (cur_addr == prop->dram_end_address)
4659 				break;
4660 		}
4661 
4662 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4663 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4664 
4665 			rc = hl_poll_timeout(
4666 				hdev,
4667 				mmDMA0_CORE_STS0 + dma_offset,
4668 				busy,
4669 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4670 				1000,
4671 				HBM_SCRUBBING_TIMEOUT_US);
4672 
4673 			if (rc) {
4674 				dev_err(hdev->dev,
4675 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4676 					dma_id);
4677 				return -EIO;
4678 			}
4679 		}
4680 	}
4681 
4682 	return 0;
4683 }
4684 
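/*
 * Scrub SRAM and HBM with the configured scrub value. The device must first
 * become idle; SRAM is cleared with a single memset DMA job and HBM is
 * cleared using all DMA channels in parallel.
 */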
4685 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4686 {
4687 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4688 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4689 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4690 	u64 addr, size, val = hdev->memory_scrub_val;
4691 	ktime_t timeout;
4692 	int rc = 0;
4693 
4694 	if (!hdev->memory_scrub)
4695 		return 0;
4696 
4697 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4698 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4699 		if (ktime_compare(ktime_get(), timeout) > 0) {
4700 			dev_err(hdev->dev, "waiting for idle timeout\n");
4701 			return -ETIMEDOUT;
4702 		}
4703 		usleep_range((1000 >> 2) + 1, 1000);
4704 	}
4705 
4706 	/* Scrub SRAM */
4707 	addr = prop->sram_user_base_address;
4708 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4709 
4710 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4711 			addr, addr + size, val);
4712 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4713 	if (rc) {
4714 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4715 		return rc;
4716 	}
4717 
4718 	/* Scrub HBM using all DMA channels in parallel */
4719 	rc = gaudi_scrub_device_dram(hdev, val);
4720 	if (rc) {
4721 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4722 		return rc;
4723 	}
4724 
4725 	return 0;
4726 }
4727 
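/*
 * Return the kernel address, DMA address and length (in entries) of the PQ
 * that was allocated for the given internal queue.
 */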
4728 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4729 				u32 queue_id, dma_addr_t *dma_handle,
4730 				u16 *queue_len)
4731 {
4732 	struct gaudi_device *gaudi = hdev->asic_specific;
4733 	struct gaudi_internal_qman_info *q;
4734 
4735 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4736 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4737 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4738 		return NULL;
4739 	}
4740 
4741 	q = &gaudi->internal_qmans[queue_id];
4742 	*dma_handle = q->pq_dma_addr;
4743 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4744 
4745 	return q->pq_kernel_addr;
4746 }
4747 
4748 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4749 				u16 len, u32 timeout, u64 *result)
4750 {
4751 	struct gaudi_device *gaudi = hdev->asic_specific;
4752 
4753 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4754 		if (result)
4755 			*result = 0;
4756 		return 0;
4757 	}
4758 
4759 	if (!timeout)
4760 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4761 
4762 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4763 						timeout, result);
4764 }
4765 
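/*
 * Sanity-test an external queue: send a MSG_PROT packet that writes a fence
 * value to a host buffer and poll that buffer until the value arrives.
 */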
4766 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4767 {
4768 	struct packet_msg_prot *fence_pkt;
4769 	dma_addr_t pkt_dma_addr;
4770 	u32 fence_val, tmp, timeout_usec;
4771 	dma_addr_t fence_dma_addr;
4772 	u32 *fence_ptr;
4773 	int rc;
4774 
4775 	if (hdev->pldm)
4776 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4777 	else
4778 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4779 
4780 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4781 
4782 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4783 	if (!fence_ptr) {
4784 		dev_err(hdev->dev,
4785 			"Failed to allocate memory for H/W queue %d testing\n",
4786 			hw_queue_id);
4787 		return -ENOMEM;
4788 	}
4789 
4790 	*fence_ptr = 0;
4791 
4792 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4793 						&pkt_dma_addr);
4794 	if (!fence_pkt) {
4795 		dev_err(hdev->dev,
4796 			"Failed to allocate packet for H/W queue %d testing\n",
4797 			hw_queue_id);
4798 		rc = -ENOMEM;
4799 		goto free_fence_ptr;
4800 	}
4801 
4802 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4803 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4804 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4805 
4806 	fence_pkt->ctl = cpu_to_le32(tmp);
4807 	fence_pkt->value = cpu_to_le32(fence_val);
4808 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4809 
4810 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4811 					sizeof(struct packet_msg_prot),
4812 					pkt_dma_addr);
4813 	if (rc) {
4814 		dev_err(hdev->dev,
4815 			"Failed to send fence packet to H/W queue %d\n",
4816 			hw_queue_id);
4817 		goto free_pkt;
4818 	}
4819 
4820 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4821 					1000, timeout_usec, true);
4822 
4823 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4824 
4825 	if (rc == -ETIMEDOUT) {
4826 		dev_err(hdev->dev,
4827 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4828 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4829 		rc = -EIO;
4830 	}
4831 
4832 free_pkt:
4833 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4834 free_fence_ptr:
4835 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4836 	return rc;
4837 }
4838 
4839 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4840 {
4841 	struct gaudi_device *gaudi = hdev->asic_specific;
4842 
4843 	/*
4844 	 * check the capability here because send_cpu_message() silently returns
4845 	 * success (with a zeroed result) when the capability is not set
4846 	 */
4847 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4848 		return 0;
4849 
4850 	return hl_fw_test_cpu_queue(hdev);
4851 }
4852 
4853 static int gaudi_test_queues(struct hl_device *hdev)
4854 {
4855 	int i, rc, ret_val = 0;
4856 
4857 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4858 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4859 			rc = gaudi_test_queue(hdev, i);
4860 			if (rc)
4861 				ret_val = -EINVAL;
4862 		}
4863 	}
4864 
4865 	rc = gaudi_test_cpu_queue(hdev);
4866 	if (rc)
4867 		ret_val = -EINVAL;
4868 
4869 	return ret_val;
4870 }
4871 
4872 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4873 		gfp_t mem_flags, dma_addr_t *dma_handle)
4874 {
4875 	void *kernel_addr;
4876 
4877 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4878 		return NULL;
4879 
4880 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4881 
4882 	/* Shift to the device's base physical address of host memory */
4883 	if (kernel_addr)
4884 		*dma_handle += HOST_PHYS_BASE;
4885 
4886 	return kernel_addr;
4887 }
4888 
4889 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4890 			dma_addr_t dma_addr)
4891 {
4892 	/* Cancel the device's base physical address of host memory */
4893 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4894 
4895 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4896 }
4897 
4898 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4899 					size_t size, dma_addr_t *dma_handle)
4900 {
4901 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4902 }
4903 
4904 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4905 						size_t size, void *vaddr)
4906 {
4907 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4908 }
4909 
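/*
 * Return the worst-case size, in bytes, of the LIN_DMA packets needed to
 * cover the given SG table. Physically contiguous entries are merged as long
 * as the combined length does not exceed DMA_MAX_TRANSFER_SIZE.
 */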
4910 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4911 {
4912 	struct scatterlist *sg, *sg_next_iter;
4913 	u32 count, dma_desc_cnt;
4914 	u64 len, len_next;
4915 	dma_addr_t addr, addr_next;
4916 
4917 	dma_desc_cnt = 0;
4918 
4919 	for_each_sgtable_dma_sg(sgt, sg, count) {
4920 		len = sg_dma_len(sg);
4921 		addr = sg_dma_address(sg);
4922 
4923 		if (len == 0)
4924 			break;
4925 
4926 		while ((count + 1) < sgt->nents) {
4927 			sg_next_iter = sg_next(sg);
4928 			len_next = sg_dma_len(sg_next_iter);
4929 			addr_next = sg_dma_address(sg_next_iter);
4930 
4931 			if (len_next == 0)
4932 				break;
4933 
4934 			if ((addr + len == addr_next) &&
4935 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4936 				len += len_next;
4937 				count++;
4938 				sg = sg_next_iter;
4939 			} else {
4940 				break;
4941 			}
4942 		}
4943 
4944 		dma_desc_cnt++;
4945 	}
4946 
4947 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4948 }
4949 
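/*
 * Pin the host buffer referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it and add the size of its future
 * descriptors to the patched CB size.
 */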
4950 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4951 				struct hl_cs_parser *parser,
4952 				struct packet_lin_dma *user_dma_pkt,
4953 				u64 addr, enum dma_data_direction dir)
4954 {
4955 	struct hl_userptr *userptr;
4956 	int rc;
4957 
4958 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4959 			parser->job_userptr_list, &userptr))
4960 		goto already_pinned;
4961 
4962 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4963 	if (!userptr)
4964 		return -ENOMEM;
4965 
4966 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4967 				userptr);
4968 	if (rc)
4969 		goto free_userptr;
4970 
4971 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4972 
4973 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4974 	if (rc) {
4975 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4976 		goto unpin_memory;
4977 	}
4978 
4979 	userptr->dma_mapped = true;
4980 	userptr->dir = dir;
4981 
4982 already_pinned:
4983 	parser->patched_cb_size +=
4984 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4985 
4986 	return 0;
4987 
4988 unpin_memory:
4989 	list_del(&userptr->job_node);
4990 	hl_unpin_host_memory(hdev, userptr);
4991 free_userptr:
4992 	kfree(userptr);
4993 	return rc;
4994 }
4995 
4996 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4997 				struct hl_cs_parser *parser,
4998 				struct packet_lin_dma *user_dma_pkt,
4999 				bool src_in_host)
5000 {
5001 	enum dma_data_direction dir;
5002 	bool skip_host_mem_pin = false, user_memset;
5003 	u64 addr;
5004 	int rc = 0;
5005 
5006 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5007 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5008 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5009 
5010 	if (src_in_host) {
5011 		if (user_memset)
5012 			skip_host_mem_pin = true;
5013 
5014 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5015 		dir = DMA_TO_DEVICE;
5016 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5017 	} else {
5018 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5019 		dir = DMA_FROM_DEVICE;
5020 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5021 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5022 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5023 	}
5024 
5025 	if (skip_host_mem_pin)
5026 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5027 	else
5028 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5029 						addr, dir);
5030 
5031 	return rc;
5032 }
5033 
5034 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5035 				struct hl_cs_parser *parser,
5036 				struct packet_lin_dma *user_dma_pkt)
5037 {
5038 	bool src_in_host = false;
5039 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5040 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5041 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5042 
5043 	dev_dbg(hdev->dev, "DMA packet details:\n");
5044 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5045 				le64_to_cpu(user_dma_pkt->src_addr));
5046 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5047 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5048 
5049 	/*
5050 	 * Special handling for DMA with size 0. Bypass all validations
5051 	 * because no transactions will be done except for WR_COMP, which
5052 	 * is not a security issue
5053 	 */
5054 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5055 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5056 		return 0;
5057 	}
5058 
5059 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5060 		src_in_host = true;
5061 
5062 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5063 						src_in_host);
5064 }
5065 
5066 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5067 					struct hl_cs_parser *parser,
5068 					struct packet_load_and_exe *user_pkt)
5069 {
5070 	u32 cfg;
5071 
5072 	cfg = le32_to_cpu(user_pkt->cfg);
5073 
5074 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5075 		dev_err(hdev->dev,
5076 			"User not allowed to use Load and Execute\n");
5077 		return -EPERM;
5078 	}
5079 
5080 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5081 
5082 	return 0;
5083 }
5084 
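/*
 * First pass over the user CB: validate every packet, reject privileged
 * packets and accumulate the size that the patched CB will need.
 */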
5085 static int gaudi_validate_cb(struct hl_device *hdev,
5086 			struct hl_cs_parser *parser, bool is_mmu)
5087 {
5088 	u32 cb_parsed_length = 0;
5089 	int rc = 0;
5090 
5091 	parser->patched_cb_size = 0;
5092 
5093 	/* user_cb_size is greater than 0, so the loop always executes */
5094 	while (cb_parsed_length < parser->user_cb_size) {
5095 		enum packet_id pkt_id;
5096 		u16 pkt_size;
5097 		struct gaudi_packet *user_pkt;
5098 
5099 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5100 
5101 		pkt_id = (enum packet_id) (
5102 				(le64_to_cpu(user_pkt->header) &
5103 				PACKET_HEADER_PACKET_ID_MASK) >>
5104 					PACKET_HEADER_PACKET_ID_SHIFT);
5105 
5106 		if (!validate_packet_id(pkt_id)) {
5107 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5108 			rc = -EINVAL;
5109 			break;
5110 		}
5111 
5112 		pkt_size = gaudi_packet_sizes[pkt_id];
5113 		cb_parsed_length += pkt_size;
5114 		if (cb_parsed_length > parser->user_cb_size) {
5115 			dev_err(hdev->dev,
5116 				"packet 0x%x is out of CB boundary\n", pkt_id);
5117 			rc = -EINVAL;
5118 			break;
5119 		}
5120 
5121 		switch (pkt_id) {
5122 		case PACKET_MSG_PROT:
5123 			dev_err(hdev->dev,
5124 				"User not allowed to use MSG_PROT\n");
5125 			rc = -EPERM;
5126 			break;
5127 
5128 		case PACKET_CP_DMA:
5129 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5130 			rc = -EPERM;
5131 			break;
5132 
5133 		case PACKET_STOP:
5134 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5135 			rc = -EPERM;
5136 			break;
5137 
5138 		case PACKET_WREG_BULK:
5139 			dev_err(hdev->dev,
5140 				"User not allowed to use WREG_BULK\n");
5141 			rc = -EPERM;
5142 			break;
5143 
5144 		case PACKET_LOAD_AND_EXE:
5145 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5146 				(struct packet_load_and_exe *) user_pkt);
5147 			break;
5148 
5149 		case PACKET_LIN_DMA:
5150 			parser->contains_dma_pkt = true;
5151 			if (is_mmu)
5152 				parser->patched_cb_size += pkt_size;
5153 			else
5154 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5155 					(struct packet_lin_dma *) user_pkt);
5156 			break;
5157 
5158 		case PACKET_WREG_32:
5159 		case PACKET_MSG_LONG:
5160 		case PACKET_MSG_SHORT:
5161 		case PACKET_REPEAT:
5162 		case PACKET_FENCE:
5163 		case PACKET_NOP:
5164 		case PACKET_ARB_POINT:
5165 			parser->patched_cb_size += pkt_size;
5166 			break;
5167 
5168 		default:
5169 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5170 				pkt_id);
5171 			rc = -EINVAL;
5172 			break;
5173 		}
5174 
5175 		if (rc)
5176 			break;
5177 	}
5178 
5179 	/*
5180 	 * The new CB should have space at the end for:
5181 	 * 1. Optional NOP padding for cacheline alignment
5182 	 * 2. A MSG_PROT packet that will act as a completion packet
5183 	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5184 	 */
5185 	if (parser->completion)
5186 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5187 			parser->patched_cb_size);
5188 
5189 	return rc;
5190 }
5191 
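/*
 * Expand a single user LIN_DMA packet into one packet per (merged) SG entry
 * of the pinned host buffer. WR_COMP is kept as the user set it only on the
 * last generated packet.
 */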
5192 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5193 				struct hl_cs_parser *parser,
5194 				struct packet_lin_dma *user_dma_pkt,
5195 				struct packet_lin_dma *new_dma_pkt,
5196 				u32 *new_dma_pkt_size)
5197 {
5198 	struct hl_userptr *userptr;
5199 	struct scatterlist *sg, *sg_next_iter;
5200 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5201 	u64 len, len_next;
5202 	dma_addr_t dma_addr, dma_addr_next;
5203 	u64 device_memory_addr, addr;
5204 	enum dma_data_direction dir;
5205 	struct sg_table *sgt;
5206 	bool src_in_host = false;
5207 	bool skip_host_mem_pin = false;
5208 	bool user_memset;
5209 
5210 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5211 
5212 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5213 		src_in_host = true;
5214 
5215 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5216 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5217 
5218 	if (src_in_host) {
5219 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5220 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5221 		dir = DMA_TO_DEVICE;
5222 		if (user_memset)
5223 			skip_host_mem_pin = true;
5224 	} else {
5225 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5226 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5227 		dir = DMA_FROM_DEVICE;
5228 	}
5229 
5230 	if ((!skip_host_mem_pin) &&
5231 		(!hl_userptr_is_pinned(hdev, addr,
5232 					le32_to_cpu(user_dma_pkt->tsize),
5233 					parser->job_userptr_list, &userptr))) {
5234 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5235 				addr, le32_to_cpu(user_dma_pkt->tsize));
5236 		return -EFAULT;
5237 	}
5238 
5239 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5240 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5241 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5242 		return 0;
5243 	}
5244 
5245 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5246 
5247 	sgt = userptr->sgt;
5248 	dma_desc_cnt = 0;
5249 
5250 	for_each_sgtable_dma_sg(sgt, sg, count) {
5251 		len = sg_dma_len(sg);
5252 		dma_addr = sg_dma_address(sg);
5253 
5254 		if (len == 0)
5255 			break;
5256 
5257 		while ((count + 1) < sgt->nents) {
5258 			sg_next_iter = sg_next(sg);
5259 			len_next = sg_dma_len(sg_next_iter);
5260 			dma_addr_next = sg_dma_address(sg_next_iter);
5261 
5262 			if (len_next == 0)
5263 				break;
5264 
5265 			if ((dma_addr + len == dma_addr_next) &&
5266 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5267 				len += len_next;
5268 				count++;
5269 				sg = sg_next_iter;
5270 			} else {
5271 				break;
5272 			}
5273 		}
5274 
5275 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5276 		if (likely(dma_desc_cnt))
5277 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5278 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5279 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5280 		new_dma_pkt->tsize = cpu_to_le32(len);
5281 
5282 		if (dir == DMA_TO_DEVICE) {
5283 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5284 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5285 		} else {
5286 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5287 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5288 		}
5289 
5290 		if (!user_memset)
5291 			device_memory_addr += len;
5292 		dma_desc_cnt++;
5293 		new_dma_pkt++;
5294 	}
5295 
5296 	if (!dma_desc_cnt) {
5297 		dev_err(hdev->dev,
5298 			"No SG entries found when patching DMA packet\n");
5299 		return -EFAULT;
5300 	}
5301 
5302 	/* Fix the last dma packet - wrcomp must be as user set it */
5303 	new_dma_pkt--;
5304 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5305 
5306 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5307 
5308 	return 0;
5309 }
5310 
5311 static int gaudi_patch_cb(struct hl_device *hdev,
5312 				struct hl_cs_parser *parser)
5313 {
5314 	u32 cb_parsed_length = 0;
5315 	u32 cb_patched_cur_length = 0;
5316 	int rc = 0;
5317 
5318 	/* user_cb_size is greater than 0, so the loop always executes */
5319 	while (cb_parsed_length < parser->user_cb_size) {
5320 		enum packet_id pkt_id;
5321 		u16 pkt_size;
5322 		u32 new_pkt_size = 0;
5323 		struct gaudi_packet *user_pkt, *kernel_pkt;
5324 
5325 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5326 		kernel_pkt = parser->patched_cb->kernel_address +
5327 					cb_patched_cur_length;
5328 
5329 		pkt_id = (enum packet_id) (
5330 				(le64_to_cpu(user_pkt->header) &
5331 				PACKET_HEADER_PACKET_ID_MASK) >>
5332 					PACKET_HEADER_PACKET_ID_SHIFT);
5333 
5334 		if (!validate_packet_id(pkt_id)) {
5335 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5336 			rc = -EINVAL;
5337 			break;
5338 		}
5339 
5340 		pkt_size = gaudi_packet_sizes[pkt_id];
5341 		cb_parsed_length += pkt_size;
5342 		if (cb_parsed_length > parser->user_cb_size) {
5343 			dev_err(hdev->dev,
5344 				"packet 0x%x is out of CB boundary\n", pkt_id);
5345 			rc = -EINVAL;
5346 			break;
5347 		}
5348 
5349 		switch (pkt_id) {
5350 		case PACKET_LIN_DMA:
5351 			rc = gaudi_patch_dma_packet(hdev, parser,
5352 					(struct packet_lin_dma *) user_pkt,
5353 					(struct packet_lin_dma *) kernel_pkt,
5354 					&new_pkt_size);
5355 			cb_patched_cur_length += new_pkt_size;
5356 			break;
5357 
5358 		case PACKET_MSG_PROT:
5359 			dev_err(hdev->dev,
5360 				"User not allowed to use MSG_PROT\n");
5361 			rc = -EPERM;
5362 			break;
5363 
5364 		case PACKET_CP_DMA:
5365 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5366 			rc = -EPERM;
5367 			break;
5368 
5369 		case PACKET_STOP:
5370 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5371 			rc = -EPERM;
5372 			break;
5373 
5374 		case PACKET_WREG_32:
5375 		case PACKET_WREG_BULK:
5376 		case PACKET_MSG_LONG:
5377 		case PACKET_MSG_SHORT:
5378 		case PACKET_REPEAT:
5379 		case PACKET_FENCE:
5380 		case PACKET_NOP:
5381 		case PACKET_ARB_POINT:
5382 		case PACKET_LOAD_AND_EXE:
5383 			memcpy(kernel_pkt, user_pkt, pkt_size);
5384 			cb_patched_cur_length += pkt_size;
5385 			break;
5386 
5387 		default:
5388 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5389 				pkt_id);
5390 			rc = -EINVAL;
5391 			break;
5392 		}
5393 
5394 		if (rc)
5395 			break;
5396 	}
5397 
5398 	return rc;
5399 }
5400 
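/*
 * CS parsing when the MMU is enabled: the user CB is copied as-is into a
 * kernel-allocated patched CB (with room for the end-of-CB packets) and then
 * validated, since no address patching is required.
 */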
5401 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5402 		struct hl_cs_parser *parser)
5403 {
5404 	u64 handle;
5405 	u32 patched_cb_size;
5406 	struct hl_cb *user_cb;
5407 	int rc;
5408 
5409 	/*
5410 	 * The new CB should have space at the end for:
5411 	 * 1. Optional NOP padding for cacheline alignment
5412 	 * 2. A MSG_PROT packet that will act as a completion packet
5413 	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5414 	 */
5415 	if (parser->completion)
5416 		parser->patched_cb_size = parser->user_cb_size +
5417 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5418 	else
5419 		parser->patched_cb_size = parser->user_cb_size;
5420 
5421 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5422 				parser->patched_cb_size, false, false,
5423 				&handle);
5424 
5425 	if (rc) {
5426 		dev_err(hdev->dev,
5427 			"Failed to allocate patched CB for DMA CS %d\n",
5428 			rc);
5429 		return rc;
5430 	}
5431 
5432 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5433 	/* hl_cb_get should never fail */
5434 	if (!parser->patched_cb) {
5435 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5436 		rc = -EFAULT;
5437 		goto out;
5438 	}
5439 
5440 	/*
5441 	 * We are protected from overflow because the check
5442 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5443 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5444 	 *
5445 	 * There is no option to reach here without going through that check because:
5446 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5447 	 *    an external queue.
5448 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5449 	 */
5450 	memcpy(parser->patched_cb->kernel_address,
5451 		parser->user_cb->kernel_address,
5452 		parser->user_cb_size);
5453 
5454 	patched_cb_size = parser->patched_cb_size;
5455 
5456 	/* Validate patched CB instead of user CB */
5457 	user_cb = parser->user_cb;
5458 	parser->user_cb = parser->patched_cb;
5459 	rc = gaudi_validate_cb(hdev, parser, true);
5460 	parser->user_cb = user_cb;
5461 
5462 	if (rc) {
5463 		hl_cb_put(parser->patched_cb);
5464 		goto out;
5465 	}
5466 
5467 	if (patched_cb_size != parser->patched_cb_size) {
5468 		dev_err(hdev->dev, "user CB size mismatch\n");
5469 		hl_cb_put(parser->patched_cb);
5470 		rc = -EINVAL;
5471 		goto out;
5472 	}
5473 
5474 out:
5475 	/*
5476 	 * Always call cb destroy here because we still hold 1 reference
5477 	 * to it from the earlier cb_get call. After the job completes,
5478 	 * cb_put will release it, but here we want to remove it from the
5479 	 * idr
5480 	 */
5481 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5482 
5483 	return rc;
5484 }
5485 
5486 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5487 		struct hl_cs_parser *parser)
5488 {
5489 	u64 handle;
5490 	int rc;
5491 
5492 	rc = gaudi_validate_cb(hdev, parser, false);
5493 
5494 	if (rc)
5495 		goto free_userptr;
5496 
5497 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5498 				parser->patched_cb_size, false, false,
5499 				&handle);
5500 	if (rc) {
5501 		dev_err(hdev->dev,
5502 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5503 		goto free_userptr;
5504 	}
5505 
5506 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5507 	/* hl_cb_get should never fail here */
5508 	if (!parser->patched_cb) {
5509 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5510 		rc = -EFAULT;
5511 		goto out;
5512 	}
5513 
5514 	rc = gaudi_patch_cb(hdev, parser);
5515 
5516 	if (rc)
5517 		hl_cb_put(parser->patched_cb);
5518 
5519 out:
5520 	/*
5521 	 * Always call cb destroy here because we still hold 1 reference
5522 	 * to it from the earlier cb_get call. After the job completes,
5523 	 * cb_put will release it, but here we want to remove it from the
5524 	 * idr
5525 	 */
5526 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5527 
5528 free_userptr:
5529 	if (rc)
5530 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5531 	return rc;
5532 }
5533 
5534 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5535 					struct hl_cs_parser *parser)
5536 {
5537 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5538 	struct gaudi_device *gaudi = hdev->asic_specific;
5539 	u32 nic_queue_offset, nic_mask_q_id;
5540 
5541 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5542 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5543 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5544 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5545 
5546 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5547 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5548 			return -EINVAL;
5549 		}
5550 	}
5551 
5552 	/* For internal queue jobs just check if CB address is valid */
5553 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5554 					parser->user_cb_size,
5555 					asic_prop->sram_user_base_address,
5556 					asic_prop->sram_end_address))
5557 		return 0;
5558 
5559 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5560 					parser->user_cb_size,
5561 					asic_prop->dram_user_base_address,
5562 					asic_prop->dram_end_address))
5563 		return 0;
5564 
5565 	/* PMMU and HPMMU addresses are equal, check only one of them */
5566 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5567 					parser->user_cb_size,
5568 					asic_prop->pmmu.start_addr,
5569 					asic_prop->pmmu.end_addr))
5570 		return 0;
5571 
5572 	dev_err(hdev->dev,
5573 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5574 		parser->user_cb, parser->user_cb_size);
5575 
5576 	return -EFAULT;
5577 }
5578 
5579 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5580 {
5581 	struct gaudi_device *gaudi = hdev->asic_specific;
5582 
5583 	if (parser->queue_type == QUEUE_TYPE_INT)
5584 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5585 
5586 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5587 		return gaudi_parse_cb_mmu(hdev, parser);
5588 	else
5589 		return gaudi_parse_cb_no_mmu(hdev, parser);
5590 }
5591 
5592 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5593 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5594 				u32 msi_vec, bool eb)
5595 {
5596 	struct gaudi_device *gaudi = hdev->asic_specific;
5597 	struct packet_msg_prot *cq_pkt;
5598 	struct packet_nop *cq_padding;
5599 	u64 msi_addr;
5600 	u32 tmp;
5601 
5602 	cq_padding = kernel_address + original_len;
5603 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5604 
5605 	while ((void *)cq_padding < (void *)cq_pkt) {
5606 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5607 		cq_padding++;
5608 	}
5609 
5610 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5611 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5612 
5613 	if (eb)
5614 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5615 
5616 	cq_pkt->ctl = cpu_to_le32(tmp);
5617 	cq_pkt->value = cpu_to_le32(cq_val);
5618 	cq_pkt->addr = cpu_to_le64(cq_addr);
5619 
5620 	cq_pkt++;
5621 
5622 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5623 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5624 	cq_pkt->ctl = cpu_to_le32(tmp);
5625 	cq_pkt->value = cpu_to_le32(1);
5626 
5627 	if (gaudi->multi_msi_mode)
5628 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5629 	else
5630 		msi_addr = mmPCIE_CORE_MSI_REQ;
5631 
5632 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5633 }
5634 
5635 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5636 {
5637 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5638 }
5639 
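/*
 * Fill a device memory range with a value by building a single memset-mode
 * LIN_DMA packet in a kernel CB and executing it on DMA channel 0 via QMAN0.
 */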
5640 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5641 					u32 size, u64 val)
5642 {
5643 	struct packet_lin_dma *lin_dma_pkt;
5644 	struct hl_cs_job *job;
5645 	u32 cb_size, ctl, err_cause;
5646 	struct hl_cb *cb;
5647 	int rc;
5648 
5649 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5650 	if (!cb)
5651 		return -EFAULT;
5652 
5653 	lin_dma_pkt = cb->kernel_address;
5654 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5655 	cb_size = sizeof(*lin_dma_pkt);
5656 
5657 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5658 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5659 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5660 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5661 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5662 
5663 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5664 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5665 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5666 	lin_dma_pkt->tsize = cpu_to_le32(size);
5667 
5668 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5669 	if (!job) {
5670 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5671 		rc = -ENOMEM;
5672 		goto release_cb;
5673 	}
5674 
5675 	/* Verify DMA is OK */
5676 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5677 	if (err_cause && !hdev->init_done) {
5678 		dev_dbg(hdev->dev,
5679 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5680 			err_cause);
5681 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5682 	}
5683 
5684 	job->id = 0;
5685 	job->user_cb = cb;
5686 	atomic_inc(&job->user_cb->cs_cnt);
5687 	job->user_cb_size = cb_size;
5688 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5689 	job->patched_cb = job->user_cb;
5690 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5691 
5692 	hl_debugfs_add_job(hdev, job);
5693 
5694 	rc = gaudi_send_job_on_qman0(hdev, job);
5695 	hl_debugfs_remove_job(hdev, job);
5696 	kfree(job);
5697 	atomic_dec(&cb->cs_cnt);
5698 
5699 	/* Verify DMA is OK */
5700 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5701 	if (err_cause) {
5702 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5703 		rc = -EIO;
5704 		if (!hdev->init_done) {
5705 			dev_dbg(hdev->dev,
5706 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5707 				err_cause);
5708 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5709 		}
5710 	}
5711 
5712 release_cb:
5713 	hl_cb_put(cb);
5714 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5715 
5716 	return rc;
5717 }
5718 
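/*
 * Write a value to num_regs consecutive registers by building a CB of
 * MSG_LONG packets (one per register) and executing it via QMAN0.
 */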
5719 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5720 					u32 num_regs, u32 val)
5721 {
5722 	struct packet_msg_long *pkt;
5723 	struct hl_cs_job *job;
5724 	u32 cb_size, ctl;
5725 	struct hl_cb *cb;
5726 	int i, rc;
5727 
5728 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5729 
5730 	if (cb_size > SZ_2M) {
5731 		dev_err(hdev->dev, "CB size must not exceed %uMB\n", SZ_2M >> 20);
5732 		return -ENOMEM;
5733 	}
5734 
5735 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5736 	if (!cb)
5737 		return -EFAULT;
5738 
5739 	pkt = cb->kernel_address;
5740 
5741 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5742 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5743 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5744 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5745 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5746 
5747 	for (i = 0; i < num_regs ; i++, pkt++) {
5748 		pkt->ctl = cpu_to_le32(ctl);
5749 		pkt->value = cpu_to_le32(val);
5750 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5751 	}
5752 
5753 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5754 	if (!job) {
5755 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5756 		rc = -ENOMEM;
5757 		goto release_cb;
5758 	}
5759 
5760 	job->id = 0;
5761 	job->user_cb = cb;
5762 	atomic_inc(&job->user_cb->cs_cnt);
5763 	job->user_cb_size = cb_size;
5764 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5765 	job->patched_cb = job->user_cb;
5766 	job->job_cb_size = cb_size;
5767 
5768 	hl_debugfs_add_job(hdev, job);
5769 
5770 	rc = gaudi_send_job_on_qman0(hdev, job);
5771 	hl_debugfs_remove_job(hdev, job);
5772 	kfree(job);
5773 	atomic_dec(&cb->cs_cnt);
5774 
5775 release_cb:
5776 	hl_cb_put(cb);
5777 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5778 
5779 	return rc;
5780 }
5781 
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	/* SOB and monitor-status blocks to clear. The W_S block skips the
	 * objects/monitors reserved for the driver.
	 */
	static const struct {
		u64 base_addr;
		u32 num_regs;
	} sm_blocks[] = {
		{ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, NUM_OF_SOB_IN_BLOCK },
		{ mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0, NUM_OF_SOB_IN_BLOCK },
		{ mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0, NUM_OF_SOB_IN_BLOCK },
		{ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, NUM_OF_MONITORS_IN_BLOCK },
		{ mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0, NUM_OF_MONITORS_IN_BLOCK },
		{ mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0, NUM_OF_MONITORS_IN_BLOCK },
		{ mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4),
			NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT },
		{ mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4),
			NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR },
	};
	int i, rc;

	for (i = 0 ; i < ARRAY_SIZE(sm_blocks) ; i++) {
		rc = gaudi_memset_registers(hdev, CFG_BASE + sm_blocks[i].base_addr,
						sm_blocks[i].num_regs, 0);
		if (rc) {
			dev_err(hdev->dev, "failed resetting SM registers\n");
			return rc;
		}
	}

	return 0;
}
5856 
5857 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5858 {
5859 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5860 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5861 	int i;
5862 
5863 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5864 		u64 sob_addr = CFG_BASE +
5865 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5866 				(i * sob_delta);
5867 		u32 dma_offset = i * DMA_CORE_OFFSET;
5868 
5869 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5870 				lower_32_bits(sob_addr));
5871 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5872 				upper_32_bits(sob_addr));
5873 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5874 
5875 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5876 		 * modified by the user for SRAM reduction
5877 		 */
5878 		if (i > 1)
5879 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5880 								0x00000001);
5881 	}
5882 }
5883 
5884 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5885 {
5886 	u32 qman_offset;
5887 	int i;
5888 
5889 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5890 		qman_offset = i * DMA_QMAN_OFFSET;
5891 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5892 	}
5893 
5894 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5895 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5896 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5897 	}
5898 
5899 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5900 		qman_offset = i * TPC_QMAN_OFFSET;
5901 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5902 	}
5903 
5904 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5905 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5906 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5907 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5908 	}
5909 }
5910 
5911 static int gaudi_restore_user_registers(struct hl_device *hdev)
5912 {
5913 	int rc;
5914 
5915 	rc = gaudi_restore_sm_registers(hdev);
5916 	if (rc)
5917 		return rc;
5918 
5919 	gaudi_restore_dma_registers(hdev);
5920 	gaudi_restore_qm_registers(hdev);
5921 
5922 	return 0;
5923 }
5924 
5925 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5926 {
5927 	return 0;
5928 }
5929 
5930 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5931 {
5932 	u32 size = hdev->asic_prop.mmu_pgt_size +
5933 			hdev->asic_prop.mmu_cache_mng_size;
5934 	struct gaudi_device *gaudi = hdev->asic_specific;
5935 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5936 
5937 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5938 		return 0;
5939 
5940 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5941 }
5942 
5943 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5944 {
5945 
5946 }
5947 
5948 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5949 					u32 size_to_dma, dma_addr_t dma_addr)
5950 {
5951 	u32 err_cause, val;
5952 	u64 dma_offset;
5953 	int rc;
5954 
5955 	dma_offset = dma_id * DMA_CORE_OFFSET;
5956 
5957 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5958 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5959 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5960 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5961 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5962 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5963 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5964 
5965 	rc = hl_poll_timeout(
5966 		hdev,
5967 		mmDMA0_CORE_STS0 + dma_offset,
5968 		val,
5969 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5970 		0,
5971 		1000000);
5972 
5973 	if (rc) {
5974 		dev_err(hdev->dev,
5975 			"DMA %d timed out while reading from 0x%llx\n",
5976 			dma_id, addr);
5977 		return -EIO;
5978 	}
5979 
5980 	/* Verify DMA is OK */
5981 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5982 	if (err_cause) {
5983 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5984 		dev_dbg(hdev->dev,
5985 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5986 			err_cause);
5987 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5988 
5989 		return -EIO;
5990 	}
5991 
5992 	return 0;
5993 }
5994 
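/*
 * Read device memory into a host blob using an idle PCI DMA engine, 2MB at a
 * time through a temporary coherent buffer.
 */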
5995 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5996 				void *blob_addr)
5997 {
5998 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5999 	u32 qm_glbl_sts0, qm_cgm_sts;
6000 	u64 dma_offset, qm_offset;
6001 	dma_addr_t dma_addr;
6002 	void *kernel_addr;
6003 	bool is_eng_idle;
6004 	int rc = 0, dma_id;
6005 
6006 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6007 
6008 	if (!kernel_addr)
6009 		return -ENOMEM;
6010 
6011 	hdev->asic_funcs->hw_queues_lock(hdev);
6012 
6013 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6014 	dma_offset = dma_id * DMA_CORE_OFFSET;
6015 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6016 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6017 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6018 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6019 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6020 		      IS_DMA_IDLE(dma_core_sts0);
6021 
6022 	if (!is_eng_idle) {
6023 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6024 		dma_offset = dma_id * DMA_CORE_OFFSET;
6025 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6026 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6027 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6028 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6029 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6030 			      IS_DMA_IDLE(dma_core_sts0);
6031 
6032 		if (!is_eng_idle) {
6033 			dev_err_ratelimited(hdev->dev,
6034 				"Can't read via DMA because it is BUSY\n");
6035 			rc = -EAGAIN;
6036 			goto out;
6037 		}
6038 	}
6039 
6040 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6041 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6042 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6043 
6044 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6045 	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6046 	 * ASID
6047 	 */
6048 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6049 
6050 	/* Verify DMA is OK */
6051 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6052 	if (err_cause) {
6053 		dev_dbg(hdev->dev,
6054 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6055 			err_cause);
6056 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6057 	}
6058 
6059 	pos = 0;
6060 	size_left = size;
6061 	size_to_dma = SZ_2M;
6062 
6063 	while (size_left > 0) {
6064 
6065 		if (size_left < SZ_2M)
6066 			size_to_dma = size_left;
6067 
6068 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6069 						dma_addr);
6070 		if (rc)
6071 			break;
6072 
6073 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6074 
6075 		if (size_left <= SZ_2M)
6076 			break;
6077 
6078 		pos += SZ_2M;
6079 		addr += SZ_2M;
6080 		size_left -= SZ_2M;
6081 	}
6082 
6083 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6084 	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6085 	 * ASID
6086 	 */
6087 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6088 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6089 
6090 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6091 
6092 out:
6093 	hdev->asic_funcs->hw_queues_unlock(hdev);
6094 
6095 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6096 
6097 	return rc;
6098 }
6099 
6100 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6101 {
6102 	struct gaudi_device *gaudi = hdev->asic_specific;
6103 
6104 	if (hdev->reset_info.hard_reset_pending)
6105 		return U64_MAX;
6106 
6107 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6108 			(addr - gaudi->hbm_bar_cur_addr));
6109 }
6110 
6111 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6112 {
6113 	struct gaudi_device *gaudi = hdev->asic_specific;
6114 
6115 	if (hdev->reset_info.hard_reset_pending)
6116 		return;
6117 
6118 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6119 			(addr - gaudi->hbm_bar_cur_addr));
6120 }
6121 
6122 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6123 {
6124 	/* mask to zero the MMBP and ASID bits */
6125 	WREG32_AND(reg, ~0x7FF);
6126 	WREG32_OR(reg, asid);
6127 }
6128 
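/*
 * Program the given ASID (and clear the MMU-bypass bit) in the user registers
 * of all QMANs and engines, so their transactions are translated by the MMU
 * under this ASID.
 */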
6129 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6130 {
6131 	struct gaudi_device *gaudi = hdev->asic_specific;
6132 
6133 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6134 		return;
6135 
6136 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6137 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6138 		return;
6139 	}
6140 
6141 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6143 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6144 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6145 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6146 
6147 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6152 
6153 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158 
6159 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6160 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6161 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6164 
6165 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6167 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6168 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6170 
6171 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6176 
6177 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6182 
6183 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6184 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6185 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6188 
6189 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6192 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6197 
6198 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6205 
6206 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6213 
6214 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6215 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6216 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6221 
6222 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6223 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6224 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6225 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6226 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6227 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6228 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6229 
6230 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6231 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6232 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6233 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6234 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6235 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6236 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6237 
6238 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6239 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6240 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6241 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6242 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6243 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6244 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6245 
6246 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6247 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6248 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6249 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6250 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6251 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6252 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6253 
6254 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6255 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6256 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6257 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6258 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6259 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6260 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6261 
6262 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6263 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6264 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6265 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6266 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6267 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6268 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6269 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6270 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6271 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6272 
6273 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6274 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6275 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6276 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6277 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6278 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6279 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6280 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6281 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6282 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6283 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6284 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6285 
6286 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6287 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6288 				asid);
6289 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6290 				asid);
6291 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6292 				asid);
6293 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6294 				asid);
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6296 				asid);
6297 	}
6298 
6299 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6301 				asid);
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6305 				asid);
6306 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6307 				asid);
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6309 				asid);
6310 	}
6311 
6312 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6314 				asid);
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6318 				asid);
6319 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6320 				asid);
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6322 				asid);
6323 	}
6324 
6325 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6327 				asid);
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6331 				asid);
6332 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6333 				asid);
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6335 				asid);
6336 	}
6337 
6338 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6339 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6340 				asid);
6341 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6342 				asid);
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6344 				asid);
6345 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6346 				asid);
6347 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6348 				asid);
6349 	}
6350 
6351 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6352 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6353 				asid);
6354 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6355 				asid);
6356 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6357 				asid);
6358 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6359 				asid);
6360 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6361 				asid);
6362 	}
6363 
6364 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6365 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6366 				asid);
6367 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6368 				asid);
6369 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6370 				asid);
6371 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6372 				asid);
6373 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6374 				asid);
6375 	}
6376 
6377 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6378 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6379 				asid);
6380 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6381 				asid);
6382 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6383 				asid);
6384 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6385 				asid);
6386 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6387 				asid);
6388 	}
6389 
6390 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6391 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6392 				asid);
6393 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6394 				asid);
6395 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6396 				asid);
6397 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6398 				asid);
6399 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6400 				asid);
6401 	}
6402 
6403 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6404 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6405 				asid);
6406 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6407 				asid);
6408 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6409 				asid);
6410 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6411 				asid);
6412 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6413 				asid);
6414 	}
6415 
6416 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6417 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6418 }
6419 
6420 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6421 		struct hl_cs_job *job)
6422 {
6423 	struct packet_msg_prot *fence_pkt;
6424 	u32 *fence_ptr;
6425 	dma_addr_t fence_dma_addr;
6426 	struct hl_cb *cb;
6427 	u32 tmp, timeout, dma_offset;
6428 	int rc;
6429 
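	/* Emulation platforms (pldm) are much slower, hence the extended timeout below */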
6430 	if (hdev->pldm)
6431 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6432 	else
6433 		timeout = HL_DEVICE_TIMEOUT_USEC;
6434 
6435 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6436 		dev_err_ratelimited(hdev->dev,
6437 			"Can't send driver job on QMAN0 because the device is not idle\n");
6438 		return -EBUSY;
6439 	}
6440 
6441 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6442 	if (!fence_ptr) {
6443 		dev_err(hdev->dev,
6444 			"Failed to allocate fence memory for QMAN0\n");
6445 		return -ENOMEM;
6446 	}
6447 
6448 	cb = job->patched_cb;
6449 
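	/*
	 * The patched CB ends with a MSG_PROT packet that writes a known fence
	 * value to host memory; the driver polls on that value below to detect
	 * completion of the job.
	 */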
6450 	fence_pkt = cb->kernel_address +
6451 			job->job_cb_size - sizeof(struct packet_msg_prot);
6452 
6453 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6454 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6455 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6456 
6457 	fence_pkt->ctl = cpu_to_le32(tmp);
6458 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6459 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6460 
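	/*
	 * Temporarily flip the PCI DMA core protection bits for this
	 * driver-owned job; the error-only setting is restored at
	 * free_fence_ptr, whether the job completed or timed out.
	 */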
6461 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6462 
6463 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6464 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6465 
6466 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6467 					job->job_cb_size, cb->bus_address);
6468 	if (rc) {
6469 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6470 		goto free_fence_ptr;
6471 	}
6472 
6473 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6474 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6475 				timeout, true);
6476 
6477 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6478 
6479 	if (rc == -ETIMEDOUT) {
6480 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6481 		goto free_fence_ptr;
6482 	}
6483 
6484 free_fence_ptr:
6485 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6486 
6487 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6488 	return rc;
6489 }
6490 
6491 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6492 {
6493 	if (event_type >= GAUDI_EVENT_SIZE)
6494 		goto event_not_supported;
6495 
6496 	if (!gaudi_irq_map_table[event_type].valid)
6497 		goto event_not_supported;
6498 
6499 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6500 
6501 	return;
6502 
6503 event_not_supported:
6504 	snprintf(desc, size, "N/A");
6505 }
6506 
6507 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6508 							bool is_write, s32 *engine_id_1,
6509 							s32 *engine_id_2)
6510 {
6511 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6512 
6513 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6514 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6515 
6516 	switch (x_y) {
6517 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6518 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6519 		dma_id[0] = 0;
6520 		dma_id[1] = 2;
6521 		break;
6522 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6523 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6524 		dma_id[0] = 1;
6525 		dma_id[1] = 3;
6526 		break;
6527 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6528 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6529 		dma_id[0] = 4;
6530 		dma_id[1] = 6;
6531 		break;
6532 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6533 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6534 		dma_id[0] = 5;
6535 		dma_id[1] = 7;
6536 		break;
6537 	default:
6538 		goto unknown_initiator;
6539 	}
6540 
6541 	for (i = 0 ; i < 2 ; i++) {
6542 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6543 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6544 	}
6545 
6546 	switch (x_y) {
6547 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6548 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6549 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6550 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6551 			return "DMA0";
6552 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6553 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6554 			return "DMA2";
6555 		} else {
6556 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6557 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6558 			return "DMA0 or DMA2";
6559 		}
6560 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6561 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6562 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6563 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6564 			return "DMA1";
6565 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6566 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6567 			return "DMA3";
6568 		} else {
6569 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6570 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6571 			return "DMA1 or DMA3";
6572 		}
6573 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6574 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6575 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6576 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6577 			return "DMA4";
6578 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6579 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6580 			return "DMA6";
6581 		} else {
6582 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6583 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6584 			return "DMA4 or DMA6";
6585 		}
6586 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6587 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6588 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6589 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6590 			return "DMA5";
6591 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6592 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6593 			return "DMA7";
6594 		} else {
6595 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6596 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6597 			return "DMA5 or DMA7";
6598 		}
6599 	}
6600 
6601 unknown_initiator:
6602 	return "unknown initiator";
6603 }
6604 
6605 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6606 							u32 *engine_id_1, u32 *engine_id_2)
6607 {
6608 	u32 val, x_y, axi_id;
6609 
6610 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6611 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6612 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6613 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6614 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6615 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6616 
6617 	switch (x_y) {
6618 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6619 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6620 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6621 			return "TPC0";
6622 		}
6623 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6624 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6625 			return "NIC0";
6626 		}
6627 		break;
6628 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6629 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6630 		return "TPC1";
6631 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6632 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6633 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6634 		return "MME0";
6635 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6636 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6637 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6638 		return "MME1";
6639 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6640 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6641 		return "TPC2";
6642 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6643 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6644 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6645 			return "TPC3";
6646 		}
6647 		/* PCI, CPU or PSOC does not have an engine id */
6648 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6649 			return "PCI";
6650 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6651 			return "CPU";
6652 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6653 			return "PSOC";
6654 		break;
6655 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6656 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6657 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6658 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6659 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6660 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6661 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6662 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6663 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6664 				engine_id_1, engine_id_2);
6665 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6666 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6667 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6668 			return "TPC4";
6669 		}
6670 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6671 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6672 			return "NIC1";
6673 		}
6674 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6675 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6676 			return "NIC2";
6677 		}
6678 		break;
6679 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6680 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6681 		return "TPC5";
6682 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6683 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6684 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6685 		return "MME2";
6686 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6687 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6688 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6689 		return "MME3";
6690 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6691 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6692 		return "TPC6";
6693 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6694 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6695 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6696 			return "TPC7";
6697 		}
6698 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6699 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6700 			return "NIC4";
6701 		}
6702 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6703 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6704 			return "NIC5";
6705 		}
6706 		break;
6707 	default:
6708 		break;
6709 	}
6710 
6711 	dev_err(hdev->dev,
6712 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6713 		val,
6714 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6715 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6716 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6717 			RAZWI_INITIATOR_AXI_ID_MASK);
6718 
6719 	return "unknown initiator";
6720 }
6721 
6722 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
6723 						u32 *engine_id_2)
6724 {
6725 
6726 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6727 		dev_err_ratelimited(hdev->dev,
6728 			"RAZWI event caused by illegal write of %s\n",
6729 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6730 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6731 	}
6732 
6733 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6734 		dev_err_ratelimited(hdev->dev,
6735 			"RAZWI event caused by illegal read of %s\n",
6736 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6737 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6738 	}
6739 }
6740 
6741 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
6742 {
6743 	struct gaudi_device *gaudi = hdev->asic_specific;
6744 	u32 val;
6745 
6746 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6747 		return;
6748 
6749 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6750 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
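		/*
		 * The captured VA is split across two registers: bits 49..32 in
		 * the capture register and bits 31..0 in the VA register.
		 */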
6751 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6752 		*addr <<= 32;
6753 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6754 
6755 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6756 		*type = HL_RAZWI_PAGE_FAULT;
6757 
6758 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6759 	}
6760 
6761 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6762 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6763 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6764 		*addr <<= 32;
6765 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6766 
6767 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6768 		*type = HL_RAZWI_MMU_ACCESS_ERROR;
6769 
6770 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6771 	}
6772 }
6773 
6774 /*
6775  *  +-------------------+------------------------------------------------------+
6776  *  | Configuration Reg |                     Description                      |
6777  *  |      Address      |                                                      |
6778  *  +-------------------+------------------------------------------------------+
6779  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6780  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6781  *  |                   |0xF34 memory wrappers 63:32                           |
6782  *  |                   |0xF38 memory wrappers 95:64                           |
6783  *  |                   |0xF3C memory wrappers 127:96                          |
6784  *  +-------------------+------------------------------------------------------+
6785  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6786  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6787  *  |                   |0xF44 memory wrappers 63:32                           |
6788  *  |                   |0xF48 memory wrappers 95:64                           |
6789  *  |                   |0xF4C memory wrappers 127:96                          |
6790  *  +-------------------+------------------------------------------------------+
6791  */
6792 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6793 		struct ecc_info_extract_params *params, u64 *ecc_address,
6794 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6795 {
6796 	u32 i, num_mem_regs, reg, err_bit;
6797 	u64 err_addr, err_word = 0;
6798 
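	/* Each indication register covers 32 memory wrappers, so round up */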
6799 	num_mem_regs = params->num_memories / 32 +
6800 			((params->num_memories % 32) ? 1 : 0);
6801 
6802 	if (params->block_address >= CFG_BASE)
6803 		params->block_address -= CFG_BASE;
6804 
6805 	if (params->derr)
6806 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6807 	else
6808 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6809 
6810 	/* Set invalid wrapper index */
6811 	*memory_wrapper_idx = 0xFF;
6812 
6813 	/* Iterate through memory wrappers, a single bit must be set */
6814 	for (i = 0 ; i < num_mem_regs ; i++) {
6815 		err_word = RREG32(err_addr + i * 4);
6817 		if (err_word) {
6818 			err_bit = __ffs(err_word);
6819 			*memory_wrapper_idx = err_bit + (32 * i);
6820 			break;
6821 		}
6822 	}
6823 
6824 	if (*memory_wrapper_idx == 0xFF) {
6825 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6826 		return -EINVAL;
6827 	}
6828 
6829 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6830 			*memory_wrapper_idx);
6831 
6832 	*ecc_address =
6833 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6834 	*ecc_syndrom =
6835 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6836 
6837 	/* Clear error indication */
6838 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6839 	if (params->derr)
6840 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6841 	else
6842 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6843 
6844 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6845 
6846 	return 0;
6847 }
6848 
6849 /*
6850  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6851  *
6852  * @idx: the current pi/ci value
6853  * @q_len: the queue length (power of 2)
6854  *
6855  * @return the cyclically decremented index
6856  */
6857 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6858 {
6859 	u32 mask = q_len - 1;
6860 
6861 	/*
6862 	 * modular decrement is equivalent to adding (queue_size -1)
6863 	 * later we take LSBs to make sure the value is in the
6864 	 * range [0, queue_len - 1]
6865 	 */
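	/* e.g. idx = 0, q_len = 8: (0 + 8 - 1) & 0x7 = 7 */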
6866 	return (idx + q_len - 1) & mask;
6867 }
6868 
6869 /**
6870  * gaudi_handle_sw_config_stream_data - print SW config stream data
6871  *
6872  * @hdev: pointer to the habanalabs device structure
6873  * @stream: the QMAN's stream
6874  * @qman_base: base address of QMAN registers block
6875  * @event_mask: mask of the last events occurred
6876  */
6877 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6878 						u64 qman_base, u64 event_mask)
6879 {
6880 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6881 	u32 cq_ptr_lo_off, size;
6882 
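	/* Per-stream register stride, derived from the stream 0 and stream 1 CQ_PTR_LO addresses */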
6883 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6884 
6885 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6886 						stream * cq_ptr_lo_off;
6887 	cq_ptr_hi = cq_ptr_lo +
6888 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6889 	cq_tsize = cq_ptr_lo +
6890 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6891 
6892 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6893 	size = RREG32(cq_tsize);
6894 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6895 							stream, cq_ptr, size);
6896 
6897 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6898 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6899 		hdev->captured_err_info.undef_opcode.cq_size = size;
6900 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6901 	}
6902 }
6903 
6904 /**
6905  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6906  *
6907  * @hdev: pointer to the habanalabs device structure
6908  * @qid_base: first QID of the QMAN (out of 4 streams)
6909  * @stream: the QMAN's stream
6910  * @qman_base: base address of QMAN registers block
6911  * @event_mask: mask of the last events occurred
6912  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6913  */
6914 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6915 						u32 stream, u64 qman_base,
6916 						u64 event_mask,
6917 						bool pr_sw_conf)
6918 {
6919 	u32 ci, qm_ci_stream_off, queue_len;
6920 	struct hl_hw_queue *q;
6921 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6922 	int i;
6923 
6924 	q = &hdev->kernel_queues[qid_base + stream];
6925 
6926 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6927 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6928 						stream * qm_ci_stream_off;
6929 
6930 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6931 					q->int_queue_len : HL_QUEUE_LENGTH;
6932 
6933 	hdev->asic_funcs->hw_queues_lock(hdev);
6934 
6935 	if (pr_sw_conf)
6936 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6937 
6938 	ci = RREG32(pq_ci);
6939 
6940 	/* we should start printing from ci - 1 */
6941 	ci = gaudi_queue_idx_dec(ci, queue_len);
6942 	memset(addr, 0, sizeof(addr));
6943 
6944 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6945 		struct hl_bd *bd;
6946 		u32 len;
6947 
6948 		bd = q->kernel_address;
6949 		bd += ci;
6950 
6951 		len = le32_to_cpu(bd->len);
6952 		/* len 0 means uninitialized entry - break */
6953 		if (!len)
6954 			break;
6955 
6956 		addr[i] = le64_to_cpu(bd->ptr);
6957 
6958 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6959 							stream, ci, addr[i], len);
6960 
6961 		/* get previous ci, wrap if needed */
6962 		ci = gaudi_queue_idx_dec(ci, queue_len);
6963 	}
6964 
6965 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6966 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6967 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6968 
6969 		if (arr_idx == 0) {
6970 			undef_opcode->timestamp = ktime_get();
6971 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6972 		}
6973 
6974 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6975 		undef_opcode->cb_addr_streams_len++;
6976 	}
6977 
6978 	hdev->asic_funcs->hw_queues_unlock(hdev);
6979 }
6980 
6981 /**
6982  * handle_qman_data_on_err - extract QMAN data on error
6983  *
6984  * @hdev: pointer to the habanalabs device structure
6985  * @qid_base: first QID of the QMAN (out of 4 streams)
6986  * @stream: the QMAN's stream
6987  * @qman_base: base address of QMAN registers block
6988  * @event_mask: mask of the last events occurred
6989  *
6990  * This function attempts to extract as much data as possible on a QMAN error.
6991  * For an upper CP, print the SW config stream data and the last 8 PQEs.
6992  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6993  */
6994 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6995 				   u32 stream, u64 qman_base, u64 event_mask)
6996 {
6997 	u32 i;
6998 
6999 	if (stream != QMAN_STREAMS) {
7000 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7001 			qman_base, event_mask, true);
7002 		return;
7003 	}
7004 
7005 	/* handle Lower-CP */
7006 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7007 
7008 	for (i = 0; i < QMAN_STREAMS; i++)
7009 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7010 			qman_base, event_mask, false);
7011 }
7012 
7013 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7014 					  const char *qm_name,
7015 					  u64 qman_base,
7016 					  u32 qid_base,
7017 					  u64 *event_mask)
7018 {
7019 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7020 	u64 glbl_sts_addr, arb_err_addr;
7021 	char reg_desc[32];
7022 
7023 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7024 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7025 
7026 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7027 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7028 		glbl_sts_clr_val = 0;
7029 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7030 
7031 		if (!glbl_sts_val)
7032 			continue;
7033 
7034 		if (i == QMAN_STREAMS)
7035 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7036 		else
7037 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7038 
7039 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7040 			if (glbl_sts_val & BIT(j)) {
7041 				dev_err_ratelimited(hdev->dev,
7042 						"%s %s. err cause: %s\n",
7043 						qm_name, reg_desc,
7044 						gaudi_qman_error_cause[j]);
7045 				glbl_sts_clr_val |= BIT(j);
7046 			}
7047 		}
7048 		/* check for undefined opcode */
7049 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7050 				hdev->captured_err_info.undef_opcode.write_enable) {
7051 			memset(&hdev->captured_err_info.undef_opcode, 0,
7052 						sizeof(hdev->captured_err_info.undef_opcode));
7053 
7054 			hdev->captured_err_info.undef_opcode.write_enable = false;
7055 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7056 		}
7057 
7058 		/* Write 1 to clear errors */
7059 		if (!hdev->stop_on_err)
7060 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7061 		else
7062 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7063 	}
7064 
7065 	arb_err_val = RREG32(arb_err_addr);
7066 
7067 	if (!arb_err_val)
7068 		return;
7069 
7070 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7071 		if (arb_err_val & BIT(j)) {
7072 			dev_err_ratelimited(hdev->dev,
7073 					"%s ARB_ERR. err cause: %s\n",
7074 					qm_name,
7075 					gaudi_qman_arb_error_cause[j]);
7076 		}
7077 	}
7078 }
7079 
7080 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7081 		struct hl_eq_sm_sei_data *sei_data)
7082 {
7083 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7084 
7085 	/* Flip the bits as the enum is ordered in the opposite way */
7086 	index = (index ^ 0x3) & 0x3;
7087 
7088 	switch (sei_data->sei_cause) {
7089 	case SM_SEI_SO_OVERFLOW:
7090 		dev_err_ratelimited(hdev->dev,
7091 			"%s SEI Error: SOB Group %u overflow/underflow",
7092 			gaudi_sync_manager_names[index],
7093 			le32_to_cpu(sei_data->sei_log));
7094 		break;
7095 	case SM_SEI_LBW_4B_UNALIGNED:
7096 		dev_err_ratelimited(hdev->dev,
7097 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7098 			gaudi_sync_manager_names[index],
7099 			le32_to_cpu(sei_data->sei_log));
7100 		break;
7101 	case SM_SEI_AXI_RESPONSE_ERR:
7102 		dev_err_ratelimited(hdev->dev,
7103 			"%s SEI Error: AXI ID %u response error",
7104 			gaudi_sync_manager_names[index],
7105 			le32_to_cpu(sei_data->sei_log));
7106 		break;
7107 	default:
7108 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7109 				le32_to_cpu(sei_data->sei_log));
7110 		break;
7111 	}
7112 }
7113 
7114 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7115 		struct hl_eq_ecc_data *ecc_data)
7116 {
7117 	struct ecc_info_extract_params params;
7118 	u64 ecc_address = 0, ecc_syndrom = 0;
7119 	u8 index, memory_wrapper_idx = 0;
7120 	bool extract_info_from_fw;
7121 	int rc;
7122 
7123 	if (hdev->asic_prop.fw_security_enabled) {
7124 		extract_info_from_fw = true;
7125 		goto extract_ecc_info;
7126 	}
7127 
7128 	switch (event_type) {
7129 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7130 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7131 		extract_info_from_fw = true;
7132 		break;
7133 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7134 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7135 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7136 		params.num_memories = 90;
7137 		params.derr = false;
7138 		extract_info_from_fw = false;
7139 		break;
7140 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7141 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7142 		params.block_address =
7143 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7144 		params.num_memories = 90;
7145 		params.derr = true;
7146 		extract_info_from_fw = false;
7147 		break;
7148 	case GAUDI_EVENT_MME0_ACC_SERR:
7149 	case GAUDI_EVENT_MME1_ACC_SERR:
7150 	case GAUDI_EVENT_MME2_ACC_SERR:
7151 	case GAUDI_EVENT_MME3_ACC_SERR:
7152 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7153 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7154 		params.num_memories = 128;
7155 		params.derr = false;
7156 		extract_info_from_fw = false;
7157 		break;
7158 	case GAUDI_EVENT_MME0_ACC_DERR:
7159 	case GAUDI_EVENT_MME1_ACC_DERR:
7160 	case GAUDI_EVENT_MME2_ACC_DERR:
7161 	case GAUDI_EVENT_MME3_ACC_DERR:
7162 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7163 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7164 		params.num_memories = 128;
7165 		params.derr = true;
7166 		extract_info_from_fw = false;
7167 		break;
7168 	case GAUDI_EVENT_MME0_SBAB_SERR:
7169 	case GAUDI_EVENT_MME1_SBAB_SERR:
7170 	case GAUDI_EVENT_MME2_SBAB_SERR:
7171 	case GAUDI_EVENT_MME3_SBAB_SERR:
7172 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7173 		params.block_address =
7174 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7175 		params.num_memories = 33;
7176 		params.derr = false;
7177 		extract_info_from_fw = false;
7178 		break;
7179 	case GAUDI_EVENT_MME0_SBAB_DERR:
7180 	case GAUDI_EVENT_MME1_SBAB_DERR:
7181 	case GAUDI_EVENT_MME2_SBAB_DERR:
7182 	case GAUDI_EVENT_MME3_SBAB_DERR:
7183 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7184 		params.block_address =
7185 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7186 		params.num_memories = 33;
7187 		params.derr = true;
7188 		extract_info_from_fw = false;
7189 		break;
7190 	default:
7191 		return;
7192 	}
7193 
7194 extract_ecc_info:
7195 	if (extract_info_from_fw) {
7196 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7197 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7198 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7199 	} else {
7200 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7201 				&ecc_syndrom, &memory_wrapper_idx);
7202 		if (rc)
7203 			return;
7204 	}
7205 
7206 	dev_err(hdev->dev,
7207 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7208 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7209 }
7210 
7211 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7212 {
7213 	u64 qman_base;
7214 	char desc[32];
7215 	u32 qid_base;
7216 	u8 index;
7217 
7218 	switch (event_type) {
7219 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7220 		index = event_type - GAUDI_EVENT_TPC0_QM;
7221 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7222 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7223 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7224 		break;
7225 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7226 		if (event_type == GAUDI_EVENT_MME0_QM) {
7227 			index = 0;
7228 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7229 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7230 			index = 2;
7231 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7232 		}
7233 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7234 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7235 		break;
7236 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7237 		index = event_type - GAUDI_EVENT_DMA0_QM;
7238 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7239 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7240 		if (index > 1)
7241 			qid_base++;
7242 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7243 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7244 		break;
7245 	case GAUDI_EVENT_NIC0_QM0:
7246 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7247 		qman_base = mmNIC0_QM0_BASE;
7248 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7249 		break;
7250 	case GAUDI_EVENT_NIC0_QM1:
7251 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7252 		qman_base = mmNIC0_QM1_BASE;
7253 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7254 		break;
7255 	case GAUDI_EVENT_NIC1_QM0:
7256 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7257 		qman_base = mmNIC1_QM0_BASE;
7258 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7259 		break;
7260 	case GAUDI_EVENT_NIC1_QM1:
7261 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7262 		qman_base = mmNIC1_QM1_BASE;
7263 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7264 		break;
7265 	case GAUDI_EVENT_NIC2_QM0:
7266 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7267 		qman_base = mmNIC2_QM0_BASE;
7268 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7269 		break;
7270 	case GAUDI_EVENT_NIC2_QM1:
7271 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7272 		qman_base = mmNIC2_QM1_BASE;
7273 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7274 		break;
7275 	case GAUDI_EVENT_NIC3_QM0:
7276 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7277 		qman_base = mmNIC3_QM0_BASE;
7278 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7279 		break;
7280 	case GAUDI_EVENT_NIC3_QM1:
7281 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7282 		qman_base = mmNIC3_QM1_BASE;
7283 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7284 		break;
7285 	case GAUDI_EVENT_NIC4_QM0:
7286 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7287 		qman_base = mmNIC4_QM0_BASE;
7288 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7289 		break;
7290 	case GAUDI_EVENT_NIC4_QM1:
7291 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7292 		qman_base = mmNIC4_QM1_BASE;
7293 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7294 		break;
7295 	default:
7296 		return;
7297 	}
7298 
7299 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7300 }
7301 
7302 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7303 					bool razwi)
7304 {
7305 	u32 engine_id_1, engine_id_2;
7306 	char desc[64] = "";
7307 	u64 razwi_addr = 0;
7308 	u8 razwi_type;
7309 	int rc;
7310 
7311 	/*
7312 	 * Initialize the engine ids as not valid; they get a valid value only if
7313 	 * the razwi was initiated by an engine that has an engine id.
7314 	 * Initialize the razwi type to its default; it is changed only if the
7315 	 * razwi was caused by a page fault or an MMU access error.
7316 	 */
7317 	engine_id_1 = U16_MAX;
7318 	engine_id_2 = U16_MAX;
7319 	razwi_type = U8_MAX;
7320 
7321 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7322 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7323 		event_type, desc);
7324 
7325 	if (razwi) {
7326 		gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7327 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7328 
7329 		/* In case it's the first razwi, save its parameters */
7330 		rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
7331 		if (rc) {
7332 			hdev->captured_err_info.razwi.timestamp = ktime_get();
7333 			hdev->captured_err_info.razwi.addr = razwi_addr;
7334 			hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
7335 			hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
7336 			/*
7337 			 * If first engine id holds non valid value the razwi initiator
7338 			 * does not have engine id
7339 			 */
7340 			hdev->captured_err_info.razwi.non_engine_initiator =
7341 									(engine_id_1 == U16_MAX);
7342 			hdev->captured_err_info.razwi.type = razwi_type;
7343 
7344 		}
7345 	}
7346 }
7347 
7348 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7349 					struct cpucp_pkt_sync_err *sync_err)
7350 {
7351 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7352 
7353 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7354 			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7355 }
7356 
7357 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7358 					struct hl_eq_fw_alive *fw_alive)
7359 {
7360 	dev_err(hdev->dev,
7361 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7362 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7363 		"Minor" : "Critical", fw_alive->process_id,
7364 		fw_alive->thread_id, fw_alive->uptime_seconds);
7365 }
7366 
7367 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7368 						void *data)
7369 {
7370 	char desc[64] = "", *type;
7371 	struct eq_nic_sei_event *eq_nic_sei = data;
7372 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7373 
7374 	switch (eq_nic_sei->axi_error_cause) {
7375 	case RXB:
7376 		type = "RXB";
7377 		break;
7378 	case RXE:
7379 		type = "RXE";
7380 		break;
7381 	case TXS:
7382 		type = "TXS";
7383 		break;
7384 	case TXE:
7385 		type = "TXE";
7386 		break;
7387 	case QPC_RESP:
7388 		type = "QPC_RESP";
7389 		break;
7390 	case NON_AXI_ERR:
7391 		type = "NON_AXI_ERR";
7392 		break;
7393 	case TMR:
7394 		type = "TMR";
7395 		break;
7396 	default:
7397 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7398 			eq_nic_sei->axi_error_cause);
7399 		type = "N/A";
7400 		break;
7401 	}
7402 
7403 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7404 			eq_nic_sei->id);
7405 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7406 		event_type, desc);
7407 }
7408 
7409 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7410 {
7411 	/* GAUDI doesn't support any reset except hard-reset */
7412 	return -EPERM;
7413 }
7414 
7415 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7416 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7417 {
7418 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7419 	int rc = 0;
7420 
7421 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7422 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7423 		if (!hbm_ecc_data) {
7424 			dev_err(hdev->dev, "No FW ECC data");
7425 			return 0;
7426 		}
7427 
7428 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7429 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7430 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7431 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7433 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7435 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7437 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7438 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7439 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7440 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7441 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7442 
7443 		dev_err(hdev->dev,
7444 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7445 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7446 		dev_err(hdev->dev,
7447 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7448 			device, ch, hbm_ecc_data->first_addr, type,
7449 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7450 			hbm_ecc_data->dec_cnt);
7451 		return 0;
7452 	}
7453 
7454 	if (hdev->asic_prop.fw_security_enabled) {
7455 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7456 		return 0;
7457 	}
7458 
7459 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7460 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
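		/* Channel blocks are 0x1000 apart; the low and high status bytes are OR-ed so any error bit is caught */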
7461 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7462 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7463 		if (val) {
7464 			rc = -EIO;
7465 			dev_err(hdev->dev,
7466 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7467 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7468 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7469 				(val >> 4) & 0x1);
7470 
7471 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7472 			dev_err(hdev->dev,
7473 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7474 				device, ch * 2,
7475 				RREG32(base + ch * 0x1000 + 0x064),
7476 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7477 				(val2 & 0xFF0000) >> 16,
7478 				(val2 & 0xFF000000) >> 24);
7479 		}
7480 
7481 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7482 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7483 		if (val) {
7484 			rc = -EIO;
7485 			dev_err(hdev->dev,
7486 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7487 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7488 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7489 				(val >> 4) & 0x1);
7490 
7491 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7492 			dev_err(hdev->dev,
7493 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7494 				device, ch * 2 + 1,
7495 				RREG32(base + ch * 0x1000 + 0x074),
7496 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7497 				(val2 & 0xFF0000) >> 16,
7498 				(val2 & 0xFF000000) >> 24);
7499 		}
7500 
7501 		/* Clear interrupts */
7502 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7503 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7504 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7505 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7506 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7507 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7508 	}
7509 
7510 	val  = RREG32(base + 0x8F30);
7511 	val2 = RREG32(base + 0x8F34);
7512 	if (val | val2) {
7513 		rc = -EIO;
7514 		dev_err(hdev->dev,
7515 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7516 			device, val, val2);
7517 	}
7518 	val  = RREG32(base + 0x8F40);
7519 	val2 = RREG32(base + 0x8F44);
7520 	if (val | val2) {
7521 		rc = -EIO;
7522 		dev_err(hdev->dev,
7523 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7524 			device, val, val2);
7525 	}
7526 
7527 	return rc;
7528 }
7529 
7530 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7531 {
7532 	switch (hbm_event_type) {
7533 	case GAUDI_EVENT_HBM0_SPI_0:
7534 	case GAUDI_EVENT_HBM0_SPI_1:
7535 		return 0;
7536 	case GAUDI_EVENT_HBM1_SPI_0:
7537 	case GAUDI_EVENT_HBM1_SPI_1:
7538 		return 1;
7539 	case GAUDI_EVENT_HBM2_SPI_0:
7540 	case GAUDI_EVENT_HBM2_SPI_1:
7541 		return 2;
7542 	case GAUDI_EVENT_HBM3_SPI_0:
7543 	case GAUDI_EVENT_HBM3_SPI_1:
7544 		return 3;
7545 	default:
7546 		break;
7547 	}
7548 
7549 	/* Should never happen */
7550 	return 0;
7551 }
7552 
7553 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7554 					char *interrupt_name)
7555 {
7556 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7557 	bool soft_reset_required = false;
7558 
7559 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7560 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7561 
7562 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7563 		if (tpc_interrupts_cause & BIT(i)) {
7564 			dev_err_ratelimited(hdev->dev,
7565 					"TPC%d_%s interrupt cause: %s\n",
7566 					tpc_id, interrupt_name,
7567 					gaudi_tpc_interrupts_cause[i]);
7568 			/* If this is QM error, we need to soft-reset */
7569 			if (i == 15)
7570 				soft_reset_required = true;
7571 		}
7572 
7573 	/* Clear interrupts */
7574 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7575 
7576 	return soft_reset_required;
7577 }
7578 
7579 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7580 {
7581 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7582 }
7583 
7584 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7585 {
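	/* KRN_ERR events of consecutive TPCs are assumed to be spaced 6 event IDs apart */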
7586 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7587 }
7588 
7589 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
7590 {
7591 	ktime_t zero_time = ktime_set(0, 0);
7592 
7593 	mutex_lock(&hdev->clk_throttling.lock);
7594 
7595 	switch (event_type) {
7596 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7597 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7598 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7599 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7600 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7601 		dev_info_ratelimited(hdev->dev,
7602 			"Clock throttling due to power consumption\n");
7603 		break;
7604 
7605 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7606 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7607 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7608 		dev_info_ratelimited(hdev->dev,
7609 			"Power envelope is safe, back to optimal clock\n");
7610 		break;
7611 
7612 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7613 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7614 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7615 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7616 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7617 		dev_info_ratelimited(hdev->dev,
7618 			"Clock throttling due to overheating\n");
7619 		break;
7620 
7621 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7622 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7623 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7624 		dev_info_ratelimited(hdev->dev,
7625 			"Thermal envelope is safe, back to optimal clock\n");
7626 		break;
7627 
7628 	default:
7629 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7630 			event_type);
7631 		break;
7632 	}
7633 
7634 	mutex_unlock(&hdev->clk_throttling.lock);
7635 }
7636 
7637 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7638 {
7639 	struct gaudi_device *gaudi = hdev->asic_specific;
7640 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7641 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7642 	u32 fw_fatal_err_flag = 0, flags = 0;
7643 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7644 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7645 	bool reset_required, reset_direct = false;
7646 	u8 cause;
7647 	int rc;
7648 
7649 	if (event_type >= GAUDI_EVENT_SIZE) {
7650 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7651 				event_type, GAUDI_EVENT_SIZE - 1);
7652 		return;
7653 	}
7654 
7655 	gaudi->events_stat[event_type]++;
7656 	gaudi->events_stat_aggregate[event_type]++;
7657 
7658 	switch (event_type) {
7659 	case GAUDI_EVENT_PCIE_CORE_DERR:
7660 	case GAUDI_EVENT_PCIE_IF_DERR:
7661 	case GAUDI_EVENT_PCIE_PHY_DERR:
7662 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7663 	case GAUDI_EVENT_MME0_ACC_DERR:
7664 	case GAUDI_EVENT_MME0_SBAB_DERR:
7665 	case GAUDI_EVENT_MME1_ACC_DERR:
7666 	case GAUDI_EVENT_MME1_SBAB_DERR:
7667 	case GAUDI_EVENT_MME2_ACC_DERR:
7668 	case GAUDI_EVENT_MME2_SBAB_DERR:
7669 	case GAUDI_EVENT_MME3_ACC_DERR:
7670 	case GAUDI_EVENT_MME3_SBAB_DERR:
7671 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7672 		fallthrough;
7673 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7674 	case GAUDI_EVENT_PSOC_MEM_DERR:
7675 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7676 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7677 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7678 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7679 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7680 	case GAUDI_EVENT_MMU_DERR:
7681 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7682 		gaudi_print_irq_info(hdev, event_type, true);
7683 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7684 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7685 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7686 		goto reset_device;
7687 
7688 	case GAUDI_EVENT_GIC500:
7689 	case GAUDI_EVENT_AXI_ECC:
7690 	case GAUDI_EVENT_L2_RAM_ECC:
7691 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7692 		gaudi_print_irq_info(hdev, event_type, false);
7693 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7694 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7695 		goto reset_device;
7696 
7697 	case GAUDI_EVENT_HBM0_SPI_0:
7698 	case GAUDI_EVENT_HBM1_SPI_0:
7699 	case GAUDI_EVENT_HBM2_SPI_0:
7700 	case GAUDI_EVENT_HBM3_SPI_0:
7701 		gaudi_print_irq_info(hdev, event_type, false);
7702 		gaudi_hbm_read_interrupts(hdev,
7703 				gaudi_hbm_event_to_dev(event_type),
7704 				&eq_entry->hbm_ecc_data);
7705 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7706 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7707 		goto reset_device;
7708 
7709 	case GAUDI_EVENT_HBM0_SPI_1:
7710 	case GAUDI_EVENT_HBM1_SPI_1:
7711 	case GAUDI_EVENT_HBM2_SPI_1:
7712 	case GAUDI_EVENT_HBM3_SPI_1:
7713 		gaudi_print_irq_info(hdev, event_type, false);
7714 		gaudi_hbm_read_interrupts(hdev,
7715 				gaudi_hbm_event_to_dev(event_type),
7716 				&eq_entry->hbm_ecc_data);
7717 		hl_fw_unmask_irq(hdev, event_type);
7718 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7719 		break;
7720 
7721 	case GAUDI_EVENT_TPC0_DEC:
7722 	case GAUDI_EVENT_TPC1_DEC:
7723 	case GAUDI_EVENT_TPC2_DEC:
7724 	case GAUDI_EVENT_TPC3_DEC:
7725 	case GAUDI_EVENT_TPC4_DEC:
7726 	case GAUDI_EVENT_TPC5_DEC:
7727 	case GAUDI_EVENT_TPC6_DEC:
7728 	case GAUDI_EVENT_TPC7_DEC:
7729 		/* On a TPC DEC event, notify user space of a possible TPC assertion.
7730 		 * There is no dedicated assertion event yet, so the FW reuses the TPC DEC event.
7731 		 * The SW upper layer inspects an internally mapped area to determine
7732 		 * whether the event is a TPC assertion or a "real" TPC DEC.
7733 		 */
7734 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7735 		gaudi_print_irq_info(hdev, event_type, true);
7736 		reset_required = gaudi_tpc_read_interrupts(hdev,
7737 					tpc_dec_event_to_tpc_id(event_type),
7738 					"AXI_SLV_DEC_Error");
7739 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7740 		if (reset_required) {
7741 			dev_err(hdev->dev, "reset required due to %s\n",
7742 				gaudi_irq_map_table[event_type].name);
7743 
7744 			reset_direct = true;
7745 			goto reset_device;
7746 		} else {
7747 			hl_fw_unmask_irq(hdev, event_type);
7748 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7749 		}
7750 		break;
7751 
7752 	case GAUDI_EVENT_TPC0_KRN_ERR:
7753 	case GAUDI_EVENT_TPC1_KRN_ERR:
7754 	case GAUDI_EVENT_TPC2_KRN_ERR:
7755 	case GAUDI_EVENT_TPC3_KRN_ERR:
7756 	case GAUDI_EVENT_TPC4_KRN_ERR:
7757 	case GAUDI_EVENT_TPC5_KRN_ERR:
7758 	case GAUDI_EVENT_TPC6_KRN_ERR:
7759 	case GAUDI_EVENT_TPC7_KRN_ERR:
7760 		gaudi_print_irq_info(hdev, event_type, true);
7761 		reset_required = gaudi_tpc_read_interrupts(hdev,
7762 					tpc_krn_event_to_tpc_id(event_type),
7763 					"KRN_ERR");
7764 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7765 		if (reset_required) {
7766 			dev_err(hdev->dev, "reset required due to %s\n",
7767 				gaudi_irq_map_table[event_type].name);
7768 
7769 			reset_direct = true;
7770 			goto reset_device;
7771 		} else {
7772 			hl_fw_unmask_irq(hdev, event_type);
7773 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7774 		}
7775 		break;
7776 
7777 	case GAUDI_EVENT_PCIE_CORE_SERR:
7778 	case GAUDI_EVENT_PCIE_IF_SERR:
7779 	case GAUDI_EVENT_PCIE_PHY_SERR:
7780 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7781 	case GAUDI_EVENT_MME0_ACC_SERR:
7782 	case GAUDI_EVENT_MME0_SBAB_SERR:
7783 	case GAUDI_EVENT_MME1_ACC_SERR:
7784 	case GAUDI_EVENT_MME1_SBAB_SERR:
7785 	case GAUDI_EVENT_MME2_ACC_SERR:
7786 	case GAUDI_EVENT_MME2_SBAB_SERR:
7787 	case GAUDI_EVENT_MME3_ACC_SERR:
7788 	case GAUDI_EVENT_MME3_SBAB_SERR:
7789 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7790 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7791 	case GAUDI_EVENT_PSOC_MEM_SERR:
7792 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7793 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7794 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7795 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7796 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7797 		fallthrough;
7798 	case GAUDI_EVENT_MMU_SERR:
7799 		gaudi_print_irq_info(hdev, event_type, true);
7800 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7801 		hl_fw_unmask_irq(hdev, event_type);
7802 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7803 		break;
7804 
7805 	case GAUDI_EVENT_PCIE_DEC:
7806 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7807 	case GAUDI_EVENT_PSOC_AXI_DEC:
7808 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7809 		gaudi_print_irq_info(hdev, event_type, true);
7810 		hl_fw_unmask_irq(hdev, event_type);
7811 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7812 		break;
7813 
7814 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7815 	case GAUDI_EVENT_MMU_WR_PERM:
7816 		gaudi_print_irq_info(hdev, event_type, true);
7817 		hl_fw_unmask_irq(hdev, event_type);
7818 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7819 		break;
7820 
7821 	case GAUDI_EVENT_MME0_WBC_RSP:
7822 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7823 	case GAUDI_EVENT_MME1_WBC_RSP:
7824 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7825 	case GAUDI_EVENT_MME2_WBC_RSP:
7826 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7827 	case GAUDI_EVENT_MME3_WBC_RSP:
7828 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7829 	case GAUDI_EVENT_RAZWI_OR_ADC:
7830 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7831 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7832 		fallthrough;
7833 	case GAUDI_EVENT_NIC0_QM0:
7834 	case GAUDI_EVENT_NIC0_QM1:
7835 	case GAUDI_EVENT_NIC1_QM0:
7836 	case GAUDI_EVENT_NIC1_QM1:
7837 	case GAUDI_EVENT_NIC2_QM0:
7838 	case GAUDI_EVENT_NIC2_QM1:
7839 	case GAUDI_EVENT_NIC3_QM0:
7840 	case GAUDI_EVENT_NIC3_QM1:
7841 	case GAUDI_EVENT_NIC4_QM0:
7842 	case GAUDI_EVENT_NIC4_QM1:
7843 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7844 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7845 		gaudi_print_irq_info(hdev, event_type, true);
7846 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7847 		hl_fw_unmask_irq(hdev, event_type);
7848 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7849 		break;
7850 
7851 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7852 		gaudi_print_irq_info(hdev, event_type, true);
7853 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7854 		goto reset_device;
7855 
7856 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7857 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7858 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7859 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7860 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7861 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7862 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7863 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7864 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7865 		gaudi_print_irq_info(hdev, event_type, false);
7866 		hl_fw_unmask_irq(hdev, event_type);
7867 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7868 		break;
7869 
7870 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7871 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7872 		hl_fw_unmask_irq(hdev, event_type);
7873 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7874 		break;
7875 
7876 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7877 		gaudi_print_irq_info(hdev, event_type, false);
7878 		gaudi_print_sm_sei_info(hdev, event_type,
7879 					&eq_entry->sm_sei_data);
7880 		rc = hl_state_dump(hdev);
7881 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7882 		if (rc)
7883 			dev_err(hdev->dev,
7884 				"Error during system state dump %d\n", rc);
7885 		hl_fw_unmask_irq(hdev, event_type);
7886 		break;
7887 
7888 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7889 		break;
7890 
7891 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7892 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7893 		gaudi_print_clk_change_info(hdev, event_type);
7894 		hl_fw_unmask_irq(hdev, event_type);
7895 		break;
7896 
7897 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7898 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7899 		dev_err(hdev->dev,
7900 			"Received high temp H/W interrupt %d (cause %d)\n",
7901 			event_type, cause);
7902 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7903 		break;
7904 
7905 	case GAUDI_EVENT_DEV_RESET_REQ:
7906 		gaudi_print_irq_info(hdev, event_type, false);
7907 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7908 		goto reset_device;
7909 
7910 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7911 		gaudi_print_irq_info(hdev, event_type, false);
7912 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7913 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7914 		goto reset_device;
7915 
7916 	case GAUDI_EVENT_FW_ALIVE_S:
7917 		gaudi_print_irq_info(hdev, event_type, false);
7918 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7919 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7920 		goto reset_device;
7921 
7922 	default:
7923 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7924 				event_type);
7925 		break;
7926 	}
7927 
7928 	if (event_mask)
7929 		hl_notifier_event_send_all(hdev, event_mask);
7930 
7931 	return;
7932 
7933 reset_device:
7934 	reset_required = true;
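	/* Summary of the decision below (descriptive note): when FW security is
	 * enabled and this is not a direct reset, perform a hard reset that
	 * bypasses the request-to-FW step; otherwise, if hard resets on FW
	 * events are allowed, perform a delayed hard reset; otherwise skip the
	 * reset and only unmask the interrupt.
	 */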
7935 
7936 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7937 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7938 
7939 		/* notify that the device is unavailable while the reset is triggered by FW */
7940 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7941 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7942 	} else if (hdev->hard_reset_on_fw_events) {
7943 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7944 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7945 	} else {
7946 		reset_required = false;
7947 	}
7948 
7949 	/* Even if the reset does not execute, a notification about the
7950 	 * event that occurred must be sent here.
7951 	 */
7952 	hl_notifier_event_send_all(hdev, event_mask);
7953 	if (reset_required)
7954 		hl_device_reset(hdev, flags);
7955 	else
7956 		hl_fw_unmask_irq(hdev, event_type);
7957 }
7958 
7959 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7960 {
7961 	struct gaudi_device *gaudi = hdev->asic_specific;
7962 
7963 	if (aggregate) {
7964 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7965 		return gaudi->events_stat_aggregate;
7966 	}
7967 
7968 	*size = (u32) sizeof(gaudi->events_stat);
7969 	return gaudi->events_stat;
7970 }
7971 
7972 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7973 {
7974 	struct gaudi_device *gaudi = hdev->asic_specific;
7975 	u32 status, timeout_usec;
7976 	int rc;
7977 
7978 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7979 		hdev->reset_info.hard_reset_pending)
7980 		return 0;
7981 
7982 	if (hdev->pldm)
7983 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7984 	else
7985 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7986 
7987 	/* L0 & L1 invalidation */
7988 	WREG32(mmSTLB_INV_PS, 3);
7989 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7990 	WREG32(mmSTLB_INV_PS, 2);
7991 
7992 	rc = hl_poll_timeout(
7993 		hdev,
7994 		mmSTLB_INV_PS,
7995 		status,
7996 		!status,
7997 		1000,
7998 		timeout_usec);
7999 
8000 	WREG32(mmSTLB_INV_SET, 0);
8001 
8002 	return rc;
8003 }
8004 
8005 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8006 						bool is_hard, u32 flags,
8007 						u32 asid, u64 va, u64 size)
8008 {
8009 	/* Treat as invalidate all because there is no range invalidation
8010 	 * in Gaudi
8011 	 */
8012 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8013 }
8014 
8015 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8016 {
8017 	u32 status, timeout_usec;
8018 	int rc;
8019 
8020 	if (hdev->pldm)
8021 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8022 	else
8023 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8024 
8025 	WREG32(MMU_ASID, asid);
8026 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8027 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
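	/* Descriptive note (assuming the shift macros equal 12 and 44, as their
	 * names suggest): the hop0 table physical address is programmed in two
	 * parts - bits 43:12 go into MMU_HOP0_PA43_12 and bits 49:44 go into
	 * MMU_HOP0_PA49_44.
	 */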
8028 	WREG32(MMU_BUSY, 0x80000000);
8029 
8030 	rc = hl_poll_timeout(
8031 		hdev,
8032 		MMU_BUSY,
8033 		status,
8034 		!(status & 0x80000000),
8035 		1000,
8036 		timeout_usec);
8037 
8038 	if (rc) {
8039 		dev_err(hdev->dev,
8040 			"Timeout during MMU hop0 config of asid %d\n", asid);
8041 		return rc;
8042 	}
8043 
8044 	return 0;
8045 }
8046 
8047 static int gaudi_send_heartbeat(struct hl_device *hdev)
8048 {
8049 	struct gaudi_device *gaudi = hdev->asic_specific;
8050 
8051 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8052 		return 0;
8053 
8054 	return hl_fw_send_heartbeat(hdev);
8055 }
8056 
8057 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8058 {
8059 	struct gaudi_device *gaudi = hdev->asic_specific;
8060 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8061 	int rc;
8062 
8063 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8064 		return 0;
8065 
8066 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8067 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8068 					mmCPU_BOOT_ERR1);
8069 	if (rc)
8070 		return rc;
8071 
8072 	if (!strlen(prop->cpucp_info.card_name))
8073 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8074 				CARD_NAME_MAX_LEN);
8075 
8076 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8077 
8078 	set_default_power_values(hdev);
8079 
8080 	return 0;
8081 }
8082 
8083 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8084 		struct engines_data *e)
8085 {
8086 	struct gaudi_device *gaudi = hdev->asic_specific;
8087 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8088 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8089 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8090 	unsigned long *mask = (unsigned long *)mask_arr;
8091 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8092 	bool is_idle = true, is_eng_idle, is_slave;
8093 	u64 offset;
8094 	int i, dma_id, port;
8095 
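	/* Descriptive note: mask_arr, when provided, is treated as a bitmap of
	 * busy engines - a GAUDI_ENGINE_ID_* bit is set for every engine found
	 * not idle, while the return value reflects whether the device as a
	 * whole is idle.
	 */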
8096 	if (e)
8097 		hl_engine_data_sprintf(e,
8098 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8099 			"---  -------  ------------  ----------  -------------\n");
8100 
8101 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8102 		dma_id = gaudi_dma_assignment[i];
8103 		offset = dma_id * DMA_QMAN_OFFSET;
8104 
8105 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8106 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8107 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8108 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8109 				IS_DMA_IDLE(dma_core_sts0);
8110 		is_idle &= is_eng_idle;
8111 
8112 		if (mask && !is_eng_idle)
8113 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8114 		if (e)
8115 			hl_engine_data_sprintf(e, fmt, dma_id,
8116 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8117 				qm_cgm_sts, dma_core_sts0);
8118 	}
8119 
8120 	if (e)
8121 		hl_engine_data_sprintf(e,
8122 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8123 			"---  -------  ------------  ----------  ----------\n");
8124 
8125 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8126 		offset = i * TPC_QMAN_OFFSET;
8127 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8128 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8129 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8130 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8131 				IS_TPC_IDLE(tpc_cfg_sts);
8132 		is_idle &= is_eng_idle;
8133 
8134 		if (mask && !is_eng_idle)
8135 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8136 		if (e)
8137 			hl_engine_data_sprintf(e, fmt, i,
8138 				is_eng_idle ? "Y" : "N",
8139 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8140 	}
8141 
8142 	if (e)
8143 		hl_engine_data_sprintf(e,
8144 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8145 			"---  -------  ------------  ----------  -----------\n");
8146 
8147 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8148 		offset = i * MME_QMAN_OFFSET;
8149 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8150 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8151 
8152 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8153 		is_slave = i % 2;
8154 		if (!is_slave) {
8155 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8156 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8157 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8158 		}
8159 
8160 		is_idle &= is_eng_idle;
8161 
8162 		if (mask && !is_eng_idle)
8163 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8164 		if (e) {
8165 			if (!is_slave)
8166 				hl_engine_data_sprintf(e, fmt, i,
8167 					is_eng_idle ? "Y" : "N",
8168 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8169 			else
8170 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8171 					is_eng_idle ? "Y" : "N", "-",
8172 					"-", mme_arch_sts);
8173 		}
8174 	}
8175 
8176 	if (e)
8177 		hl_engine_data_sprintf(e,
8178 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8179 				"---  -------  ------------  ----------\n");
8180 
8181 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8182 		offset = i * NIC_MACRO_QMAN_OFFSET;
8183 		port = 2 * i;
8184 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8185 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8186 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8187 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8188 			is_idle &= is_eng_idle;
8189 
8190 			if (mask && !is_eng_idle)
8191 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8192 			if (e)
8193 				hl_engine_data_sprintf(e, nic_fmt, port,
8194 						is_eng_idle ? "Y" : "N",
8195 						qm_glbl_sts0, qm_cgm_sts);
8196 		}
8197 
8198 		port = 2 * i + 1;
8199 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8200 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8201 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8202 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8203 			is_idle &= is_eng_idle;
8204 
8205 			if (mask && !is_eng_idle)
8206 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8207 			if (e)
8208 				hl_engine_data_sprintf(e, nic_fmt, port,
8209 						is_eng_idle ? "Y" : "N",
8210 						qm_glbl_sts0, qm_cgm_sts);
8211 		}
8212 	}
8213 
8214 	if (e)
8215 		hl_engine_data_sprintf(e, "\n");
8216 
8217 	return is_idle;
8218 }
8219 
8220 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8221 	__acquires(&gaudi->hw_queues_lock)
8222 {
8223 	struct gaudi_device *gaudi = hdev->asic_specific;
8224 
8225 	spin_lock(&gaudi->hw_queues_lock);
8226 }
8227 
8228 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8229 	__releases(&gaudi->hw_queues_lock)
8230 {
8231 	struct gaudi_device *gaudi = hdev->asic_specific;
8232 
8233 	spin_unlock(&gaudi->hw_queues_lock);
8234 }
8235 
8236 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8237 {
8238 	return hdev->pdev->device;
8239 }
8240 
8241 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8242 				size_t max_size)
8243 {
8244 	struct gaudi_device *gaudi = hdev->asic_specific;
8245 
8246 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8247 		return 0;
8248 
8249 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8250 }
8251 
8252 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8253 {
8254 	struct gaudi_device *gaudi = hdev->asic_specific;
8255 
8256 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8257 		return 0;
8258 
8259 	return hl_fw_get_monitor_dump(hdev, data);
8260 }
8261 
8262 /*
8263  * this function should be used only during initialization and/or after reset,
8264  * when there are no active users.
8265  */
8266 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8267 {
8268 	u64 kernel_timeout;
8269 	u32 status, offset;
8270 	int rc;
8271 
8272 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8273 
8274 	if (hdev->pldm)
8275 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8276 	else
8277 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8278 
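	/* Descriptive outline of the sequence below: program the kernel and
	 * icache base addresses, invalidate and prefetch the icache, wait for
	 * the vector pipe to become empty, trigger execution, wait for the
	 * vector pipe again and finally wait for the WQ in-flight counter to
	 * drop to zero, i.e. for the kernel to finish.
	 */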
8279 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8280 			lower_32_bits(tpc_kernel));
8281 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8282 			upper_32_bits(tpc_kernel));
8283 
8284 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8285 			lower_32_bits(tpc_kernel));
8286 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8287 			upper_32_bits(tpc_kernel));
8288 	/* set a valid LUT pointer, content is of no significance */
8289 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8290 			lower_32_bits(tpc_kernel));
8291 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8292 			upper_32_bits(tpc_kernel));
8293 
8294 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8295 			lower_32_bits(CFG_BASE +
8296 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8297 
8298 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8299 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8300 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8301 	/* wait a bit for the icache invalidate/prefetch command to take effect */
8302 	usleep_range(1000, 1500);
8303 
8304 	/* wait until the icache prefetch has finished */
8305 	rc = hl_poll_timeout(
8306 		hdev,
8307 		mmTPC0_CFG_STATUS + offset,
8308 		status,
8309 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8310 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8311 		1000,
8312 		kernel_timeout);
8313 
8314 	if (rc) {
8315 		dev_err(hdev->dev,
8316 			"Timeout while waiting for TPC%d icache prefetch\n",
8317 			tpc_id);
8318 		return -EIO;
8319 	}
8320 
8321 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8322 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8323 
8324 	/* wait a bit for the engine to start executing */
8325 	usleep_range(1000, 1500);
8326 
8327 	/* wait until engine has finished executing */
8328 	rc = hl_poll_timeout(
8329 		hdev,
8330 		mmTPC0_CFG_STATUS + offset,
8331 		status,
8332 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8333 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8334 		1000,
8335 		kernel_timeout);
8336 
8337 	if (rc) {
8338 		dev_err(hdev->dev,
8339 			"Timeout while waiting for TPC%d vector pipe\n",
8340 			tpc_id);
8341 		return -EIO;
8342 	}
8343 
8344 	rc = hl_poll_timeout(
8345 		hdev,
8346 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8347 		status,
8348 		(status == 0),
8349 		1000,
8350 		kernel_timeout);
8351 
8352 	if (rc) {
8353 		dev_err(hdev->dev,
8354 			"Timeout while waiting for TPC%d kernel to execute\n",
8355 			tpc_id);
8356 		return -EIO;
8357 	}
8358 
8359 	return 0;
8360 }
8361 
8362 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8363 		struct hl_ctx *ctx)
8364 {
8365 	struct gaudi_device *gaudi = hdev->asic_specific;
8366 	int min_alloc_order, rc, collective_cb_size;
8367 
8368 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8369 		return 0;
8370 
8371 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8372 							HOST_SPACE_INTERNAL_CB_SZ,
8373 							&hdev->internal_cb_pool_dma_addr,
8374 							GFP_KERNEL | __GFP_ZERO);
8375 
8376 	if (!hdev->internal_cb_pool_virt_addr)
8377 		return -ENOMEM;
8378 
8379 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8380 			sizeof(struct packet_fence);
8381 	min_alloc_order = ilog2(collective_cb_size);
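	/* Note: gen_pool_create() takes min_alloc_order as the log2 of the
	 * smallest allocation granularity; ilog2() rounds down, so the pool
	 * granularity here may be smaller than collective_cb_size itself.
	 */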
8382 
8383 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8384 	if (!hdev->internal_cb_pool) {
8385 		dev_err(hdev->dev,
8386 			"Failed to create internal CB pool\n");
8387 		rc = -ENOMEM;
8388 		goto free_internal_cb_pool;
8389 	}
8390 
8391 	rc = gen_pool_add(hdev->internal_cb_pool,
8392 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8393 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8394 	if (rc) {
8395 		dev_err(hdev->dev,
8396 			"Failed to add memory to internal CB pool\n");
8397 		rc = -EFAULT;
8398 		goto destroy_internal_cb_pool;
8399 	}
8400 
8401 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8402 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8403 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8404 
8405 	if (!hdev->internal_cb_va_base) {
8406 		rc = -ENOMEM;
8407 		goto destroy_internal_cb_pool;
8408 	}
8409 
8410 	mutex_lock(&hdev->mmu_lock);
8411 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8412 			hdev->internal_cb_pool_dma_addr,
8413 			HOST_SPACE_INTERNAL_CB_SZ);
8414 
8415 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8416 	mutex_unlock(&hdev->mmu_lock);
8417 
8418 	if (rc)
8419 		goto unreserve_internal_cb_pool;
8420 
8421 	return 0;
8422 
8423 unreserve_internal_cb_pool:
8424 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8425 			HOST_SPACE_INTERNAL_CB_SZ);
8426 destroy_internal_cb_pool:
8427 	gen_pool_destroy(hdev->internal_cb_pool);
8428 free_internal_cb_pool:
8429 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8430 					hdev->internal_cb_pool_dma_addr);
8431 
8432 	return rc;
8433 }
8434 
8435 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8436 		struct hl_ctx *ctx)
8437 {
8438 	struct gaudi_device *gaudi = hdev->asic_specific;
8439 
8440 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8441 		return;
8442 
8443 	mutex_lock(&hdev->mmu_lock);
8444 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8445 			HOST_SPACE_INTERNAL_CB_SZ);
8446 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8447 			HOST_SPACE_INTERNAL_CB_SZ);
8448 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8449 	mutex_unlock(&hdev->mmu_lock);
8450 
8451 	gen_pool_destroy(hdev->internal_cb_pool);
8452 
8453 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8454 					hdev->internal_cb_pool_dma_addr);
8455 }
8456 
8457 static int gaudi_ctx_init(struct hl_ctx *ctx)
8458 {
8459 	int rc;
8460 
8461 	if (ctx->asid == HL_KERNEL_ASID_ID)
8462 		return 0;
8463 
8464 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8465 	if (rc)
8466 		return rc;
8467 
8468 	rc = gaudi_restore_user_registers(ctx->hdev);
8469 	if (rc)
8470 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8471 
8472 	return rc;
8473 }
8474 
8475 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8476 {
8477 	if (ctx->asid == HL_KERNEL_ASID_ID)
8478 		return;
8479 
8480 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8481 }
8482 
8483 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8484 {
8485 	return 0;
8486 }
8487 
8488 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8489 {
8490 	return gaudi_cq_assignment[cq_idx];
8491 }
8492 
8493 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8494 {
8495 	return sizeof(struct packet_msg_short) +
8496 			sizeof(struct packet_msg_prot) * 2;
8497 }
8498 
8499 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8500 {
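	/* Descriptive note (the MSG_PROT part is an assumption): the four
	 * MSG_SHORT packets correspond to the three monitor-setup packets plus
	 * the monitor-arm packet built by gaudi_gen_wait_cb(), followed by one
	 * FENCE packet; the two MSG_PROT packets are assumed to be appended
	 * when the CB is finalized.
	 */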
8501 	return sizeof(struct packet_msg_short) * 4 +
8502 			sizeof(struct packet_fence) +
8503 			sizeof(struct packet_msg_prot) * 2;
8504 }
8505 
8506 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8507 {
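	/* Each SOB is a 32-bit register, hence the sob_id * 4 byte offset
	 * from the first SOB object register.
	 */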
8508 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8509 }
8510 
8511 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8512 				u32 size, bool eb)
8513 {
8514 	struct hl_cb *cb = (struct hl_cb *) data;
8515 	struct packet_msg_short *pkt;
8516 	u32 value, ctl, pkt_size = sizeof(*pkt);
8517 
8518 	pkt = cb->kernel_address + size;
8519 	memset(pkt, 0, pkt_size);
8520 
8521 	/* Inc by 1, Mode ADD */
8522 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8523 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8524 
8525 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8526 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8527 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8528 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8529 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8530 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8531 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8532 
8533 	pkt->value = cpu_to_le32(value);
8534 	pkt->ctl = cpu_to_le32(ctl);
8535 
8536 	return size + pkt_size;
8537 }
8538 
8539 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8540 					u16 addr)
8541 {
8542 	u32 ctl, pkt_size = sizeof(*pkt);
8543 
8544 	memset(pkt, 0, pkt_size);
8545 
8546 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8547 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8548 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8549 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8550 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8551 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8552 
8553 	pkt->value = cpu_to_le32(value);
8554 	pkt->ctl = cpu_to_le32(ctl);
8555 
8556 	return pkt_size;
8557 }
8558 
8559 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8560 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8561 		u16 sob_val, u16 mon_id)
8562 {
8563 	u64 monitor_base;
8564 	u32 ctl, value, pkt_size = sizeof(*pkt);
8565 	u16 msg_addr_offset;
8566 	u8 mask;
8567 
8568 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8569 		dev_err(hdev->dev,
8570 			"sob_base %u (mask %#x) is not valid\n",
8571 			sob_base, sob_mask);
8572 		return 0;
8573 	}
8574 
8575 	/*
8576 	 * monitor_base should be the content of the base0 address registers,
8577 	 * since the HW adds it to the msg short offsets
8578 	 */
8579 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8580 
8581 	msg_addr_offset =
8582 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8583 				monitor_base;
8584 
8585 	memset(pkt, 0, pkt_size);
8586 
8587 	/* Monitor config packet: bind the monitor to a sync object */
8588 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8589 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8590 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8591 			0); /* GREATER OR EQUAL*/
8592 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
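	/* Descriptive note: sob_base / 8 selects the sync group, as each
	 * monitor observes a group of 8 SOBs; set bits in the mask exclude the
	 * corresponding SOBs from the comparison (cleared bits are the
	 * monitored ones, see gaudi_fill_sobs_from_mon()).
	 */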
8593 
8594 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8595 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8596 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8597 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8598 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8599 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8600 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8601 
8602 	pkt->value = cpu_to_le32(value);
8603 	pkt->ctl = cpu_to_le32(ctl);
8604 
8605 	return pkt_size;
8606 }
8607 
8608 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8609 {
8610 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8611 
8612 	memset(pkt, 0, pkt_size);
8613 
8614 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8615 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8616 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8617 
8618 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8619 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8620 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8621 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8622 
8623 	pkt->cfg = cpu_to_le32(cfg);
8624 	pkt->ctl = cpu_to_le32(ctl);
8625 
8626 	return pkt_size;
8627 }
8628 
8629 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8630 {
8631 	u32 offset, nic_index;
8632 
8633 	switch (queue_id) {
8634 	case GAUDI_QUEUE_ID_DMA_0_0:
8635 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8636 		break;
8637 	case GAUDI_QUEUE_ID_DMA_0_1:
8638 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8639 		break;
8640 	case GAUDI_QUEUE_ID_DMA_0_2:
8641 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8642 		break;
8643 	case GAUDI_QUEUE_ID_DMA_0_3:
8644 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8645 		break;
8646 	case GAUDI_QUEUE_ID_DMA_1_0:
8647 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8648 		break;
8649 	case GAUDI_QUEUE_ID_DMA_1_1:
8650 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8651 		break;
8652 	case GAUDI_QUEUE_ID_DMA_1_2:
8653 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8654 		break;
8655 	case GAUDI_QUEUE_ID_DMA_1_3:
8656 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8657 		break;
8658 	case GAUDI_QUEUE_ID_DMA_5_0:
8659 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8660 		break;
8661 	case GAUDI_QUEUE_ID_DMA_5_1:
8662 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8663 		break;
8664 	case GAUDI_QUEUE_ID_DMA_5_2:
8665 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8666 		break;
8667 	case GAUDI_QUEUE_ID_DMA_5_3:
8668 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8669 		break;
8670 	case GAUDI_QUEUE_ID_TPC_7_0:
8671 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8672 		break;
8673 	case GAUDI_QUEUE_ID_TPC_7_1:
8674 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8675 		break;
8676 	case GAUDI_QUEUE_ID_TPC_7_2:
8677 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8678 		break;
8679 	case GAUDI_QUEUE_ID_TPC_7_3:
8680 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8681 		break;
8682 	case GAUDI_QUEUE_ID_NIC_0_0:
8683 	case GAUDI_QUEUE_ID_NIC_1_0:
8684 	case GAUDI_QUEUE_ID_NIC_2_0:
8685 	case GAUDI_QUEUE_ID_NIC_3_0:
8686 	case GAUDI_QUEUE_ID_NIC_4_0:
8687 	case GAUDI_QUEUE_ID_NIC_5_0:
8688 	case GAUDI_QUEUE_ID_NIC_6_0:
8689 	case GAUDI_QUEUE_ID_NIC_7_0:
8690 	case GAUDI_QUEUE_ID_NIC_8_0:
8691 	case GAUDI_QUEUE_ID_NIC_9_0:
8692 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8693 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8694 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8695 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
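		/* Illustrative example (assuming four consecutive queue IDs per
		 * NIC engine, i.e. streams 0-3): GAUDI_QUEUE_ID_NIC_3_0 yields
		 * nic_index = 3, so (nic_index >> 1) = 1 selects NIC macro 1 and
		 * (nic_index & 1) = 1 selects the second QMAN (QM1) in that macro.
		 */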
8696 		break;
8697 	case GAUDI_QUEUE_ID_NIC_0_1:
8698 	case GAUDI_QUEUE_ID_NIC_1_1:
8699 	case GAUDI_QUEUE_ID_NIC_2_1:
8700 	case GAUDI_QUEUE_ID_NIC_3_1:
8701 	case GAUDI_QUEUE_ID_NIC_4_1:
8702 	case GAUDI_QUEUE_ID_NIC_5_1:
8703 	case GAUDI_QUEUE_ID_NIC_6_1:
8704 	case GAUDI_QUEUE_ID_NIC_7_1:
8705 	case GAUDI_QUEUE_ID_NIC_8_1:
8706 	case GAUDI_QUEUE_ID_NIC_9_1:
8707 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8708 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8709 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8710 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8711 		break;
8712 	case GAUDI_QUEUE_ID_NIC_0_2:
8713 	case GAUDI_QUEUE_ID_NIC_1_2:
8714 	case GAUDI_QUEUE_ID_NIC_2_2:
8715 	case GAUDI_QUEUE_ID_NIC_3_2:
8716 	case GAUDI_QUEUE_ID_NIC_4_2:
8717 	case GAUDI_QUEUE_ID_NIC_5_2:
8718 	case GAUDI_QUEUE_ID_NIC_6_2:
8719 	case GAUDI_QUEUE_ID_NIC_7_2:
8720 	case GAUDI_QUEUE_ID_NIC_8_2:
8721 	case GAUDI_QUEUE_ID_NIC_9_2:
8722 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8723 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8724 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8725 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8726 		break;
8727 	case GAUDI_QUEUE_ID_NIC_0_3:
8728 	case GAUDI_QUEUE_ID_NIC_1_3:
8729 	case GAUDI_QUEUE_ID_NIC_2_3:
8730 	case GAUDI_QUEUE_ID_NIC_3_3:
8731 	case GAUDI_QUEUE_ID_NIC_4_3:
8732 	case GAUDI_QUEUE_ID_NIC_5_3:
8733 	case GAUDI_QUEUE_ID_NIC_6_3:
8734 	case GAUDI_QUEUE_ID_NIC_7_3:
8735 	case GAUDI_QUEUE_ID_NIC_8_3:
8736 	case GAUDI_QUEUE_ID_NIC_9_3:
8737 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8738 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8739 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8740 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8741 		break;
8742 	default:
8743 		return -EINVAL;
8744 	}
8745 
8746 	*addr = CFG_BASE + offset;
8747 
8748 	return 0;
8749 }
8750 
8751 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8752 {
8753 	u64 monitor_base;
8754 	u32 size = 0;
8755 	u16 msg_addr_offset;
8756 
8757 	/*
8758 	 * monitor_base should be the content of the base0 address registers,
8759 	 * since the HW adds it to the msg short offsets
8760 	 */
8761 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8762 
8763 	/* First monitor config packet: low address of the sync */
8764 	msg_addr_offset =
8765 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8766 				monitor_base;
8767 
8768 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8769 					msg_addr_offset);
8770 
8771 	/* Second monitor config packet: high address of the sync */
8772 	msg_addr_offset =
8773 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8774 				monitor_base;
8775 
8776 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8777 					msg_addr_offset);
8778 
8779 	/*
8780 	 * Third monitor config packet: the payload, i.e. what to write when the
8781 	 * sync triggers
8782 	 */
8783 	msg_addr_offset =
8784 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8785 				monitor_base;
8786 
8787 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8788 
8789 	return size;
8790 }
8791 
8792 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8793 				struct hl_gen_wait_properties *prop)
8794 {
8795 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8796 	void *buf = cb->kernel_address;
8797 	u64 fence_addr = 0;
8798 	u32 size = prop->size;
8799 
8800 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8801 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8802 				prop->q_idx);
8803 		return 0;
8804 	}
8805 
8806 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8807 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8808 			prop->sob_mask, prop->sob_val, prop->mon_id);
8809 	size += gaudi_add_fence_pkt(buf + size);
8810 
8811 	return size;
8812 }
8813 
8814 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8815 {
8816 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8817 
8818 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8819 		hw_sob->sob_id);
8820 
8821 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8822 			hw_sob->sob_id * 4, 0);
8823 
8824 	kref_init(&hw_sob->kref);
8825 }
8826 
8827 static u64 gaudi_get_device_time(struct hl_device *hdev)
8828 {
8829 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8830 
8831 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8832 }
8833 
8834 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8835 				u32 *block_size, u32 *block_id)
8836 {
8837 	return -EPERM;
8838 }
8839 
8840 static int gaudi_block_mmap(struct hl_device *hdev,
8841 				struct vm_area_struct *vma,
8842 				u32 block_id, u32 block_size)
8843 {
8844 	return -EPERM;
8845 }
8846 
8847 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8848 {
8849 	struct cpu_dyn_regs *dyn_regs =
8850 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8851 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8852 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8853 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8854 
8855 	WREG32(irq_handler_offset,
8856 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8857 }
8858 
8859 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8860 {
8861 	return -EINVAL;
8862 }
8863 
8864 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8865 {
8866 	switch (pll_idx) {
8867 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8868 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8869 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8870 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8871 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8872 	case HL_GAUDI_MME_PLL: return MME_PLL;
8873 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8874 	case HL_GAUDI_IF_PLL: return IF_PLL;
8875 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8876 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8877 	default: return -EINVAL;
8878 	}
8879 }
8880 
8881 static int gaudi_add_sync_to_engine_map_entry(
8882 	struct hl_sync_to_engine_map *map, u32 reg_value,
8883 	enum hl_sync_engine_type engine_type, u32 engine_id)
8884 {
8885 	struct hl_sync_to_engine_map_entry *entry;
8886 
8887 	/* The register value represents a partial address of the sync object
8888 	 * and is used as a unique identifier. For that purpose, the lower
8889 	 * CFG base bits are subtracted from the value.
8890 	 */
8891 	if (reg_value == 0 || reg_value == 0xffffffff)
8892 		return 0;
8893 	reg_value -= lower_32_bits(CFG_BASE);
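	/* Example (illustrative): if the register holds CFG_BASE + the SOB
	 * register offset, only the SOB offset remains after the subtraction
	 * and is then used as the hash key below.
	 */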
8894 
8895 	/* create a new hash entry */
8896 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8897 	if (!entry)
8898 		return -ENOMEM;
8899 	entry->engine_type = engine_type;
8900 	entry->engine_id = engine_id;
8901 	entry->sync_id = reg_value;
8902 	hash_add(map->tb, &entry->node, reg_value);
8903 
8904 	return 0;
8905 }
8906 
8907 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8908 				struct hl_sync_to_engine_map *map)
8909 {
8910 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8911 	int i, j, rc;
8912 	u32 reg_value;
8913 
8914 	/* Iterate over TPC engines */
8915 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8916 
8917 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8918 					sds->props[SP_NEXT_TPC] * i);
8919 
8920 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8921 							ENGINE_TPC, i);
8922 		if (rc)
8923 			goto free_sync_to_engine_map;
8924 	}
8925 
8926 	/* Iterate over MME engines */
8927 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8928 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8929 
8930 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8931 						sds->props[SP_NEXT_MME] * i +
8932 						j * sizeof(u32));
8933 
8934 			rc = gaudi_add_sync_to_engine_map_entry(
8935 				map, reg_value, ENGINE_MME,
8936 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8937 			if (rc)
8938 				goto free_sync_to_engine_map;
8939 		}
8940 	}
8941 
8942 	/* Iterate over DMA engines */
8943 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8944 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8945 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8946 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8947 							ENGINE_DMA, i);
8948 		if (rc)
8949 			goto free_sync_to_engine_map;
8950 	}
8951 
8952 	return 0;
8953 
8954 free_sync_to_engine_map:
8955 	hl_state_dump_free_sync_to_engine_map(map);
8956 
8957 	return rc;
8958 }
8959 
8960 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8961 {
8962 	return FIELD_GET(
8963 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8964 		mon->status);
8965 }
8966 
8967 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8968 {
8969 	const size_t max_write = 10;
8970 	u32 gid, mask, sob;
8971 	int i, offset;
8972 
8973 	/* Sync object ID is calculated as follows:
8974 	 * (8 * group_id + cleared bits in mask)
8975 	 */
8976 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8977 			mon->arm_data);
8978 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8979 			mon->arm_data);
8980 
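	/* Illustrative example (values are hypothetical): with gid = 2 and
	 * mask = 0xF0, bits 0-3 are cleared, so SOBs 2 * MONITOR_MAX_SOBS + 0
	 * through 2 * MONITOR_MAX_SOBS + 3 are reported as monitored.
	 */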
8981 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8982 		max_write; mask >>= 1, i++) {
8983 		if (!(mask & 1)) {
8984 			sob = gid * MONITOR_MAX_SOBS + i;
8985 
8986 			if (offset > 0)
8987 				offset += snprintf(sobs + offset, max_write,
8988 							", ");
8989 
8990 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8991 		}
8992 	}
8993 }
8994 
8995 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8996 				struct hl_device *hdev,
8997 				struct hl_mon_state_dump *mon)
8998 {
8999 	const char *name;
9000 	char scratch_buf1[BIN_REG_STRING_SIZE],
9001 		scratch_buf2[BIN_REG_STRING_SIZE];
9002 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9003 
9004 	name = hl_state_dump_get_monitor_name(hdev, mon);
9005 	if (!name)
9006 		name = "";
9007 
9008 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9009 
9010 	return hl_snprintf_resize(
9011 		buf, size, offset,
9012 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9013 		mon->id, name,
9014 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9015 				mon->arm_data),
9016 		hl_format_as_binary(
9017 			scratch_buf1, sizeof(scratch_buf1),
9018 			FIELD_GET(
9019 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9020 				mon->arm_data)),
9021 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9022 				mon->arm_data),
9023 		mon->wr_data,
9024 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9025 		hl_format_as_binary(
9026 			scratch_buf2, sizeof(scratch_buf2),
9027 			FIELD_GET(
9028 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9029 				mon->status)),
9030 		monitored_sobs);
9031 }
9032 
9033 
9034 static int gaudi_print_fences_single_engine(
9035 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9036 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9037 	size_t *size, size_t *offset)
9038 {
9039 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9040 	int rc = -ENOMEM, i;
9041 	u32 *statuses, *fences;
9042 
9043 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9044 			sizeof(*statuses), GFP_KERNEL);
9045 	if (!statuses)
9046 		goto out;
9047 
9048 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9049 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9050 			 sizeof(*fences), GFP_KERNEL);
9051 	if (!fences)
9052 		goto free_status;
9053 
9054 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9055 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9056 
9057 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9058 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9059 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9060 
9061 	/* The actual print */
9062 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9063 		u32 fence_id;
9064 		u64 fence_cnt, fence_rdata;
9065 		const char *engine_name;
9066 
9067 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9068 			statuses[i]))
9069 			continue;
9070 
9071 		fence_id =
9072 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9073 		fence_cnt = base_offset + CFG_BASE +
9074 			sizeof(u32) *
9075 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9076 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9077 				sds->props[SP_FENCE0_RDATA_OFFSET];
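		/* Descriptive note: fence_cnt is the absolute (CFG_BASE based)
		 * address of this queue's CP_FENCE<fence_id>_CNT_<stream>
		 * register; fence_rdata is derived from it by swapping the
		 * FENCE0 CNT offset for the FENCE0 RDATA offset.
		 */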
9078 		engine_name = hl_sync_engine_to_string(engine_type);
9079 
9080 		rc = hl_snprintf_resize(
9081 			buf, size, offset,
9082 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9083 			engine_name, engine_id,
9084 			i, fence_id,
9085 			fence_cnt, engine_name, engine_id, fence_id, i,
9086 			fence_rdata, engine_name, engine_id, fence_id, i,
9087 			fences[fence_id],
9088 			statuses[i]);
9089 		if (rc)
9090 			goto free_fences;
9091 	}
9092 
9093 	rc = 0;
9094 
9095 free_fences:
9096 	kfree(fences);
9097 free_status:
9098 	kfree(statuses);
9099 out:
9100 	return rc;
9101 }
9102 
9103 
9104 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9105 	.monitor_valid = gaudi_monitor_valid,
9106 	.print_single_monitor = gaudi_print_single_monitor,
9107 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9108 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9109 };
9110 
9111 static void gaudi_state_dump_init(struct hl_device *hdev)
9112 {
9113 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9114 	int i;
9115 
9116 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9117 		hash_add(sds->so_id_to_str_tb,
9118 			&gaudi_so_id_to_str[i].node,
9119 			gaudi_so_id_to_str[i].id);
9120 
9121 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9122 		hash_add(sds->monitor_id_to_str_tb,
9123 			&gaudi_monitor_id_to_str[i].node,
9124 			gaudi_monitor_id_to_str[i].id);
9125 
9126 	sds->props = gaudi_state_dump_specs_props;
9127 
9128 	sds->sync_namager_names = gaudi_sync_manager_names;
9129 
9130 	sds->funcs = gaudi_state_dump_funcs;
9131 }
9132 
9133 static u32 *gaudi_get_stream_master_qid_arr(void)
9134 {
9135 	return gaudi_stream_master;
9136 }
9137 
9138 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9139 {
9140 }
9141 
9142 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9143 {
9144 	struct hl_device *hdev = dev_get_drvdata(dev);
9145 	struct cpucp_info *cpucp_info;
9146 
9147 	cpucp_info = &hdev->asic_prop.cpucp_info;
9148 
9149 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9150 }
9151 
9152 static DEVICE_ATTR_RO(infineon_ver);
9153 
9154 static struct attribute *gaudi_vrm_dev_attrs[] = {
9155 	&dev_attr_infineon_ver.attr,
9156 	NULL,
9157 };
9158 
9159 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9160 					struct attribute_group *dev_vrm_attr_grp)
9161 {
9162 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9163 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9164 }
9165 
9166 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9167 {
9168 	return 0;
9169 }
9170 
9171 static const struct hl_asic_funcs gaudi_funcs = {
9172 	.early_init = gaudi_early_init,
9173 	.early_fini = gaudi_early_fini,
9174 	.late_init = gaudi_late_init,
9175 	.late_fini = gaudi_late_fini,
9176 	.sw_init = gaudi_sw_init,
9177 	.sw_fini = gaudi_sw_fini,
9178 	.hw_init = gaudi_hw_init,
9179 	.hw_fini = gaudi_hw_fini,
9180 	.halt_engines = gaudi_halt_engines,
9181 	.suspend = gaudi_suspend,
9182 	.resume = gaudi_resume,
9183 	.mmap = gaudi_mmap,
9184 	.ring_doorbell = gaudi_ring_doorbell,
9185 	.pqe_write = gaudi_pqe_write,
9186 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9187 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9188 	.scrub_device_mem = gaudi_scrub_device_mem,
9189 	.scrub_device_dram = gaudi_scrub_device_dram,
9190 	.get_int_queue_base = gaudi_get_int_queue_base,
9191 	.test_queues = gaudi_test_queues,
9192 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9193 	.asic_dma_pool_free = gaudi_dma_pool_free,
9194 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9195 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9196 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9197 	.cs_parser = gaudi_cs_parser,
9198 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9199 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9200 	.update_eq_ci = gaudi_update_eq_ci,
9201 	.context_switch = gaudi_context_switch,
9202 	.restore_phase_topology = gaudi_restore_phase_topology,
9203 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9204 	.add_device_attr = gaudi_add_device_attr,
9205 	.handle_eqe = gaudi_handle_eqe,
9206 	.get_events_stat = gaudi_get_events_stat,
9207 	.read_pte = gaudi_read_pte,
9208 	.write_pte = gaudi_write_pte,
9209 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9210 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9211 	.mmu_prefetch_cache_range = NULL,
9212 	.send_heartbeat = gaudi_send_heartbeat,
9213 	.debug_coresight = gaudi_debug_coresight,
9214 	.is_device_idle = gaudi_is_device_idle,
9215 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9216 	.hw_queues_lock = gaudi_hw_queues_lock,
9217 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9218 	.get_pci_id = gaudi_get_pci_id,
9219 	.get_eeprom_data = gaudi_get_eeprom_data,
9220 	.get_monitor_dump = gaudi_get_monitor_dump,
9221 	.send_cpu_message = gaudi_send_cpu_message,
9222 	.pci_bars_map = gaudi_pci_bars_map,
9223 	.init_iatu = gaudi_init_iatu,
9224 	.rreg = hl_rreg,
9225 	.wreg = hl_wreg,
9226 	.halt_coresight = gaudi_halt_coresight,
9227 	.ctx_init = gaudi_ctx_init,
9228 	.ctx_fini = gaudi_ctx_fini,
9229 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9230 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9231 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9232 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9233 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9234 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9235 	.gen_signal_cb = gaudi_gen_signal_cb,
9236 	.gen_wait_cb = gaudi_gen_wait_cb,
9237 	.reset_sob = gaudi_reset_sob,
9238 	.reset_sob_group = gaudi_reset_sob_group,
9239 	.get_device_time = gaudi_get_device_time,
9240 	.pb_print_security_errors = NULL,
9241 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9242 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9243 	.get_dec_base_addr = NULL,
9244 	.scramble_addr = hl_mmu_scramble_addr,
9245 	.descramble_addr = hl_mmu_descramble_addr,
9246 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9247 	.get_hw_block_id = gaudi_get_hw_block_id,
9248 	.hw_block_mmap = gaudi_block_mmap,
9249 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9250 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9251 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9252 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9253 	.init_firmware_loader = gaudi_init_firmware_loader,
9254 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9255 	.state_dump_init = gaudi_state_dump_init,
9256 	.get_sob_addr = gaudi_get_sob_addr,
9257 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9258 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9259 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9260 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9261 	.access_dev_mem = hl_access_dev_mem,
9262 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9263 	.send_device_activity = gaudi_send_device_activity,
9264 };
9265 
9266 /**
9267  * gaudi_set_asic_funcs - set GAUDI function pointers
9268  *
9269  * @hdev: pointer to hl_device structure
9270  *
9271  */
9272 void gaudi_set_asic_funcs(struct hl_device *hdev)
9273 {
9274 	hdev->asic_funcs = &gaudi_funcs;
9275 }
9276