1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
98 #define GAUDI_ARB_WDT_TIMEOUT		0x1000000
99 
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
101 		BIT(GAUDI_ENGINE_ID_MME_0) |\
102 		BIT(GAUDI_ENGINE_ID_MME_2) |\
103 		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
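
/*
 * Note on the mask above: it covers all eight TPC engines plus MME 0 and
 * MME 2 (i.e. the two MME master engines); the MME slave engines and the DMA
 * engines are not exposed through this debugfs clock-gating mask.
 */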
104 
105 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
106 
107 #define GAUDI_PLL_MAX 10
108 
109 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
110 
111 #define MONITOR_SOB_STRING_SIZE		256
112 
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114 	GAUDI_QUEUE_ID_DMA_0_0,
115 	GAUDI_QUEUE_ID_DMA_0_1,
116 	GAUDI_QUEUE_ID_DMA_0_2,
117 	GAUDI_QUEUE_ID_DMA_0_3,
118 	GAUDI_QUEUE_ID_DMA_1_0,
119 	GAUDI_QUEUE_ID_DMA_1_1,
120 	GAUDI_QUEUE_ID_DMA_1_2,
121 	GAUDI_QUEUE_ID_DMA_1_3
122 };
123 
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128 		"gaudi cpu eq"
129 };
130 
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141 
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
144 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
145 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
146 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
147 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
148 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
149 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
150 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152 
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
155 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
156 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
157 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
158 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
159 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
160 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
161 	[PACKET_FENCE]		= sizeof(struct packet_fence),
162 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
163 	[PACKET_NOP]		= sizeof(struct packet_nop),
164 	[PACKET_STOP]		= sizeof(struct packet_stop),
165 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
166 	[PACKET_WAIT]		= sizeof(struct packet_wait),
167 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
168 };
169 
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172 	switch (id) {
173 	case PACKET_WREG_32:
174 	case PACKET_WREG_BULK:
175 	case PACKET_MSG_LONG:
176 	case PACKET_MSG_SHORT:
177 	case PACKET_CP_DMA:
178 	case PACKET_REPEAT:
179 	case PACKET_MSG_PROT:
180 	case PACKET_FENCE:
181 	case PACKET_LIN_DMA:
182 	case PACKET_NOP:
183 	case PACKET_STOP:
184 	case PACKET_ARB_POINT:
185 	case PACKET_WAIT:
186 	case PACKET_LOAD_AND_EXE:
187 		return true;
188 	default:
189 		return false;
190 	}
191 }
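
/*
 * gaudi_packet_sizes[] and validate_packet_id() are used together by the
 * command-buffer parsing code later in this driver: the opcode of each packet
 * header is validated first, and the parser then advances by the matching
 * gaudi_packet_sizes[] entry (a rough sketch of the flow, not an exact quote
 * of the parser).
 */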
192 
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195 	"tpc_address_exceed_slm",
196 	"tpc_div_by_0",
197 	"tpc_spu_mac_overflow",
198 	"tpc_spu_addsub_overflow",
199 	"tpc_spu_abs_overflow",
200 	"tpc_spu_fp_dst_nan_inf",
201 	"tpc_spu_fp_dst_denorm",
202 	"tpc_vpu_mac_overflow",
203 	"tpc_vpu_addsub_overflow",
204 	"tpc_vpu_abs_overflow",
205 	"tpc_vpu_fp_dst_nan_inf",
206 	"tpc_vpu_fp_dst_denorm",
207 	"tpc_assertions",
208 	"tpc_illegal_instruction",
209 	"tpc_pc_wrap_around",
210 	"tpc_qm_sw_err",
211 	"tpc_hbw_rresp_err",
212 	"tpc_hbw_bresp_err",
213 	"tpc_lbw_rresp_err",
214 	"tpc_lbw_bresp_err"
215 };
216 
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219 	"PQ AXI HBW error",
220 	"CQ AXI HBW error",
221 	"CP AXI HBW error",
222 	"CP error due to undefined OPCODE",
223 	"CP encountered STOP OPCODE",
224 	"CP AXI LBW error",
225 	"CP WRREG32 or WRBULK returned error",
226 	"N/A",
227 	"FENCE 0 inc over max value and clipped",
228 	"FENCE 1 inc over max value and clipped",
229 	"FENCE 2 inc over max value and clipped",
230 	"FENCE 3 inc over max value and clipped",
231 	"FENCE 0 dec under min value and clipped",
232 	"FENCE 1 dec under min value and clipped",
233 	"FENCE 2 dec under min value and clipped",
234 	"FENCE 3 dec under min value and clipped"
235 };
236 
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239 	"Choice push while full error",
240 	"Choice Q watchdog error",
241 	"MSG AXI LBW returned with error"
242 };
243 
244 enum gaudi_sm_sei_cause {
245 	GAUDI_SM_SEI_SO_OVERFLOW,
246 	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247 	GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249 
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365 
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395 
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409 
410 static s64 gaudi_state_dump_specs_props[] = {
411 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414 	[SP_MON_OBJ_WR_ADDR_LOW] =
415 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416 	[SP_MON_OBJ_WR_ADDR_HIGH] =
417 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438 	[SP_FENCE0_CNT_OFFSET] =
439 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440 	[SP_FENCE0_RDATA_OFFSET] =
441 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443 	[SP_NUM_CORES] = 1,
444 };
445 
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450 	"SYNC_MGR_E_N",
451 	"SYNC_MGR_W_N",
452 	"SYNC_MGR_E_S",
453 	"SYNC_MGR_W_S",
454 	NULL
455 };
456 
457 struct ecc_info_extract_params {
458 	u64 block_address;
459 	u32 num_memories;
460 	bool derr;
461 	bool disable_clock_gating;
462 };
463 
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465 								u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467 					struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469 					u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471 					u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473 				u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479 				u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481 				struct hl_gen_wait_properties *prop);
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486 		return HL_COLLECTIVE_MASTER;
487 
488 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490 		return HL_COLLECTIVE_SLAVE;
491 
492 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494 		return HL_COLLECTIVE_SLAVE;
495 
496 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498 		return HL_COLLECTIVE_SLAVE;
499 
500 	return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502 
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505 	struct asic_fixed_properties *prop = &hdev->asic_prop;
506 
507 	if (hdev->card_type == cpucp_card_type_pmc) {
508 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509 
510 		if (prop->fw_security_enabled)
511 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512 		else
513 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514 	} else {
515 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517 	}
518 }
519 
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522 	struct asic_fixed_properties *prop = &hdev->asic_prop;
523 	u32 num_sync_stream_queues = 0;
524 	int i;
525 
526 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527 	prop->hw_queues_props = kcalloc(prop->max_queues,
528 			sizeof(struct hw_queue_properties),
529 			GFP_KERNEL);
530 
531 	if (!prop->hw_queues_props)
532 		return -ENOMEM;
533 
534 	for (i = 0 ; i < prop->max_queues ; i++) {
535 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537 			prop->hw_queues_props[i].driver_only = 0;
538 			prop->hw_queues_props[i].supports_sync_stream = 1;
539 			prop->hw_queues_props[i].cb_alloc_flags =
540 				CB_ALLOC_KERNEL;
541 			num_sync_stream_queues++;
542 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544 			prop->hw_queues_props[i].driver_only = 1;
545 			prop->hw_queues_props[i].supports_sync_stream = 0;
546 			prop->hw_queues_props[i].cb_alloc_flags =
547 				CB_ALLOC_KERNEL;
548 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550 			prop->hw_queues_props[i].driver_only = 0;
551 			prop->hw_queues_props[i].supports_sync_stream = 0;
552 			prop->hw_queues_props[i].cb_alloc_flags =
553 				CB_ALLOC_USER;
554 
555 		}
556 		prop->hw_queues_props[i].collective_mode =
557 						get_collective_mode(hdev, i);
558 	}
559 
560 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562 	prop->collective_first_sob = 0;
563 	prop->collective_first_mon = 0;
564 
565 	/* 2 SOBs per internal queue stream are reserved for collective */
566 	prop->sync_stream_first_sob =
567 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568 			* QMAN_STREAMS * HL_RSVD_SOBS;
569 
570 	/* 1 monitor per internal queue stream is reserved for collective
571 	 * 2 monitors per external queue stream are reserved for collective
572 	 */
573 	prop->sync_stream_first_mon =
574 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575 			(NUMBER_OF_EXT_HW_QUEUES * 2);
576 
577 	prop->dram_base_address = DRAM_PHYS_BASE;
578 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
579 	prop->dram_end_address = prop->dram_base_address +
580 					prop->dram_size;
581 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582 
583 	prop->sram_base_address = SRAM_BASE_ADDR;
584 	prop->sram_size = SRAM_SIZE;
585 	prop->sram_end_address = prop->sram_base_address +
586 					prop->sram_size;
587 	prop->sram_user_base_address = prop->sram_base_address +
588 					SRAM_USER_BASE_OFFSET;
589 
590 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591 	if (hdev->pldm)
592 		prop->mmu_pgt_size = 0x800000; /* 8MB */
593 	else
594 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595 	prop->mmu_pte_size = HL_PTE_SIZE;
596 	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598 	prop->dram_page_size = PAGE_SIZE_2MB;
599 	prop->dram_supports_virtual_memory = false;
600 
601 	prop->pmmu.hop0_shift = HOP0_SHIFT;
602 	prop->pmmu.hop1_shift = HOP1_SHIFT;
603 	prop->pmmu.hop2_shift = HOP2_SHIFT;
604 	prop->pmmu.hop3_shift = HOP3_SHIFT;
605 	prop->pmmu.hop4_shift = HOP4_SHIFT;
606 	prop->pmmu.hop0_mask = HOP0_MASK;
607 	prop->pmmu.hop1_mask = HOP1_MASK;
608 	prop->pmmu.hop2_mask = HOP2_MASK;
609 	prop->pmmu.hop3_mask = HOP3_MASK;
610 	prop->pmmu.hop4_mask = HOP4_MASK;
611 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
612 	prop->pmmu.end_addr =
613 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614 	prop->pmmu.page_size = PAGE_SIZE_4KB;
615 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616 
617 	/* PMMU and HPMMU are the same except for the page size */
618 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620 
621 	/* shifts and masks are the same in PMMU and DMMU */
622 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
625 	prop->dmmu.page_size = PAGE_SIZE_2MB;
626 
627 	prop->cfg_size = CFG_SIZE;
628 	prop->max_asid = MAX_ASID;
629 	prop->num_of_events = GAUDI_EVENT_SIZE;
630 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631 
632 	set_default_power_values(hdev);
633 
634 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636 
637 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639 
640 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641 					CARD_NAME_MAX_LEN);
642 
643 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644 
645 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646 			prop->sync_stream_first_sob +
647 			(num_sync_stream_queues * HL_RSVD_SOBS);
648 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649 			prop->sync_stream_first_mon +
650 			(num_sync_stream_queues * HL_RSVD_MONS);
651 
652 	prop->first_available_user_msix_interrupt = USHRT_MAX;
653 
654 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
655 		prop->first_available_cq[i] = USHRT_MAX;
656 
657 	prop->fw_cpu_boot_dev_sts0_valid = false;
658 	prop->fw_cpu_boot_dev_sts1_valid = false;
659 	prop->hard_reset_done_by_fw = false;
660 	prop->gic_interrupts_enable = true;
661 
662 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663 
664 	return 0;
665 }
666 
667 static int gaudi_pci_bars_map(struct hl_device *hdev)
668 {
669 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
670 	bool is_wc[3] = {false, false, true};
671 	int rc;
672 
673 	rc = hl_pci_bars_map(hdev, name, is_wc);
674 	if (rc)
675 		return rc;
676 
677 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
678 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
679 
680 	return 0;
681 }
682 
683 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
684 {
685 	struct gaudi_device *gaudi = hdev->asic_specific;
686 	struct hl_inbound_pci_region pci_region;
687 	u64 old_addr = addr;
688 	int rc;
689 
690 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
691 		return old_addr;
692 
693 	if (hdev->asic_prop.iatu_done_by_fw)
694 		return U64_MAX;
695 
696 	/* Inbound Region 2 - Bar 4 - Point to HBM */
697 	pci_region.mode = PCI_BAR_MATCH_MODE;
698 	pci_region.bar = HBM_BAR_ID;
699 	pci_region.addr = addr;
700 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
701 	if (rc)
702 		return U64_MAX;
703 
704 	if (gaudi) {
705 		old_addr = gaudi->hbm_bar_cur_addr;
706 		gaudi->hbm_bar_cur_addr = addr;
707 	}
708 
709 	return old_addr;
710 }
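
/*
 * Illustrative use of gaudi_set_hbm_bar_base() (a sketch, not code taken from
 * this driver): callers save the returned previous base, access HBM through
 * the BAR window and then restore the old mapping. "bar_aligned_hbm_addr" is
 * a placeholder for an address aligned to the HBM BAR size.
 *
 *	u64 old_base = gaudi_set_hbm_bar_base(hdev, bar_aligned_hbm_addr);
 *	if (old_base == U64_MAX)
 *		return -EIO;
 *	... read/write through hdev->pcie_bar[HBM_BAR_ID] ...
 *	gaudi_set_hbm_bar_base(hdev, old_base);
 */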
711 
712 static int gaudi_init_iatu(struct hl_device *hdev)
713 {
714 	struct hl_inbound_pci_region inbound_region;
715 	struct hl_outbound_pci_region outbound_region;
716 	int rc;
717 
718 	if (hdev->asic_prop.iatu_done_by_fw)
719 		return 0;
720 
721 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
722 	inbound_region.mode = PCI_BAR_MATCH_MODE;
723 	inbound_region.bar = SRAM_BAR_ID;
724 	inbound_region.addr = SRAM_BASE_ADDR;
725 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
726 	if (rc)
727 		goto done;
728 
729 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
730 	inbound_region.mode = PCI_BAR_MATCH_MODE;
731 	inbound_region.bar = CFG_BAR_ID;
732 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
733 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
734 	if (rc)
735 		goto done;
736 
737 	/* Inbound Region 2 - Bar 4 - Point to HBM */
738 	inbound_region.mode = PCI_BAR_MATCH_MODE;
739 	inbound_region.bar = HBM_BAR_ID;
740 	inbound_region.addr = DRAM_PHYS_BASE;
741 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
742 	if (rc)
743 		goto done;
744 
745 	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
746 
747 	/* Outbound Region 0 - Point to Host */
748 	outbound_region.addr = HOST_PHYS_BASE;
749 	outbound_region.size = HOST_PHYS_SIZE;
750 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
751 
752 done:
753 	return rc;
754 }
755 
756 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
757 {
758 	return RREG32(mmHW_STATE);
759 }
760 
761 static int gaudi_early_init(struct hl_device *hdev)
762 {
763 	struct asic_fixed_properties *prop = &hdev->asic_prop;
764 	struct pci_dev *pdev = hdev->pdev;
765 	u32 fw_boot_status;
766 	int rc;
767 
768 	rc = gaudi_set_fixed_properties(hdev);
769 	if (rc) {
770 		dev_err(hdev->dev, "Failed setting fixed properties\n");
771 		return rc;
772 	}
773 
774 	/* Check BAR sizes */
775 	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
776 		dev_err(hdev->dev,
777 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
778 			SRAM_BAR_ID,
779 			(unsigned long long) pci_resource_len(pdev,
780 							SRAM_BAR_ID),
781 			SRAM_BAR_SIZE);
782 		rc = -ENODEV;
783 		goto free_queue_props;
784 	}
785 
786 	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
787 		dev_err(hdev->dev,
788 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
789 			CFG_BAR_ID,
790 			(unsigned long long) pci_resource_len(pdev,
791 								CFG_BAR_ID),
792 			CFG_BAR_SIZE);
793 		rc = -ENODEV;
794 		goto free_queue_props;
795 	}
796 
797 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
798 
799 	/* If FW security is enabled at this point it means no access to ELBI */
800 	if (hdev->asic_prop.fw_security_enabled) {
801 		hdev->asic_prop.iatu_done_by_fw = true;
802 
803 		/*
804 		 * The GIC security bit can ONLY be set by CPUCP, so at this
805 		 * stage the decision can only be based on PCI ID security.
806 		 */
807 		hdev->asic_prop.gic_interrupts_enable = false;
808 		goto pci_init;
809 	}
810 
811 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
812 				&fw_boot_status);
813 	if (rc)
814 		goto free_queue_props;
815 
816 	/* Check whether FW is configuring iATU */
817 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
818 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
819 		hdev->asic_prop.iatu_done_by_fw = true;
820 
821 pci_init:
822 	rc = hl_pci_init(hdev);
823 	if (rc)
824 		goto free_queue_props;
825 
826 	/* Before continuing with the initialization, we need to read the preboot
827 	 * version to determine whether we are running with security-enabled firmware
828 	 */
829 	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
830 					mmCPU_BOOT_DEV_STS0,
831 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
832 					mmCPU_BOOT_ERR1,
833 					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
834 	if (rc) {
835 		if (hdev->reset_on_preboot_fail)
836 			hdev->asic_funcs->hw_fini(hdev, true, false);
837 		goto pci_fini;
838 	}
839 
840 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
841 		dev_info(hdev->dev,
842 			"H/W state is dirty, must reset before initializing\n");
843 		hdev->asic_funcs->hw_fini(hdev, true, false);
844 	}
845 
846 	return 0;
847 
848 pci_fini:
849 	hl_pci_fini(hdev);
850 free_queue_props:
851 	kfree(hdev->asic_prop.hw_queues_props);
852 	return rc;
853 }
854 
855 static int gaudi_early_fini(struct hl_device *hdev)
856 {
857 	kfree(hdev->asic_prop.hw_queues_props);
858 	hl_pci_fini(hdev);
859 
860 	return 0;
861 }
862 
863 /**
864  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
865  *
866  * @hdev: pointer to hl_device structure
867  *
868  */
869 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
870 {
871 	struct asic_fixed_properties *prop = &hdev->asic_prop;
872 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
873 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
874 	int rc;
875 
876 	if (hdev->asic_prop.fw_security_enabled) {
877 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
878 
879 		if (rc)
880 			return rc;
881 
882 		freq = pll_freq_arr[2];
883 	} else {
884 		/* Backward compatibility */
885 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
886 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
887 		nr = RREG32(mmPSOC_CPU_PLL_NR);
888 		nf = RREG32(mmPSOC_CPU_PLL_NF);
889 		od = RREG32(mmPSOC_CPU_PLL_OD);
890 
891 		if (div_sel == DIV_SEL_REF_CLK ||
892 				div_sel == DIV_SEL_DIVIDED_REF) {
893 			if (div_sel == DIV_SEL_REF_CLK)
894 				freq = PLL_REF_CLK;
895 			else
896 				freq = PLL_REF_CLK / (div_fctr + 1);
897 		} else if (div_sel == DIV_SEL_PLL_CLK ||
898 			div_sel == DIV_SEL_DIVIDED_PLL) {
899 			pll_clk = PLL_REF_CLK * (nf + 1) /
900 					((nr + 1) * (od + 1));
901 			if (div_sel == DIV_SEL_PLL_CLK)
902 				freq = pll_clk;
903 			else
904 				freq = pll_clk / (div_fctr + 1);
905 		} else {
906 			dev_warn(hdev->dev,
907 				"Received invalid div select value: %d",
908 				div_sel);
909 			freq = 0;
910 		}
911 	}
912 
913 	prop->psoc_timestamp_frequency = freq;
914 	prop->psoc_pci_pll_nr = nr;
915 	prop->psoc_pci_pll_nf = nf;
916 	prop->psoc_pci_pll_od = od;
917 	prop->psoc_pci_pll_div_factor = div_fctr;
918 
919 	return 0;
920 }
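
/*
 * Worked example for the PLL arithmetic above, using made-up register values:
 * with a 50 MHz reference clock, nf = 39, nr = 0 and od = 1, the PLL output is
 * 50 * (39 + 1) / ((0 + 1) * (1 + 1)) = 1000 MHz; selecting DIV_SEL_DIVIDED_PLL
 * with div_fctr = 1 would then report 1000 / (1 + 1) = 500 MHz.
 */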
921 
922 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
923 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
924 {
925 	struct asic_fixed_properties *prop = &hdev->asic_prop;
926 	struct packet_lin_dma *init_tpc_mem_pkt;
927 	struct hl_cs_job *job;
928 	struct hl_cb *cb;
929 	u64 dst_addr;
930 	u32 cb_size, ctl;
931 	u8 tpc_id;
932 	int rc;
933 
934 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
935 	if (!cb)
936 		return -EFAULT;
937 
938 	init_tpc_mem_pkt = cb->kernel_address;
939 	cb_size = sizeof(*init_tpc_mem_pkt);
940 	memset(init_tpc_mem_pkt, 0, cb_size);
941 
942 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
943 
944 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
945 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
946 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
947 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
948 
949 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
950 
951 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
952 	dst_addr = (prop->sram_user_base_address &
953 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
954 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
955 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
956 
957 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
958 	if (!job) {
959 		dev_err(hdev->dev, "Failed to allocate a new job\n");
960 		rc = -ENOMEM;
961 		goto release_cb;
962 	}
963 
964 	job->id = 0;
965 	job->user_cb = cb;
966 	atomic_inc(&job->user_cb->cs_cnt);
967 	job->user_cb_size = cb_size;
968 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
969 	job->patched_cb = job->user_cb;
970 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
971 
972 	hl_debugfs_add_job(hdev, job);
973 
974 	rc = gaudi_send_job_on_qman0(hdev, job);
975 
976 	if (rc)
977 		goto free_job;
978 
979 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
980 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
981 		if (rc)
982 			break;
983 	}
984 
985 free_job:
986 	hl_userptr_delete_list(hdev, &job->userptr_list);
987 	hl_debugfs_remove_job(hdev, job);
988 	kfree(job);
989 	atomic_dec(&cb->cs_cnt);
990 
991 release_cb:
992 	hl_cb_put(cb);
993 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
994 
995 	return rc;
996 }
997 
998 /*
999  * gaudi_init_tpc_mem() - Initialize TPC memories.
1000  * @hdev: Pointer to hl_device structure.
1001  *
1002  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1003  *
1004  * Return: 0 for success, negative value for error.
1005  */
1006 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1007 {
1008 	const struct firmware *fw;
1009 	size_t fw_size;
1010 	void *cpu_addr;
1011 	dma_addr_t dma_handle;
1012 	int rc, count = 5;
1013 
1014 again:
1015 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1016 	if (rc == -EINTR && count-- > 0) {
1017 		msleep(50);
1018 		goto again;
1019 	}
1020 
1021 	if (rc) {
1022 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1023 				GAUDI_TPC_FW_FILE);
1024 		goto out;
1025 	}
1026 
1027 	fw_size = fw->size;
1028 	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1029 			&dma_handle, GFP_KERNEL | __GFP_ZERO);
1030 	if (!cpu_addr) {
1031 		dev_err(hdev->dev,
1032 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1033 			fw_size);
1034 		rc = -ENOMEM;
1035 		goto out;
1036 	}
1037 
1038 	memcpy(cpu_addr, fw->data, fw_size);
1039 
1040 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1041 
1042 	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1043 			dma_handle);
1044 
1045 out:
1046 	release_firmware(fw);
1047 	return rc;
1048 }
1049 
1050 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1051 {
1052 	struct gaudi_device *gaudi = hdev->asic_specific;
1053 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1054 	struct hl_hw_queue *q;
1055 	u32 i, sob_id, sob_group_id, queue_id;
1056 
1057 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1058 	sob_group_id =
1059 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1060 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1061 
1062 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1063 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1064 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1065 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1066 	}
1067 
1068 	/* Both DMA5 and TPC7 use the same resources since only a single
1069 	 * engine needs to participate in the reduction process
1070 	 */
1071 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1072 	q = &hdev->kernel_queues[queue_id];
1073 	q->sync_stream_prop.collective_sob_id =
1074 			sob_id + NIC_NUMBER_OF_ENGINES;
1075 
1076 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1077 	q = &hdev->kernel_queues[queue_id];
1078 	q->sync_stream_prop.collective_sob_id =
1079 			sob_id + NIC_NUMBER_OF_ENGINES;
1080 }
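
/*
 * Note on the queue id arithmetic above: every engine exposes four consecutive
 * queue ids, one per stream, so "queue_id + (4 * i)" walks the same stream
 * across successive NIC engines, while adding "stream" to an engine's _0 queue
 * id selects that engine's queue for the requested stream.
 */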
1081 
1082 static void gaudi_sob_group_hw_reset(struct kref *ref)
1083 {
1084 	struct gaudi_hw_sob_group *hw_sob_group =
1085 		container_of(ref, struct gaudi_hw_sob_group, kref);
1086 	struct hl_device *hdev = hw_sob_group->hdev;
1087 	int i;
1088 
1089 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1090 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1091 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1092 
1093 	kref_init(&hw_sob_group->kref);
1094 }
1095 
1096 static void gaudi_sob_group_reset_error(struct kref *ref)
1097 {
1098 	struct gaudi_hw_sob_group *hw_sob_group =
1099 		container_of(ref, struct gaudi_hw_sob_group, kref);
1100 	struct hl_device *hdev = hw_sob_group->hdev;
1101 
1102 	dev_crit(hdev->dev,
1103 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1104 		hw_sob_group->base_sob_id);
1105 }
1106 
1107 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1108 {
1109 	struct gaudi_collective_properties *prop;
1110 	int i;
1111 
1112 	prop = &gaudi->collective_props;
1113 
1114 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1115 
1116 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1117 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1118 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1119 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1120 	/* Set collective engine bit */
1121 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1122 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1123 }
1124 
1125 static int gaudi_collective_init(struct hl_device *hdev)
1126 {
1127 	u32 i, sob_id, reserved_sobs_per_group;
1128 	struct gaudi_collective_properties *prop;
1129 	struct gaudi_device *gaudi;
1130 
1131 	gaudi = hdev->asic_specific;
1132 	prop = &gaudi->collective_props;
1133 	sob_id = hdev->asic_prop.collective_first_sob;
1134 
1135 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1136 	reserved_sobs_per_group =
1137 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1138 
1139 	/* Init SOB groups */
1140 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1141 		prop->hw_sob_group[i].hdev = hdev;
1142 		prop->hw_sob_group[i].base_sob_id = sob_id;
1143 		sob_id += reserved_sobs_per_group;
1144 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1145 	}
1146 
1147 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1148 		prop->next_sob_group_val[i] = 1;
1149 		prop->curr_sob_group_idx[i] = 0;
1150 		gaudi_collective_map_sobs(hdev, i);
1151 	}
1152 
1153 	gaudi_collective_mstr_sob_mask_set(gaudi);
1154 
1155 	return 0;
1156 }
1157 
1158 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1159 {
1160 	struct gaudi_device *gaudi = hdev->asic_specific;
1161 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1162 
1163 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1164 					gaudi_sob_group_hw_reset);
1165 }
1166 
1167 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1168 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1169 {
1170 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1171 	struct gaudi_collective_properties *cprop;
1172 	struct hl_gen_wait_properties wait_prop;
1173 	struct hl_sync_stream_properties *prop;
1174 	struct gaudi_device *gaudi;
1175 
1176 	gaudi = hdev->asic_specific;
1177 	cprop = &gaudi->collective_props;
1178 	queue_id = job->hw_queue_id;
1179 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1180 
1181 	master_sob_base =
1182 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1183 	master_monitor = prop->collective_mstr_mon_id[0];
1184 
1185 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1186 
1187 	dev_dbg(hdev->dev,
1188 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1189 		master_sob_base, cprop->mstr_sob_mask[0],
1190 		cprop->next_sob_group_val[stream],
1191 		master_monitor, queue_id);
1192 
1193 	wait_prop.data = (void *) job->patched_cb;
1194 	wait_prop.sob_base = master_sob_base;
1195 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1196 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1197 	wait_prop.mon_id = master_monitor;
1198 	wait_prop.q_idx = queue_id;
1199 	wait_prop.size = cb_size;
1200 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201 
1202 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1203 	master_monitor = prop->collective_mstr_mon_id[1];
1204 
1205 	dev_dbg(hdev->dev,
1206 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207 		master_sob_base, cprop->mstr_sob_mask[1],
1208 		cprop->next_sob_group_val[stream],
1209 		master_monitor, queue_id);
1210 
1211 	wait_prop.sob_base = master_sob_base;
1212 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1213 	wait_prop.mon_id = master_monitor;
1214 	wait_prop.size = cb_size;
1215 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 }
1217 
1218 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1219 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1220 {
1221 	struct hl_gen_wait_properties wait_prop;
1222 	struct hl_sync_stream_properties *prop;
1223 	u32 queue_id, cb_size = 0;
1224 
1225 	queue_id = job->hw_queue_id;
1226 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1227 
1228 	if (job->cs->encaps_signals) {
1229 		/* use the encaps signal handle stored earlier in the flow
1230 		 * and set the SOB information from the encaps
1231 		 * signals handle
1232 		 */
1233 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1234 						cs_cmpl);
1235 
1236 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1237 				job->cs->sequence,
1238 				cs_cmpl->hw_sob->sob_id,
1239 				cs_cmpl->sob_val);
1240 	}
1241 
1242 	/* Add to wait CBs using slave monitor */
1243 	wait_prop.data = (void *) job->user_cb;
1244 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1245 	wait_prop.sob_mask = 0x1;
1246 	wait_prop.sob_val = cs_cmpl->sob_val;
1247 	wait_prop.mon_id = prop->collective_slave_mon_id;
1248 	wait_prop.q_idx = queue_id;
1249 	wait_prop.size = cb_size;
1250 
1251 	dev_dbg(hdev->dev,
1252 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1253 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1254 		prop->collective_slave_mon_id, queue_id);
1255 
1256 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257 
1258 	dev_dbg(hdev->dev,
1259 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1260 		prop->collective_sob_id, queue_id);
1261 
1262 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1263 			prop->collective_sob_id, cb_size, false);
1264 }
1265 
1266 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1267 {
1268 	struct hl_cs_compl *signal_cs_cmpl =
1269 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1270 	struct hl_cs_compl *cs_cmpl =
1271 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1272 	struct gaudi_collective_properties *cprop;
1273 	u32 stream, queue_id, sob_group_offset;
1274 	struct gaudi_device *gaudi;
1275 	struct hl_device *hdev;
1276 	struct hl_cs_job *job;
1277 	struct hl_ctx *ctx;
1278 
1279 	ctx = cs->ctx;
1280 	hdev = ctx->hdev;
1281 	gaudi = hdev->asic_specific;
1282 	cprop = &gaudi->collective_props;
1283 
1284 	/* In the encaps signals case, the SOB info will be retrieved from
1285 	 * the handle in gaudi_collective_slave_init_job.
1286 	 */
1287 	if (!cs->encaps_signals) {
1288 		/* copy the SOB id and value of the signal CS */
1289 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1290 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1291 	}
1292 
1293 	/* Check again whether the signal cs has already completed.
1294 	 * If it has, don't send any wait cs since the hw_sob
1295 	 * could already be in reset. If the signal is not completed,
1296 	 * take a refcount on the hw_sob to prevent resetting the sob
1297 	 * while the wait cs is not yet submitted.
1298 	 * Note that this check is protected by two locks:
1299 	 * the hw queue lock and the completion object lock.
1300 	 * The same completion object lock also protects
1301 	 * the hw_sob reset handler function, and the hw queue lock
1302 	 * keeps the hw_sob refcount value, which is changed by the
1303 	 * signal/wait flows, from going out of sync.
1304 	 */
1305 	spin_lock(&signal_cs_cmpl->lock);
1306 
1307 	if (completion_done(&cs->signal_fence->completion)) {
1308 		spin_unlock(&signal_cs_cmpl->lock);
1309 		return -EINVAL;
1310 	}
1311 	/* Increment kref since all slave queues are now waiting on it */
1312 	kref_get(&cs_cmpl->hw_sob->kref);
1313 
1314 	spin_unlock(&signal_cs_cmpl->lock);
1315 
1316 	/* Calculate the stream from collective master queue (1st job) */
1317 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1318 	stream = job->hw_queue_id % 4;
1319 	sob_group_offset =
1320 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1321 
1322 	list_for_each_entry(job, &cs->job_list, cs_node) {
1323 		queue_id = job->hw_queue_id;
1324 
1325 		if (hdev->kernel_queues[queue_id].collective_mode ==
1326 				HL_COLLECTIVE_MASTER)
1327 			gaudi_collective_master_init_job(hdev, job, stream,
1328 						sob_group_offset);
1329 		else
1330 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1331 	}
1332 
1333 	cs_cmpl->sob_group = sob_group_offset;
1334 
1335 	/* Handle sob group kref and wraparound */
1336 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1337 	cprop->next_sob_group_val[stream]++;
1338 
1339 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1340 		/*
1341 		 * Decrement as we reached the max value.
1342 		 * The release function won't be called here as we've
1343 		 * just incremented the refcount.
1344 		 */
1345 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1346 				gaudi_sob_group_reset_error);
1347 		cprop->next_sob_group_val[stream] = 1;
1348 		/* only two SOBs are currently in use */
1349 		cprop->curr_sob_group_idx[stream] =
1350 			(cprop->curr_sob_group_idx[stream] + 1) &
1351 							(HL_RSVD_SOBS - 1);
1352 
1353 		gaudi_collective_map_sobs(hdev, stream);
1354 
1355 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1356 				cprop->curr_sob_group_idx[stream], stream);
1357 	}
1358 
1359 	mb();
1360 	hl_fence_put(cs->signal_fence);
1361 	cs->signal_fence = NULL;
1362 
1363 	return 0;
1364 }
1365 
1366 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1367 		struct hl_ctx *ctx, struct hl_cs *cs,
1368 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1369 		u32 encaps_signal_offset)
1370 {
1371 	struct hw_queue_properties *hw_queue_prop;
1372 	struct hl_cs_counters_atomic *cntr;
1373 	struct hl_cs_job *job;
1374 	struct hl_cb *cb;
1375 	u32 cb_size;
1376 	bool patched_cb;
1377 
1378 	cntr = &hdev->aggregated_cs_counters;
1379 
1380 	if (mode == HL_COLLECTIVE_MASTER) {
1381 		/* CB size of collective master queue contains
1382 		 * 4 msg short packets for monitor 1 configuration
1383 		 * 1 fence packet
1384 		 * 4 msg short packets for monitor 2 configuration
1385 		 * 1 fence packet
1386 		 * 2 msg prot packets for completion and MSI-X
1387 		 */
1388 		cb_size = sizeof(struct packet_msg_short) * 8 +
1389 				sizeof(struct packet_fence) * 2 +
1390 				sizeof(struct packet_msg_prot) * 2;
1391 		patched_cb = true;
1392 	} else {
1393 		/* CB size of collective slave queues contains
1394 		 * 4 msg short packets for monitor configuration
1395 		 * 1 fence packet
1396 		 * 1 additional msg short packet for sob signal
1397 		 */
1398 		cb_size = sizeof(struct packet_msg_short) * 5 +
1399 				sizeof(struct packet_fence);
1400 		patched_cb = false;
1401 	}
1402 
1403 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1404 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1405 	if (!job) {
1406 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1407 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1408 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1409 		return -ENOMEM;
1410 	}
1411 
1412 	/* Allocate internal mapped CB for non patched CBs */
1413 	cb = hl_cb_kernel_create(hdev, cb_size,
1414 			hdev->mmu_enable && !patched_cb);
1415 	if (!cb) {
1416 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1417 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1418 		kfree(job);
1419 		return -EFAULT;
1420 	}
1421 
1422 	job->id = 0;
1423 	job->cs = cs;
1424 	job->user_cb = cb;
1425 	atomic_inc(&job->user_cb->cs_cnt);
1426 	job->user_cb_size = cb_size;
1427 	job->hw_queue_id = queue_id;
1428 
1429 	/* since it is guaranteed to have only one chunk in the collective wait
1430 	 * cs, we can use this chunk to set the encapsulated signal offset
1431 	 * in the jobs.
1432 	 */
1433 	if (cs->encaps_signals)
1434 		job->encaps_sig_wait_offset = encaps_signal_offset;
1435 
1436 	/*
1437 	 * No need in parsing, user CB is the patched CB.
1438 	 * We call hl_cb_destroy() out of two reasons - we don't need
1439 	 * the CB in the CB idr anymore and to decrement its refcount as
1440 	 * it was incremented inside hl_cb_kernel_create().
1441 	 */
1442 	if (patched_cb)
1443 		job->patched_cb = job->user_cb;
1444 	else
1445 		job->patched_cb = NULL;
1446 
1447 	job->job_cb_size = job->user_cb_size;
1448 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1449 
1450 	/* increment refcount as for external queues we get completion */
1451 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1452 		cs_get(cs);
1453 
1454 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1455 
1456 	list_add_tail(&job->cs_node, &cs->job_list);
1457 
1458 	hl_debugfs_add_job(hdev, job);
1459 
1460 	return 0;
1461 }
1462 
1463 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1464 		struct hl_ctx *ctx, struct hl_cs *cs,
1465 		u32 wait_queue_id, u32 collective_engine_id,
1466 		u32 encaps_signal_offset)
1467 {
1468 	struct gaudi_device *gaudi = hdev->asic_specific;
1469 	struct hw_queue_properties *hw_queue_prop;
1470 	u32 queue_id, collective_queue, num_jobs;
1471 	u32 stream, nic_queue, nic_idx = 0;
1472 	bool skip;
1473 	int i, rc = 0;
1474 
1475 	/* Verify wait queue id is configured as master */
1476 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1477 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1478 		dev_err(hdev->dev,
1479 			"Queue %d is not configured as collective master\n",
1480 			wait_queue_id);
1481 		return -EINVAL;
1482 	}
1483 
1484 	/* Verify engine id is supported */
1485 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1486 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1487 		dev_err(hdev->dev,
1488 			"Collective wait does not support engine %u\n",
1489 			collective_engine_id);
1490 		return -EINVAL;
1491 	}
1492 
1493 	stream = wait_queue_id % 4;
1494 
1495 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1496 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1497 	else
1498 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1499 
1500 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1501 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1502 
1503 	/* The first job goes to the collective master queue; it will wait for
1504 	 * the collective slave queues to finish execution.
1505 	 * The synchronization is done using two monitors:
1506 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1507 	 * reduction engine (DMA5/TPC7).
1508 	 *
1509 	 * The rest of the jobs go to the collective slave queues, which will
1510 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1511 	 */
1512 	for (i = 0 ; i < num_jobs ; i++) {
1513 		if (i == 0) {
1514 			queue_id = wait_queue_id;
1515 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1516 				HL_COLLECTIVE_MASTER, queue_id,
1517 				wait_queue_id, encaps_signal_offset);
1518 		} else {
1519 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1520 				if (gaudi->hw_cap_initialized &
1521 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1522 					skip = false;
1523 				else
1524 					skip = true;
1525 
1526 				queue_id = nic_queue;
1527 				nic_queue += 4;
1528 				nic_idx++;
1529 
1530 				if (skip)
1531 					continue;
1532 			} else {
1533 				queue_id = collective_queue;
1534 			}
1535 
1536 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537 				HL_COLLECTIVE_SLAVE, queue_id,
1538 				wait_queue_id, encaps_signal_offset);
1539 		}
1540 
1541 		if (rc)
1542 			return rc;
1543 	}
1544 
1545 	return rc;
1546 }
1547 
1548 static int gaudi_late_init(struct hl_device *hdev)
1549 {
1550 	struct gaudi_device *gaudi = hdev->asic_specific;
1551 	int rc;
1552 
1553 	rc = gaudi->cpucp_info_get(hdev);
1554 	if (rc) {
1555 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1556 		return rc;
1557 	}
1558 
1559 	if ((hdev->card_type == cpucp_card_type_pci) &&
1560 			(hdev->nic_ports_mask & 0x3)) {
1561 		dev_info(hdev->dev,
1562 			"PCI card detected, only 8 ports are enabled\n");
1563 		hdev->nic_ports_mask &= ~0x3;
1564 
1565 		/* Stop and disable unused NIC QMANs */
1566 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1567 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1568 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1569 
1570 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573 
1574 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1575 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1576 
1577 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1578 	}
1579 
1580 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1581 	if (rc) {
1582 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1583 		return rc;
1584 	}
1585 
1586 	/* Scrub both SRAM and DRAM */
1587 	rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1588 	if (rc)
1589 		goto disable_pci_access;
1590 
1591 	rc = gaudi_fetch_psoc_frequency(hdev);
1592 	if (rc) {
1593 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1594 		goto disable_pci_access;
1595 	}
1596 
1597 	rc = gaudi_mmu_clear_pgt_range(hdev);
1598 	if (rc) {
1599 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1600 		goto disable_pci_access;
1601 	}
1602 
1603 	rc = gaudi_init_tpc_mem(hdev);
1604 	if (rc) {
1605 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1606 		goto disable_pci_access;
1607 	}
1608 
1609 	rc = gaudi_collective_init(hdev);
1610 	if (rc) {
1611 		dev_err(hdev->dev, "Failed to init collective\n");
1612 		goto disable_pci_access;
1613 	}
1614 
1615 	/* We only support a single ASID for the user, so for the sake of optimization, just
1616 	 * initialize the ASID one time during device initialization with the fixed value of 1
1617 	 */
1618 	gaudi_mmu_prepare(hdev, 1);
1619 
1620 	return 0;
1621 
1622 disable_pci_access:
1623 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1624 
1625 	return rc;
1626 }
1627 
1628 static void gaudi_late_fini(struct hl_device *hdev)
1629 {
1630 	const struct hwmon_channel_info **channel_info_arr;
1631 	int i = 0;
1632 
1633 	if (!hdev->hl_chip_info->info)
1634 		return;
1635 
1636 	channel_info_arr = hdev->hl_chip_info->info;
1637 
1638 	while (channel_info_arr[i]) {
1639 		kfree(channel_info_arr[i]->config);
1640 		kfree(channel_info_arr[i]);
1641 		i++;
1642 	}
1643 
1644 	kfree(channel_info_arr);
1645 
1646 	hdev->hl_chip_info->info = NULL;
1647 }
1648 
1649 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1650 {
1651 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1652 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1653 	int i, j, rc = 0;
1654 
1655 	/*
1656 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1657 	 * to '1' when accessing the host.
1658 	 * Bits 49:39 of the full host address are saved for a later
1659 	 * configuration of the HW that extends the address to 50 bits.
1660 	 * Because a single HW register holds the extension bits, these bits
1661 	 * must be identical across the entire allocated range.
1662 	 */
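	/*
	 * Illustrative check: since the extension covers bits 49:39, an
	 * allocation may not straddle a bit-39 (512GB) boundary. If
	 * GAUDI_CPU_PCI_MSB_ADDR(start) != GAUDI_CPU_PCI_MSB_ADDR(end), the
	 * allocation is retried (up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times)
	 * and the non-matching buffers are freed at free_dma_mem_arr.
	 */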
1663 
1664 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1665 		virt_addr_arr[i] =
1666 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1667 						HL_CPU_ACCESSIBLE_MEM_SIZE,
1668 						&dma_addr_arr[i],
1669 						GFP_KERNEL | __GFP_ZERO);
1670 		if (!virt_addr_arr[i]) {
1671 			rc = -ENOMEM;
1672 			goto free_dma_mem_arr;
1673 		}
1674 
1675 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1676 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1677 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1678 			break;
1679 	}
1680 
1681 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1682 		dev_err(hdev->dev,
1683 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1684 		rc = -EFAULT;
1685 		goto free_dma_mem_arr;
1686 	}
1687 
1688 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1689 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1690 	hdev->cpu_pci_msb_addr =
1691 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1692 
1693 	if (!hdev->asic_prop.fw_security_enabled)
1694 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1695 
1696 free_dma_mem_arr:
1697 	for (j = 0 ; j < i ; j++)
1698 		hdev->asic_funcs->asic_dma_free_coherent(hdev,
1699 						HL_CPU_ACCESSIBLE_MEM_SIZE,
1700 						virt_addr_arr[j],
1701 						dma_addr_arr[j]);
1702 
1703 	return rc;
1704 }
1705 
1706 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1707 {
1708 	struct gaudi_device *gaudi = hdev->asic_specific;
1709 	struct gaudi_internal_qman_info *q;
1710 	u32 i;
1711 
1712 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1713 		q = &gaudi->internal_qmans[i];
1714 		if (!q->pq_kernel_addr)
1715 			continue;
1716 		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1717 							q->pq_kernel_addr,
1718 							q->pq_dma_addr);
1719 	}
1720 }
1721 
1722 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1723 {
1724 	struct gaudi_device *gaudi = hdev->asic_specific;
1725 	struct gaudi_internal_qman_info *q;
1726 	int rc, i;
1727 
1728 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1729 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1730 			continue;
1731 
1732 		q = &gaudi->internal_qmans[i];
1733 
1734 		switch (i) {
1735 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1736 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1737 			break;
1738 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1739 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1740 			break;
1741 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1742 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1743 			break;
1744 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1745 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1746 			break;
1747 		default:
1748 			dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1749 			rc = -EINVAL;
1750 			goto free_internal_qmans_pq_mem;
1751 		}
1752 
1753 		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1754 						hdev, q->pq_size,
1755 						&q->pq_dma_addr,
1756 						GFP_KERNEL | __GFP_ZERO);
1757 		if (!q->pq_kernel_addr) {
1758 			rc = -ENOMEM;
1759 			goto free_internal_qmans_pq_mem;
1760 		}
1761 	}
1762 
1763 	return 0;
1764 
1765 free_internal_qmans_pq_mem:
1766 	gaudi_free_internal_qmans_pq_mem(hdev);
1767 	return rc;
1768 }
1769 
1770 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1771 {
1772 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1773 	struct pci_mem_region *region;
1774 
1775 	/* CFG */
1776 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1777 	region->region_base = CFG_BASE;
1778 	region->region_size = CFG_SIZE;
1779 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1780 	region->bar_size = CFG_BAR_SIZE;
1781 	region->bar_id = CFG_BAR_ID;
1782 	region->used = 1;
1783 
1784 	/* SRAM */
1785 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1786 	region->region_base = SRAM_BASE_ADDR;
1787 	region->region_size = SRAM_SIZE;
1788 	region->offset_in_bar = 0;
1789 	region->bar_size = SRAM_BAR_SIZE;
1790 	region->bar_id = SRAM_BAR_ID;
1791 	region->used = 1;
1792 
1793 	/* DRAM */
1794 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1795 	region->region_base = DRAM_PHYS_BASE;
1796 	region->region_size = hdev->asic_prop.dram_size;
1797 	region->offset_in_bar = 0;
1798 	region->bar_size = prop->dram_pci_bar_size;
1799 	region->bar_id = HBM_BAR_ID;
1800 	region->used = 1;
1801 
1802 	/* SP SRAM */
1803 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1804 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1805 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1806 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1807 	region->bar_size = CFG_BAR_SIZE;
1808 	region->bar_id = CFG_BAR_ID;
1809 	region->used = 1;
1810 }
1811 
1812 static int gaudi_sw_init(struct hl_device *hdev)
1813 {
1814 	struct gaudi_device *gaudi;
1815 	u32 i, event_id = 0;
1816 	int rc;
1817 
1818 	/* Allocate device structure */
1819 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1820 	if (!gaudi)
1821 		return -ENOMEM;
1822 
1823 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1824 		if (gaudi_irq_map_table[i].valid) {
1825 			if (event_id == GAUDI_EVENT_SIZE) {
1826 				dev_err(hdev->dev,
1827 					"Event array exceeds the limit of %u events\n",
1828 					GAUDI_EVENT_SIZE);
1829 				rc = -EINVAL;
1830 				goto free_gaudi_device;
1831 			}
1832 
1833 			gaudi->events[event_id++] =
1834 					gaudi_irq_map_table[i].fc_id;
1835 		}
1836 	}
1837 
1838 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1839 
1840 	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1841 
1842 	hdev->asic_specific = gaudi;
1843 
1844 	/* Create DMA pool for small allocations */
1845 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1846 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1847 	if (!hdev->dma_pool) {
1848 		dev_err(hdev->dev, "failed to create DMA pool\n");
1849 		rc = -ENOMEM;
1850 		goto free_gaudi_device;
1851 	}
1852 
1853 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1854 	if (rc)
1855 		goto free_dma_pool;
1856 
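	/* gen_pool with a 32-byte minimum allocation granularity (ilog2(32) == 5) */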
1857 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1858 	if (!hdev->cpu_accessible_dma_pool) {
1859 		dev_err(hdev->dev,
1860 			"Failed to create CPU accessible DMA pool\n");
1861 		rc = -ENOMEM;
1862 		goto free_cpu_dma_mem;
1863 	}
1864 
1865 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1866 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1867 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1868 	if (rc) {
1869 		dev_err(hdev->dev,
1870 			"Failed to add memory to CPU accessible DMA pool\n");
1871 		rc = -EFAULT;
1872 		goto free_cpu_accessible_dma_pool;
1873 	}
1874 
1875 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1876 	if (rc)
1877 		goto free_cpu_accessible_dma_pool;
1878 
1879 	spin_lock_init(&gaudi->hw_queues_lock);
1880 	mutex_init(&gaudi->clk_gate_mutex);
1881 
1882 	hdev->supports_sync_stream = true;
1883 	hdev->supports_coresight = true;
1884 	hdev->supports_staged_submission = true;
1885 	hdev->supports_wait_for_multi_cs = true;
1886 
1887 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1888 	hdev->stream_master_qid_arr =
1889 				hdev->asic_funcs->get_stream_master_qid_arr();
1890 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1891 
1892 	return 0;
1893 
1894 free_cpu_accessible_dma_pool:
1895 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1896 free_cpu_dma_mem:
1897 	if (!hdev->asic_prop.fw_security_enabled)
1898 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1899 					hdev->cpu_pci_msb_addr);
1900 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1901 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1902 			hdev->cpu_accessible_dma_mem,
1903 			hdev->cpu_accessible_dma_address);
1904 free_dma_pool:
1905 	dma_pool_destroy(hdev->dma_pool);
1906 free_gaudi_device:
1907 	kfree(gaudi);
1908 	return rc;
1909 }
1910 
1911 static int gaudi_sw_fini(struct hl_device *hdev)
1912 {
1913 	struct gaudi_device *gaudi = hdev->asic_specific;
1914 
1915 	gaudi_free_internal_qmans_pq_mem(hdev);
1916 
1917 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1918 
1919 	if (!hdev->asic_prop.fw_security_enabled)
1920 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1921 					hdev->cpu_pci_msb_addr);
1922 
1923 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1924 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1925 			hdev->cpu_accessible_dma_mem,
1926 			hdev->cpu_accessible_dma_address);
1927 
1928 	dma_pool_destroy(hdev->dma_pool);
1929 
1930 	mutex_destroy(&gaudi->clk_gate_mutex);
1931 
1932 	kfree(gaudi);
1933 
1934 	return 0;
1935 }
1936 
1937 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1938 {
1939 	struct hl_device *hdev = arg;
1940 	int i;
1941 
1942 	if (hdev->disabled)
1943 		return IRQ_HANDLED;
1944 
1945 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1946 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1947 
1948 	hl_irq_handler_eq(irq, &hdev->event_queue);
1949 
1950 	return IRQ_HANDLED;
1951 }
1952 
1953 /*
1954  * For backward compatibility, new MSI interrupts should be set after the
1955  * existing CPU and NIC interrupts.
1956  */
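/*
 * Mapping sketch (derived from the computation below): a CQ with
 * nr < GAUDI_EVENT_QUEUE_MSI_IDX, or the CPU EQ itself, maps to MSI vector
 * 'nr'. Any other interrupt is pushed past the CPU EQ and the NIC vectors,
 * i.e. it maps to nr + NIC_NUMBER_OF_ENGINES + 1.
 */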
1957 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1958 				bool cpu_eq)
1959 {
1960 	int msi_vec;
1961 
1962 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1963 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1964 				GAUDI_EVENT_QUEUE_MSI_IDX);
1965 
1966 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1967 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1968 
1969 	return pci_irq_vector(hdev->pdev, msi_vec);
1970 }
1971 
1972 static int gaudi_enable_msi_single(struct hl_device *hdev)
1973 {
1974 	int rc, irq;
1975 
1976 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1977 
1978 	irq = gaudi_pci_irq_vector(hdev, 0, false);
1979 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1980 			"gaudi single msi", hdev);
1981 	if (rc)
1982 		dev_err(hdev->dev,
1983 			"Failed to request single MSI IRQ\n");
1984 
1985 	return rc;
1986 }
1987 
1988 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1989 {
1990 	int cq_cnt = hdev->asic_prop.completion_queues_count;
1991 	int rc, i, irq_cnt_init, irq;
1992 
1993 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1994 		irq = gaudi_pci_irq_vector(hdev, i, false);
1995 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1996 				&hdev->completion_queue[i]);
1997 		if (rc) {
1998 			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1999 			goto free_irqs;
2000 		}
2001 	}
2002 
2003 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2004 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2005 				&hdev->event_queue);
2006 	if (rc) {
2007 		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2008 		goto free_irqs;
2009 	}
2010 
2011 	return 0;
2012 
2013 free_irqs:
2014 	for (i = 0 ; i < irq_cnt_init ; i++)
2015 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2016 				&hdev->completion_queue[i]);
2017 	return rc;
2018 }
2019 
2020 static int gaudi_enable_msi(struct hl_device *hdev)
2021 {
2022 	struct gaudi_device *gaudi = hdev->asic_specific;
2023 	int rc;
2024 
2025 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2026 		return 0;
2027 
2028 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2029 	if (rc < 0) {
2030 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2031 		return rc;
2032 	}
2033 
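	/*
	 * pci_alloc_irq_vectors() returns the number of vectors actually
	 * allocated; anything below NUMBER_OF_INTERRUPTS falls back to the
	 * single MSI path.
	 */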
2034 	if (rc < NUMBER_OF_INTERRUPTS) {
2035 		gaudi->multi_msi_mode = false;
2036 		rc = gaudi_enable_msi_single(hdev);
2037 	} else {
2038 		gaudi->multi_msi_mode = true;
2039 		rc = gaudi_enable_msi_multi(hdev);
2040 	}
2041 
2042 	if (rc)
2043 		goto free_pci_irq_vectors;
2044 
2045 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2046 
2047 	return 0;
2048 
2049 free_pci_irq_vectors:
2050 	pci_free_irq_vectors(hdev->pdev);
2051 	return rc;
2052 }
2053 
2054 static void gaudi_sync_irqs(struct hl_device *hdev)
2055 {
2056 	struct gaudi_device *gaudi = hdev->asic_specific;
2057 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2058 
2059 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060 		return;
2061 
2062 	/* Wait for all pending IRQs to be finished */
2063 	if (gaudi->multi_msi_mode) {
2064 		for (i = 0 ; i < cq_cnt ; i++)
2065 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2066 
2067 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2068 						GAUDI_EVENT_QUEUE_MSI_IDX,
2069 						true));
2070 	} else {
2071 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2072 	}
2073 }
2074 
2075 static void gaudi_disable_msi(struct hl_device *hdev)
2076 {
2077 	struct gaudi_device *gaudi = hdev->asic_specific;
2078 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2079 
2080 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2081 		return;
2082 
2083 	gaudi_sync_irqs(hdev);
2084 
2085 	if (gaudi->multi_msi_mode) {
2086 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2087 						true);
2088 		free_irq(irq, &hdev->event_queue);
2089 
2090 		for (i = 0 ; i < cq_cnt ; i++) {
2091 			irq = gaudi_pci_irq_vector(hdev, i, false);
2092 			free_irq(irq, &hdev->completion_queue[i]);
2093 		}
2094 	} else {
2095 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2096 	}
2097 
2098 	pci_free_irq_vectors(hdev->pdev);
2099 
2100 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2101 }
2102 
2103 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2104 {
2105 	struct gaudi_device *gaudi = hdev->asic_specific;
2106 
2107 	if (hdev->asic_prop.fw_security_enabled)
2108 		return;
2109 
2110 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2111 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2112 		return;
2113 
2114 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2115 		return;
2116 
2117 	if (!hdev->sram_scrambler_enable)
2118 		return;
2119 
2120 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2121 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2123 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2125 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2127 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2129 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2131 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2133 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2135 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136 
2137 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2138 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2140 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2142 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2144 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2146 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2148 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2150 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2152 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153 
2154 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2155 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2156 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2157 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2159 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2161 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2163 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2165 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2167 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2169 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170 
2171 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2172 }
2173 
2174 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2175 {
2176 	struct gaudi_device *gaudi = hdev->asic_specific;
2177 
2178 	if (hdev->asic_prop.fw_security_enabled)
2179 		return;
2180 
2181 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2182 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2183 		return;
2184 
2185 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2186 		return;
2187 
2188 	if (!hdev->dram_scrambler_enable)
2189 		return;
2190 
2191 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2192 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2193 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2194 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2196 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2198 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2200 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2202 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2204 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2206 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207 
2208 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 
2225 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2226 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2227 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2228 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2230 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2232 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2234 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2236 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2238 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2240 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241 
2242 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2243 }
2244 
2245 static void gaudi_init_e2e(struct hl_device *hdev)
2246 {
2247 	if (hdev->asic_prop.fw_security_enabled)
2248 		return;
2249 
2250 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2251 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2252 		return;
2253 
2254 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2255 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2256 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2257 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2258 
2259 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263 
2264 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268 
2269 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273 
2274 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278 
2279 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283 
2284 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288 
2289 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2290 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2291 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2292 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2293 
2294 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2295 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2296 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2297 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2298 
2299 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2300 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2301 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2302 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2303 
2304 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2305 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2306 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2307 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2308 
2309 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2310 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2311 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2312 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2313 
2314 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2315 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2316 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2317 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2318 
2319 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2320 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2321 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2322 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2323 
2324 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2325 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2326 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2327 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2328 
2329 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2330 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2331 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2332 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2333 
2334 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338 
2339 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343 
2344 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348 
2349 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353 
2354 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2355 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2356 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2357 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2358 
2359 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2360 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2361 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2362 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2363 
2364 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2365 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2366 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2367 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2368 
2369 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2370 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2371 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2372 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2373 
2374 	if (!hdev->dram_scrambler_enable) {
2375 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2376 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2377 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2378 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2379 
2380 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2381 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2382 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2383 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2384 
2385 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2386 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2387 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2388 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2389 
2390 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2391 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2392 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2393 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2394 
2395 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2396 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2397 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2398 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2399 
2400 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2401 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2402 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2403 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2404 
2405 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2406 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2407 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2408 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2409 
2410 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2411 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2412 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2413 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2414 
2415 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2416 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2417 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2418 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2419 
2420 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2421 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2422 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2423 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2424 
2425 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2426 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2427 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2428 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2429 
2430 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2431 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2432 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2433 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2434 
2435 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2436 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2437 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2438 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2439 
2440 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2441 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2442 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2443 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2444 
2445 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2446 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2447 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2448 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2449 
2450 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2451 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2452 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2453 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2454 
2455 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459 
2460 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464 
2465 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469 
2470 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474 
2475 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2476 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2477 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2478 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2479 
2480 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2481 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2482 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2483 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2484 
2485 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2486 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2487 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2488 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2489 
2490 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2491 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2492 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2493 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2494 	}
2495 
2496 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2497 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2499 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500 
2501 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2502 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2504 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505 
2506 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2507 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2509 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510 
2511 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2512 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2514 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515 
2516 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2517 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2519 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520 
2521 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2522 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2524 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525 
2526 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2527 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2529 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530 
2531 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2532 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2534 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535 
2536 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2537 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2539 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540 
2541 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2542 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2544 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545 
2546 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2547 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2549 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550 
2551 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2552 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2554 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555 
2556 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2557 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2559 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560 
2561 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2562 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2564 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565 
2566 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2567 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2569 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570 
2571 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2572 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2574 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575 
2576 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2577 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2579 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580 
2581 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2582 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2584 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585 
2586 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2587 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2589 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590 
2591 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2592 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2594 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595 
2596 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2597 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2599 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600 
2601 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2602 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2604 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605 
2606 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2607 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2609 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610 
2611 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2612 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2614 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615 }
2616 
2617 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2618 {
2619 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2620 
2621 	if (hdev->asic_prop.fw_security_enabled)
2622 		return;
2623 
2624 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2625 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2626 		return;
2627 
2628 	hbm0_wr = 0x33333333;
2629 	hbm0_rd = 0x77777777;
2630 	hbm1_wr = 0x55555555;
2631 	hbm1_rd = 0xDDDDDDDD;
2632 
2633 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2634 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2635 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2636 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2637 
2638 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2639 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2640 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2641 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2642 
2643 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2644 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2645 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2646 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2647 
2648 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2649 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2650 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2651 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2652 
2653 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2654 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2655 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2656 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2657 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2660 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2663 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665 
2666 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2667 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2668 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2669 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2670 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2673 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2676 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678 }
2679 
2680 static void gaudi_init_golden_registers(struct hl_device *hdev)
2681 {
2682 	u32 tpc_offset;
2683 	int tpc_id, i;
2684 
2685 	gaudi_init_e2e(hdev);
2686 	gaudi_init_hbm_cred(hdev);
2687 
2688 	for (tpc_id = 0, tpc_offset = 0;
2689 				tpc_id < TPC_NUMBER_OF_ENGINES;
2690 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2691 		/* Mask all arithmetic interrupts from TPC */
2692 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2693 		/* Set 16 cache lines */
2694 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2695 				ICACHE_FETCH_LINE_NUM, 2);
2696 	}
2697 
2698 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
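	/* writeq() stores 8 bytes, so the loop below runs 16 times */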
2699 	for (i = 0 ; i < 128 ; i += 8)
2700 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2701 
2702 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2703 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2704 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 }
2707 
2708 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2709 					int qman_id, dma_addr_t qman_pq_addr)
2710 {
2711 	struct cpu_dyn_regs *dyn_regs =
2712 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2714 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2715 	u32 q_off, dma_qm_offset;
2716 	u32 dma_qm_err_cfg, irq_handler_offset;
2717 
2718 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2719 
2720 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2721 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2722 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2723 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724 	so_base_en_lo = lower_32_bits(CFG_BASE +
2725 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2726 	so_base_en_hi = upper_32_bits(CFG_BASE +
2727 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2729 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2730 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2731 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2733 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2734 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2735 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736 
2737 	q_off = dma_qm_offset + qman_id * 4;
2738 
2739 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2740 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2741 
2742 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2743 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2744 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2745 
2746 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2747 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2748 							QMAN_LDMA_SRC_OFFSET);
2749 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2750 							QMAN_LDMA_DST_OFFSET);
2751 
2752 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2753 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2754 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2755 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2756 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2757 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2758 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2759 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2760 
2761 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2762 
2763 	/* The following configuration is needed only once per QMAN */
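	/*
	 * (The registers below are addressed with dma_qm_offset only, i.e.
	 * they are shared by all four streams of this QMAN.)
	 */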
2764 	if (qman_id == 0) {
2765 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2766 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2767 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2768 
2769 		/* Configure RAZWI IRQ */
2770 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2771 		if (hdev->stop_on_err)
2772 			dma_qm_err_cfg |=
2773 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2774 
2775 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2776 
2777 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2778 			lower_32_bits(CFG_BASE + irq_handler_offset));
2779 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2780 			upper_32_bits(CFG_BASE + irq_handler_offset));
2781 
2782 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2783 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2784 									dma_id);
2785 
2786 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2787 				QM_ARB_ERR_MSG_EN_MASK);
2788 
2789 		/* Increase ARB WDT to support streams architecture */
2790 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2791 				GAUDI_ARB_WDT_TIMEOUT);
2792 
2793 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2794 				QMAN_EXTERNAL_MAKE_TRUSTED);
2795 
2796 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2797 	}
2798 }
2799 
2800 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2801 {
2802 	struct cpu_dyn_regs *dyn_regs =
2803 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2804 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2805 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2806 	u32 irq_handler_offset;
2807 
2808 	/* Set to maximum possible according to physical size */
2809 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2810 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2811 
2812 	/* WA for H/W bug H3-2116 */
2813 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2814 
2815 	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2816 	if (hdev->stop_on_err)
2817 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2818 
2819 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2820 
2821 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2822 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2823 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2824 
2825 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2826 		lower_32_bits(CFG_BASE + irq_handler_offset));
2827 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2828 		upper_32_bits(CFG_BASE + irq_handler_offset));
2829 
2830 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2831 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2832 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2833 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2834 	/* If the channel is secured, it should be in MMU bypass mode */
2835 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2836 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2837 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2838 }
2839 
2840 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2841 				u32 enable_mask)
2842 {
2843 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2844 
2845 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2846 }
2847 
2848 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2849 {
2850 	struct gaudi_device *gaudi = hdev->asic_specific;
2851 	struct hl_hw_queue *q;
2852 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2853 
2854 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2855 		return;
2856 
2857 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2858 		dma_id = gaudi_dma_assignment[i];
2859 		/*
2860 		 * For queues located after the CPU queue, we need to add 1 to
2861 		 * get the correct queue index. In addition, we need to skip the
2862 		 * CPU EQ and NIC IRQs in order to get the correct MSI vector.
2863 		 */
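		/*
		 * Worked example (illustrative): for dma_id 0 or 1 no skip is
		 * applied, so stream j maps to kernel queue 4 * dma_id + j;
		 * for a channel located after the CPU queue, the queue index
		 * is shifted by one and the MSI vector additionally skips the
		 * CPU EQ and the NIC vectors.
		 */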
2864 		if (dma_id > 1) {
2865 			cpu_skip = 1;
2866 			nic_skip = NIC_NUMBER_OF_ENGINES;
2867 		} else {
2868 			cpu_skip = 0;
2869 			nic_skip = 0;
2870 		}
2871 
2872 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2873 			q_idx = 4 * dma_id + j + cpu_skip;
2874 			q = &hdev->kernel_queues[q_idx];
2875 			q->cq_id = cq_id++;
2876 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2877 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2878 						q->bus_address);
2879 		}
2880 
2881 		gaudi_init_dma_core(hdev, dma_id);
2882 
2883 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2884 	}
2885 
2886 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2887 }
2888 
2889 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2890 					int qman_id, u64 qman_base_addr)
2891 {
2892 	struct cpu_dyn_regs *dyn_regs =
2893 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2894 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2895 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2896 	u32 dma_qm_err_cfg, irq_handler_offset;
2897 	u32 q_off, dma_qm_offset;
2898 
2899 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2900 
2901 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2902 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2903 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2904 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905 	so_base_en_lo = lower_32_bits(CFG_BASE +
2906 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2907 	so_base_en_hi = upper_32_bits(CFG_BASE +
2908 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2910 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2912 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2914 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2916 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917 
2918 	q_off = dma_qm_offset + qman_id * 4;
2919 
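	/*
	 * Sketch of the split below: qman_id 0-3 are the four upper-CP streams
	 * and get a PQ of their own, while qman_id 4 is the lower CP (see the
	 * "Initializing lower CP" call site) and is configured without a PQ.
	 */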
2920 	if (qman_id < 4) {
2921 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2922 					lower_32_bits(qman_base_addr));
2923 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2924 					upper_32_bits(qman_base_addr));
2925 
2926 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2927 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2928 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2929 
2930 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2931 							QMAN_CPDMA_SIZE_OFFSET);
2932 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2933 							QMAN_CPDMA_SRC_OFFSET);
2934 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2935 							QMAN_CPDMA_DST_OFFSET);
2936 	} else {
2937 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2938 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2939 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2940 
2941 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2942 							QMAN_LDMA_SIZE_OFFSET);
2943 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2944 							QMAN_LDMA_SRC_OFFSET);
2945 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2946 							QMAN_LDMA_DST_OFFSET);
2947 
2948 		/* Configure RAZWI IRQ */
2949 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2950 		if (hdev->stop_on_err)
2951 			dma_qm_err_cfg |=
2952 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2953 
2954 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2955 
2956 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2957 			lower_32_bits(CFG_BASE + irq_handler_offset));
2958 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2959 			upper_32_bits(CFG_BASE + irq_handler_offset));
2960 
2961 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2962 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2963 									dma_id);
2964 
2965 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2966 				QM_ARB_ERR_MSG_EN_MASK);
2967 
2968 		/* Increase ARB WDT to support streams architecture */
2969 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2970 				GAUDI_ARB_WDT_TIMEOUT);
2971 
2972 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2973 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2974 				QMAN_INTERNAL_MAKE_TRUSTED);
2975 	}
2976 
2977 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2978 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2979 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2980 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2981 
2982 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2983 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2984 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2985 				mtr_base_ws_lo);
2986 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2987 				mtr_base_ws_hi);
2988 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2989 				so_base_ws_lo);
2990 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2991 				so_base_ws_hi);
2992 	}
2993 }
2994 
2995 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2996 {
2997 	struct gaudi_device *gaudi = hdev->asic_specific;
2998 	struct gaudi_internal_qman_info *q;
2999 	u64 qman_base_addr;
3000 	int i, j, dma_id, internal_q_index;
3001 
3002 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3003 		return;
3004 
3005 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3006 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3007 
3008 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
3009 			/*
3010 			 * Account for the CPU queue in order to get the correct
3011 			 * queue number, as all internal queues are placed after it
3012 			 */
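			/*
			 * e.g. (illustrative) dma_id 2 with QMAN_STREAMS == 4
			 * yields internal queue indices 9..12 for streams 0..3,
			 * i.e. right after the CPU queue slot.
			 */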
3013 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3014 
3015 			q = &gaudi->internal_qmans[internal_q_index];
3016 			qman_base_addr = (u64) q->pq_dma_addr;
3017 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3018 						qman_base_addr);
3019 		}
3020 
3021 		/* Initializing lower CP for HBM DMA QMAN */
3022 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3023 
3024 		gaudi_init_dma_core(hdev, dma_id);
3025 
3026 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3027 	}
3028 
3029 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3030 }
3031 
3032 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3033 					int qman_id, u64 qman_base_addr)
3034 {
3035 	struct cpu_dyn_regs *dyn_regs =
3036 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3037 	u32 mtr_base_lo, mtr_base_hi;
3038 	u32 so_base_lo, so_base_hi;
3039 	u32 irq_handler_offset;
3040 	u32 q_off, mme_id;
3041 	u32 mme_qm_err_cfg;
3042 
3043 	mtr_base_lo = lower_32_bits(CFG_BASE +
3044 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3045 	mtr_base_hi = upper_32_bits(CFG_BASE +
3046 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047 	so_base_lo = lower_32_bits(CFG_BASE +
3048 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3049 	so_base_hi = upper_32_bits(CFG_BASE +
3050 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051 
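	/* Per-stream registers (PQ_*_0..3, CP_*_0..3) are consecutive 32-bit
	 * registers, so the stream's register offset is simply qman_id * 4
	 */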
3052 	q_off = mme_offset + qman_id * 4;
3053 
3054 	if (qman_id < 4) {
3055 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3056 					lower_32_bits(qman_base_addr));
3057 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3058 					upper_32_bits(qman_base_addr));
3059 
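		/* The PQ size register takes the log2 of the queue length */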
3060 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3061 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3062 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3063 
3064 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3065 							QMAN_CPDMA_SIZE_OFFSET);
3066 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3067 							QMAN_CPDMA_SRC_OFFSET);
3068 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3069 							QMAN_CPDMA_DST_OFFSET);
3070 	} else {
3071 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3072 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3073 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3074 
3075 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3076 							QMAN_LDMA_SIZE_OFFSET);
3077 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3078 							QMAN_LDMA_SRC_OFFSET);
3079 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3080 							QMAN_LDMA_DST_OFFSET);
3081 
3082 		/* Configure RAZWI IRQ */
3083 		mme_id = mme_offset /
3084 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3085 
3086 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3087 		if (hdev->stop_on_err)
3088 			mme_qm_err_cfg |=
3089 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3090 
3091 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3092 
3093 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3094 			lower_32_bits(CFG_BASE + irq_handler_offset));
3095 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3096 			upper_32_bits(CFG_BASE + irq_handler_offset));
3097 
3098 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3099 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3100 									mme_id);
3101 
3102 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3103 				QM_ARB_ERR_MSG_EN_MASK);
3104 
3105 		/* Increase ARB WDT to support streams architecture */
3106 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3107 				GAUDI_ARB_WDT_TIMEOUT);
3108 
3109 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3110 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3111 				QMAN_INTERNAL_MAKE_TRUSTED);
3112 	}
3113 
3114 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3115 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3116 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3117 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3118 }
3119 
3120 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3121 {
3122 	struct gaudi_device *gaudi = hdev->asic_specific;
3123 	struct gaudi_internal_qman_info *q;
3124 	u64 qman_base_addr;
3125 	u32 mme_offset;
3126 	int i, internal_q_index;
3127 
3128 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3129 		return;
3130 
3131 	/*
3132 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3133 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3134 	 */
3135 
3136 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3137 
3138 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3139 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3140 		q = &gaudi->internal_qmans[internal_q_index];
3141 		qman_base_addr = (u64) q->pq_dma_addr;
3142 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3143 					qman_base_addr);
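		/* The first four streams use the MME2 QMAN block; after them,
		 * zero the offset so the remaining streams target the MME0 QMAN
		 * block, per the mapping note above
		 */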
3144 		if (i == 3)
3145 			mme_offset = 0;
3146 	}
3147 
3148 	/* Initializing lower CP for MME QMANs */
3149 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3150 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3151 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3152 
3153 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3154 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3155 
3156 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3157 }
3158 
3159 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3160 				int qman_id, u64 qman_base_addr)
3161 {
3162 	struct cpu_dyn_regs *dyn_regs =
3163 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3164 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3165 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3166 	u32 tpc_qm_err_cfg, irq_handler_offset;
3167 	u32 q_off, tpc_id;
3168 
3169 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3170 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3172 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173 	so_base_en_lo = lower_32_bits(CFG_BASE +
3174 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175 	so_base_en_hi = upper_32_bits(CFG_BASE +
3176 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3178 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3179 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3180 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3182 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3183 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3184 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185 
3186 	q_off = tpc_offset + qman_id * 4;
3187 
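	/* Derive the TPC index from the per-TPC QMAN register-block stride */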
3188 	tpc_id = tpc_offset /
3189 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3190 
3191 	if (qman_id < 4) {
3192 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3193 					lower_32_bits(qman_base_addr));
3194 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3195 					upper_32_bits(qman_base_addr));
3196 
3197 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3198 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3199 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3200 
3201 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3202 							QMAN_CPDMA_SIZE_OFFSET);
3203 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3204 							QMAN_CPDMA_SRC_OFFSET);
3205 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3206 							QMAN_CPDMA_DST_OFFSET);
3207 	} else {
3208 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3209 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3210 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3211 
3212 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3213 							QMAN_LDMA_SIZE_OFFSET);
3214 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3215 							QMAN_LDMA_SRC_OFFSET);
3216 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3217 							QMAN_LDMA_DST_OFFSET);
3218 
3219 		/* Configure RAZWI IRQ */
3220 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3221 		if (hdev->stop_on_err)
3222 			tpc_qm_err_cfg |=
3223 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3224 
3225 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3226 
3227 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3228 			lower_32_bits(CFG_BASE + irq_handler_offset));
3229 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3230 			upper_32_bits(CFG_BASE + irq_handler_offset));
3231 
3232 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3233 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3234 									tpc_id);
3235 
3236 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3237 				QM_ARB_ERR_MSG_EN_MASK);
3238 
3239 		/* Increase ARB WDT to support streams architecture */
3240 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3241 				GAUDI_ARB_WDT_TIMEOUT);
3242 
3243 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3244 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3245 				QMAN_INTERNAL_MAKE_TRUSTED);
3246 	}
3247 
3248 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3249 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3250 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3251 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3252 
3253 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3254 	if (tpc_id == 6) {
3255 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3256 				mtr_base_ws_lo);
3257 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3258 				mtr_base_ws_hi);
3259 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3260 				so_base_ws_lo);
3261 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3262 				so_base_ws_hi);
3263 	}
3264 }
3265 
3266 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3267 {
3268 	struct gaudi_device *gaudi = hdev->asic_specific;
3269 	struct gaudi_internal_qman_info *q;
3270 	u64 qman_base_addr;
3271 	u32 so_base_hi, tpc_offset = 0;
3272 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3273 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3274 	int i, tpc_id, internal_q_index;
3275 
3276 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3277 		return;
3278 
3279 	so_base_hi = upper_32_bits(CFG_BASE +
3280 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3281 
3282 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3283 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3284 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3285 						tpc_id * QMAN_STREAMS + i;
3286 			q = &gaudi->internal_qmans[internal_q_index];
3287 			qman_base_addr = (u64) q->pq_dma_addr;
3288 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3289 						qman_base_addr);
3290 
3291 			if (i == 3) {
3292 				/* Initializing lower CP for TPC QMAN */
3293 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3294 
3295 				/* Enable the QMAN and TPC channel */
3296 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3297 						QMAN_TPC_ENABLE);
3298 			}
3299 		}
3300 
3301 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3302 				so_base_hi);
3303 
3304 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3305 
3306 		gaudi->hw_cap_initialized |=
3307 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3308 	}
3309 }
3310 
3311 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3312 				int qman_id, u64 qman_base_addr, int nic_id)
3313 {
3314 	struct cpu_dyn_regs *dyn_regs =
3315 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3316 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3317 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3318 	u32 nic_qm_err_cfg, irq_handler_offset;
3319 	u32 q_off;
3320 
3321 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3322 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3323 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3324 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325 	so_base_en_lo = lower_32_bits(CFG_BASE +
3326 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3327 	so_base_en_hi = upper_32_bits(CFG_BASE +
3328 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3330 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3331 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3332 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3334 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3335 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3336 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337 
3338 	q_off = nic_offset + qman_id * 4;
3339 
3340 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3341 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3342 
3343 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3344 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3345 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3346 
3347 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3348 							QMAN_LDMA_SIZE_OFFSET);
3349 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3350 							QMAN_LDMA_SRC_OFFSET);
3351 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3352 							QMAN_LDMA_DST_OFFSET);
3353 
3354 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3355 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3356 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3357 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3358 
3359 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3360 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3361 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3362 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3363 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3364 
3365 	if (qman_id == 0) {
3366 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3367 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3368 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3369 
3370 		/* Configure RAZWI IRQ */
3371 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3372 		if (hdev->stop_on_err)
3373 			nic_qm_err_cfg |=
3374 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3375 
3376 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3377 
3378 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3379 			lower_32_bits(CFG_BASE + irq_handler_offset));
3380 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3381 			upper_32_bits(CFG_BASE + irq_handler_offset));
3382 
3383 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3384 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3385 									nic_id);
3386 
3387 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3388 				QM_ARB_ERR_MSG_EN_MASK);
3389 
3390 		/* Increase ARB WDT to support streams architecture */
3391 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3392 				GAUDI_ARB_WDT_TIMEOUT);
3393 
3394 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3395 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3396 				QMAN_INTERNAL_MAKE_TRUSTED);
3397 	}
3398 }
3399 
3400 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3401 {
3402 	struct gaudi_device *gaudi = hdev->asic_specific;
3403 	struct gaudi_internal_qman_info *q;
3404 	u64 qman_base_addr;
3405 	u32 nic_offset = 0;
3406 	u32 nic_delta_between_qmans =
3407 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408 	u32 nic_delta_between_nics =
3409 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410 	int i, nic_id, internal_q_index;
3411 
3412 	if (!hdev->nic_ports_mask)
3413 		return;
3414 
3415 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3416 		return;
3417 
3418 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3419 
3420 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3421 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3422 			nic_offset += nic_delta_between_qmans;
3423 			if (nic_id & 1) {
3424 				nic_offset -= (nic_delta_between_qmans * 2);
3425 				nic_offset += nic_delta_between_nics;
3426 			}
3427 			continue;
3428 		}
3429 
3430 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3431 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3432 						nic_id * QMAN_STREAMS + i;
3433 			q = &gaudi->internal_qmans[internal_q_index];
3434 			qman_base_addr = (u64) q->pq_dma_addr;
3435 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3436 						qman_base_addr, nic_id);
3437 		}
3438 
3439 		/* Enable the QMAN */
3440 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3441 
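		/* Each NIC macro contains two QMANs: step to the next QMAN, and
		 * after the macro's second QMAN (odd nic_id) rewind past both
		 * and jump to the next NIC macro
		 */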
3442 		nic_offset += nic_delta_between_qmans;
3443 		if (nic_id & 1) {
3444 			nic_offset -= (nic_delta_between_qmans * 2);
3445 			nic_offset += nic_delta_between_nics;
3446 		}
3447 
3448 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3449 	}
3450 }
3451 
3452 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3453 {
3454 	struct gaudi_device *gaudi = hdev->asic_specific;
3455 
3456 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457 		return;
3458 
3459 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3460 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3461 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3462 }
3463 
3464 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3465 {
3466 	struct gaudi_device *gaudi = hdev->asic_specific;
3467 
3468 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3469 		return;
3470 
3471 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3472 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3473 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3474 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3475 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3476 }
3477 
3478 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3479 {
3480 	struct gaudi_device *gaudi = hdev->asic_specific;
3481 
3482 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483 		return;
3484 
3485 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3486 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3487 }
3488 
3489 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3490 {
3491 	struct gaudi_device *gaudi = hdev->asic_specific;
3492 	u32 tpc_offset = 0;
3493 	int tpc_id;
3494 
3495 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496 		return;
3497 
3498 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3499 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3500 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3501 	}
3502 }
3503 
3504 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3505 {
3506 	struct gaudi_device *gaudi = hdev->asic_specific;
3507 	u32 nic_mask, nic_offset = 0;
3508 	u32 nic_delta_between_qmans =
3509 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3510 	u32 nic_delta_between_nics =
3511 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512 	int nic_id;
3513 
3514 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3515 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3516 
3517 		if (gaudi->hw_cap_initialized & nic_mask)
3518 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3519 
3520 		nic_offset += nic_delta_between_qmans;
3521 		if (nic_id & 1) {
3522 			nic_offset -= (nic_delta_between_qmans * 2);
3523 			nic_offset += nic_delta_between_nics;
3524 		}
3525 	}
3526 }
3527 
3528 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3529 {
3530 	struct gaudi_device *gaudi = hdev->asic_specific;
3531 
3532 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3533 		return;
3534 
3535 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3536 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3538 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 }
3540 
3541 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3542 {
3543 	struct gaudi_device *gaudi = hdev->asic_specific;
3544 
3545 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3546 		return;
3547 
3548 	/* Stop CPs of HBM DMA QMANs */
3549 
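	/* The 0x1F mask covers the four upper (stream) CPs plus the lower CP */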
3550 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3551 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 }
3556 
3557 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3558 {
3559 	struct gaudi_device *gaudi = hdev->asic_specific;
3560 
3561 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3562 		return;
3563 
3564 	/* Stop CPs of MME QMANs */
3565 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567 }
3568 
3569 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3570 {
3571 	struct gaudi_device *gaudi = hdev->asic_specific;
3572 
3573 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3574 		return;
3575 
3576 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3578 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 }
3585 
3586 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3587 {
3588 	struct gaudi_device *gaudi = hdev->asic_specific;
3589 
3590 	/* Stop upper CPs of QMANs */
3591 
3592 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3593 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3594 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597 
3598 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3599 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3600 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603 
3604 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3605 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3606 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609 
3610 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3611 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3612 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615 
3616 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3617 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3618 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621 
3622 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3623 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3624 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627 
3628 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3629 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3630 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633 
3634 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3635 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3636 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639 
3640 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3641 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3642 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645 
3646 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3647 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3648 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651 }
3652 
3653 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3654 {
3655 	struct gaudi_device *gaudi = hdev->asic_specific;
3656 
3657 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3658 		return;
3659 
3660 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3661 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3662 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 }
3664 
3665 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3666 {
3667 	struct gaudi_device *gaudi = hdev->asic_specific;
3668 
3669 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3670 		return;
3671 
3672 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3673 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3674 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 }
3678 
3679 static void gaudi_mme_stall(struct hl_device *hdev)
3680 {
3681 	struct gaudi_device *gaudi = hdev->asic_specific;
3682 
3683 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3684 		return;
3685 
3686 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3687 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3688 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3689 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3690 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3691 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3693 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3695 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3697 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3699 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3701 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3703 }
3704 
3705 static void gaudi_tpc_stall(struct hl_device *hdev)
3706 {
3707 	struct gaudi_device *gaudi = hdev->asic_specific;
3708 
3709 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3710 		return;
3711 
3712 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3713 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3714 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 }
3721 
3722 static void gaudi_set_clock_gating(struct hl_device *hdev)
3723 {
3724 	struct gaudi_device *gaudi = hdev->asic_specific;
3725 	u32 qman_offset;
3726 	bool enable;
3727 	int i;
3728 
3729 	/* If a debug session is in progress, don't enable clock gating
3730 	 * as it may interfere
3731 	 */
3732 	if (hdev->in_debug)
3733 		return;
3734 
3735 	if (hdev->asic_prop.fw_security_enabled)
3736 		return;
3737 
3738 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3739 		enable = !!(hdev->clock_gating_mask &
3740 				(BIT_ULL(gaudi_dma_assignment[i])));
3741 
3742 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3743 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3744 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3745 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3746 				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3747 	}
3748 
3749 	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3750 		enable = !!(hdev->clock_gating_mask &
3751 				(BIT_ULL(gaudi_dma_assignment[i])));
3752 
3753 		/* GC sends work to the DMA engine through the upper CP in DMA5,
3754 		 * so clock gating must not be enabled on that DMA
3755 		 */
3756 		if (i == GAUDI_HBM_DMA_4)
3757 			enable = 0;
3758 
3759 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3760 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3761 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3763 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764 	}
3765 
3766 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3767 	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3768 	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3769 
3770 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3771 	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3772 	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3773 
3774 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3775 		enable = !!(hdev->clock_gating_mask &
3776 				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3777 
3778 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3779 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3780 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3781 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3782 
3783 		qman_offset += TPC_QMAN_OFFSET;
3784 	}
3785 
3786 	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3787 }
3788 
3789 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3790 {
3791 	struct gaudi_device *gaudi = hdev->asic_specific;
3792 	u32 qman_offset;
3793 	int i;
3794 
3795 	if (hdev->asic_prop.fw_security_enabled)
3796 		return;
3797 
3798 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3799 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3800 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3801 
3802 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3803 	}
3804 
3805 	WREG32(mmMME0_QM_CGM_CFG, 0);
3806 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3807 	WREG32(mmMME2_QM_CGM_CFG, 0);
3808 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3809 
3810 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3811 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3812 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3813 
3814 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3815 	}
3816 
3817 	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3818 }
3819 
3820 static void gaudi_enable_timestamp(struct hl_device *hdev)
3821 {
3822 	/* Disable the timestamp counter */
3823 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3824 
3825 	/* Zero the lower/upper parts of the 64-bit counter */
3826 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3827 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3828 
3829 	/* Enable the counter */
3830 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3831 }
3832 
3833 static void gaudi_disable_timestamp(struct hl_device *hdev)
3834 {
3835 	/* Disable the timestamp counter */
3836 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3837 }
3838 
3839 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3840 {
3841 	u32 wait_timeout_ms;
3842 
3843 	dev_info(hdev->dev,
3844 		"Halting compute engines and disabling interrupts\n");
3845 
3846 	if (hdev->pldm)
3847 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3848 	else
3849 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3850 
3851 	if (fw_reset)
3852 		goto skip_engines;
3853 
3854 	gaudi_stop_nic_qmans(hdev);
3855 	gaudi_stop_mme_qmans(hdev);
3856 	gaudi_stop_tpc_qmans(hdev);
3857 	gaudi_stop_hbm_dma_qmans(hdev);
3858 	gaudi_stop_pci_dma_qmans(hdev);
3859 
3860 	hdev->asic_funcs->disable_clock_gating(hdev);
3861 
3862 	msleep(wait_timeout_ms);
3863 
3864 	gaudi_pci_dma_stall(hdev);
3865 	gaudi_hbm_dma_stall(hdev);
3866 	gaudi_tpc_stall(hdev);
3867 	gaudi_mme_stall(hdev);
3868 
3869 	msleep(wait_timeout_ms);
3870 
3871 	gaudi_disable_nic_qmans(hdev);
3872 	gaudi_disable_mme_qmans(hdev);
3873 	gaudi_disable_tpc_qmans(hdev);
3874 	gaudi_disable_hbm_dma_qmans(hdev);
3875 	gaudi_disable_pci_dma_qmans(hdev);
3876 
3877 	gaudi_disable_timestamp(hdev);
3878 
3879 skip_engines:
3880 	gaudi_disable_msi(hdev);
3881 }
3882 
3883 static int gaudi_mmu_init(struct hl_device *hdev)
3884 {
3885 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3886 	struct gaudi_device *gaudi = hdev->asic_specific;
3887 	u64 hop0_addr;
3888 	int rc, i;
3889 
3890 	if (!hdev->mmu_enable)
3891 		return 0;
3892 
3893 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3894 		return 0;
3895 
3896 	for (i = 0 ; i < prop->max_asid ; i++) {
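		/* Each ASID has its own hop-0 table, laid out consecutively in
		 * the MMU page-tables area
		 */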
3897 		hop0_addr = prop->mmu_pgt_addr +
3898 				(i * prop->mmu_hop_table_size);
3899 
3900 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3901 		if (rc) {
3902 			dev_err(hdev->dev,
3903 				"failed to set hop0 addr for asid %d\n", i);
3904 			goto err;
3905 		}
3906 	}
3907 
3908 	/* init MMU cache manage page */
3909 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3910 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3911 
3912 	/* mem cache invalidation */
3913 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3914 
3915 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3916 
3917 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3918 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3919 
3920 	WREG32(mmSTLB_HOP_CONFIGURATION,
3921 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3922 
3923 	/*
3924 	 * The H/W expects the first PI after init to be 1. After wraparound
3925 	 * we'll write 0.
3926 	 */
3927 	gaudi->mmu_cache_inv_pi = 1;
3928 
3929 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3930 
3931 	return 0;
3932 
3933 err:
3934 	return rc;
3935 }
3936 
3937 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3938 {
3939 	void __iomem *dst;
3940 
3941 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3942 
3943 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3944 }
3945 
3946 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3947 {
3948 	void __iomem *dst;
3949 
3950 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3951 
3952 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3953 }
3954 
3955 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3956 {
3957 	struct dynamic_fw_load_mgr *dynamic_loader;
3958 	struct cpu_dyn_regs *dyn_regs;
3959 
3960 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3961 
3962 	/*
3963 	 * Here we set initial values for a few specific dynamic regs (before
3964 	 * the first descriptor is read from the FW, these values have to be
3965 	 * hard-coded). In later stages of the protocol these values are
3966 	 * updated automatically by reading the FW descriptor, so the data
3967 	 * there is always up-to-date.
3968 	 */
3969 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3970 	dyn_regs->kmd_msg_to_cpu =
3971 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3972 	dyn_regs->cpu_cmd_status_to_host =
3973 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3974 
3975 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3976 }
3977 
3978 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3979 {
3980 	struct static_fw_load_mgr *static_loader;
3981 
3982 	static_loader = &hdev->fw_loader.static_loader;
3983 
3984 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3985 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3986 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3987 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3988 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3989 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3990 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3991 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3992 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3993 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3994 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3995 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3996 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3997 			GAUDI_PLDM_RESET_WAIT_MSEC :
3998 			GAUDI_CPU_RESET_WAIT_MSEC;
3999 }
4000 
4001 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4002 {
4003 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4004 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4005 
4006 	/* fill common fields */
4007 	fw_loader->linux_loaded = false;
4008 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4009 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4010 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4011 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4012 	fw_loader->skip_bmc = !hdev->bmc_enable;
4013 	fw_loader->sram_bar_id = SRAM_BAR_ID;
4014 	fw_loader->dram_bar_id = HBM_BAR_ID;
4015 
4016 	if (prop->dynamic_fw_load)
4017 		gaudi_init_dynamic_firmware_loader(hdev);
4018 	else
4019 		gaudi_init_static_firmware_loader(hdev);
4020 }
4021 
4022 static int gaudi_init_cpu(struct hl_device *hdev)
4023 {
4024 	struct gaudi_device *gaudi = hdev->asic_specific;
4025 	int rc;
4026 
4027 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4028 		return 0;
4029 
4030 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4031 		return 0;
4032 
4033 	/*
4034 	 * The device CPU works with 40-bit addresses.
4035 	 * This register sets the extension to 50 bits.
4036 	 */
4037 	if (!hdev->asic_prop.fw_security_enabled)
4038 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4039 
4040 	rc = hl_fw_init_cpu(hdev);
4041 
4042 	if (rc)
4043 		return rc;
4044 
4045 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
4046 
4047 	return 0;
4048 }
4049 
4050 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4051 {
4052 	struct cpu_dyn_regs *dyn_regs =
4053 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4054 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4055 	struct gaudi_device *gaudi = hdev->asic_specific;
4056 	u32 status, irq_handler_offset;
4057 	struct hl_eq *eq;
4058 	struct hl_hw_queue *cpu_pq =
4059 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4060 	int err;
4061 
4062 	if (!hdev->cpu_queues_enable)
4063 		return 0;
4064 
4065 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4066 		return 0;
4067 
4068 	eq = &hdev->event_queue;
4069 
4070 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4071 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4072 
4073 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4074 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4075 
4076 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4077 			lower_32_bits(hdev->cpu_accessible_dma_address));
4078 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4079 			upper_32_bits(hdev->cpu_accessible_dma_address));
4080 
4081 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4082 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4083 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4084 
4085 	/* Used for EQ CI */
4086 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4087 
4088 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4089 
4090 	if (gaudi->multi_msi_mode)
4091 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4092 	else
4093 		WREG32(mmCPU_IF_QUEUE_INIT,
4094 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4095 
4096 	irq_handler_offset = prop->gic_interrupts_enable ?
4097 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4098 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4099 
4100 	WREG32(irq_handler_offset,
4101 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4102 
4103 	err = hl_poll_timeout(
4104 		hdev,
4105 		mmCPU_IF_QUEUE_INIT,
4106 		status,
4107 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4108 		1000,
4109 		cpu_timeout);
4110 
4111 	if (err) {
4112 		dev_err(hdev->dev,
4113 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
4114 		return -EIO;
4115 	}
4116 
4117 	/* update FW application security bits */
4118 	if (prop->fw_cpu_boot_dev_sts0_valid)
4119 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4120 	if (prop->fw_cpu_boot_dev_sts1_valid)
4121 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4122 
4123 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4124 	return 0;
4125 }
4126 
4127 static void gaudi_pre_hw_init(struct hl_device *hdev)
4128 {
4129 	/* Perform read from the device to make sure device is up */
4130 	RREG32(mmHW_STATE);
4131 
4132 	if (!hdev->asic_prop.fw_security_enabled) {
4133 		/* Set the access through PCI bars (Linux driver only) as
4134 		 * secured
4135 		 */
4136 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4137 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4138 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4139 
4140 		/* Perform read to flush the waiting writes to ensure
4141 		 * configuration was set in the device
4142 		 */
4143 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4144 	}
4145 
4146 	/*
4147 	 * Let's mark in the H/W that we have reached this point. We check
4148 	 * this value in the reset_before_init function to understand whether
4149 	 * we need to reset the chip before doing H/W init. This register is
4150 	 * cleared by the H/W upon H/W reset
4151 	 */
4152 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4153 }
4154 
4155 static int gaudi_hw_init(struct hl_device *hdev)
4156 {
4157 	struct gaudi_device *gaudi = hdev->asic_specific;
4158 	int rc;
4159 
4160 	gaudi_pre_hw_init(hdev);
4161 
4162 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4163 	 * So we set it here and if anyone tries to move it later to
4164 	 * a different address, there will be an error
4165 	 */
4166 	if (hdev->asic_prop.iatu_done_by_fw)
4167 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4168 
4169 	/*
4170 	 * Before pushing u-boot/Linux to the device, the HBM BAR must be set
4171 	 * to the DRAM base address
4172 	 */
4173 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4174 		dev_err(hdev->dev,
4175 			"failed to map HBM bar to DRAM base address\n");
4176 		return -EIO;
4177 	}
4178 
4179 	rc = gaudi_init_cpu(hdev);
4180 	if (rc) {
4181 		dev_err(hdev->dev, "failed to initialize CPU\n");
4182 		return rc;
4183 	}
4184 
4185 	/* If clock gating was enabled by preboot, we need to disable
4186 	 * it here before touching the MME/TPC registers.
4187 	 * There is no need to take clk gating mutex because when this function
4188 	 * runs, no other relevant code can run
4189 	 */
4190 	hdev->asic_funcs->disable_clock_gating(hdev);
4191 
4192 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4193 	gaudi_init_scrambler_sram(hdev);
4194 
4195 	/* This is here just in case we are working without CPU */
4196 	gaudi_init_scrambler_hbm(hdev);
4197 
4198 	gaudi_init_golden_registers(hdev);
4199 
4200 	rc = gaudi_mmu_init(hdev);
4201 	if (rc)
4202 		return rc;
4203 
4204 	gaudi_init_security(hdev);
4205 
4206 	gaudi_init_pci_dma_qmans(hdev);
4207 
4208 	gaudi_init_hbm_dma_qmans(hdev);
4209 
4210 	gaudi_init_mme_qmans(hdev);
4211 
4212 	gaudi_init_tpc_qmans(hdev);
4213 
4214 	gaudi_init_nic_qmans(hdev);
4215 
4216 	hdev->asic_funcs->set_clock_gating(hdev);
4217 
4218 	gaudi_enable_timestamp(hdev);
4219 
4220 	/* MSI must be enabled before CPU queues and NIC are initialized */
4221 	rc = gaudi_enable_msi(hdev);
4222 	if (rc)
4223 		goto disable_queues;
4224 
4225 	/* must be called after MSI was enabled */
4226 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4227 	if (rc) {
4228 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4229 			rc);
4230 		goto disable_msi;
4231 	}
4232 
4233 	/* Perform read from the device to flush all configuration */
4234 	RREG32(mmHW_STATE);
4235 
4236 	return 0;
4237 
4238 disable_msi:
4239 	gaudi_disable_msi(hdev);
4240 disable_queues:
4241 	gaudi_disable_mme_qmans(hdev);
4242 	gaudi_disable_pci_dma_qmans(hdev);
4243 
4244 	return rc;
4245 }
4246 
4247 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4248 {
4249 	struct cpu_dyn_regs *dyn_regs =
4250 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4251 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4252 	struct gaudi_device *gaudi = hdev->asic_specific;
4253 	bool driver_performs_reset;
4254 
4255 	if (!hard_reset) {
4256 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4257 		return;
4258 	}
4259 
4260 	if (hdev->pldm) {
4261 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4262 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4263 	} else {
4264 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4265 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4266 	}
4267 
4268 	if (fw_reset) {
4269 		dev_info(hdev->dev,
4270 			"Firmware performs HARD reset, going to wait %dms\n",
4271 			reset_timeout_ms);
4272 
4273 		goto skip_reset;
4274 	}
4275 
4276 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4277 					!hdev->asic_prop.hard_reset_done_by_fw);
4278 
4279 	/* Set device to handle FLR by H/W as we will put the device CPU to
4280 	 * halt mode
4281 	 */
4282 	if (driver_performs_reset)
4283 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4284 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4285 
4286 	/* If Linux is loaded on the device CPU we need to communicate with it
4287 	 * via the GIC. Otherwise, we need to use COMMS or, for old F/Ws, the
4288 	 * MSG_TO_CPU registers
4289 	 */
4290 	if (hdev->fw_loader.linux_loaded) {
4291 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4292 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4293 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4294 
4295 		WREG32(irq_handler_offset,
4296 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4297 	} else {
4298 		if (hdev->asic_prop.hard_reset_done_by_fw)
4299 			hl_fw_ask_hard_reset_without_linux(hdev);
4300 		else
4301 			hl_fw_ask_halt_machine_without_linux(hdev);
4302 	}
4303 
4304 	if (driver_performs_reset) {
4305 
4306 		/* Configure the reset registers. Must be done as early as
4307 		 * possible in case we fail during H/W initialization
4308 		 */
4309 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4310 						(CFG_RST_H_DMA_MASK |
4311 						CFG_RST_H_MME_MASK |
4312 						CFG_RST_H_SM_MASK |
4313 						CFG_RST_H_TPC_7_MASK));
4314 
4315 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4316 
4317 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4318 						(CFG_RST_H_HBM_MASK |
4319 						CFG_RST_H_TPC_7_MASK |
4320 						CFG_RST_H_NIC_MASK |
4321 						CFG_RST_H_SM_MASK |
4322 						CFG_RST_H_DMA_MASK |
4323 						CFG_RST_H_MME_MASK |
4324 						CFG_RST_H_CPU_MASK |
4325 						CFG_RST_H_MMU_MASK));
4326 
4327 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4328 						(CFG_RST_L_IF_MASK |
4329 						CFG_RST_L_PSOC_MASK |
4330 						CFG_RST_L_TPC_MASK));
4331 
4332 		msleep(cpu_timeout_ms);
4333 
4334 		/* Tell ASIC not to re-initialize PCIe */
4335 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4336 
4337 		/* Restart BTL/BLR upon hard-reset */
4338 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4339 
4340 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4341 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4342 
4343 		dev_info(hdev->dev,
4344 			"Issued HARD reset command, going to wait %dms\n",
4345 			reset_timeout_ms);
4346 	} else {
4347 		dev_info(hdev->dev,
4348 			"Firmware performs HARD reset, going to wait %dms\n",
4349 			reset_timeout_ms);
4350 	}
4351 
4352 skip_reset:
4353 	/*
4354 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4355 	 * itself is in reset. Need to wait until the reset is deasserted
4356 	 */
4357 	msleep(reset_timeout_ms);
4358 
4359 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4360 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4361 		dev_err(hdev->dev,
4362 			"Timeout while waiting for device to reset 0x%x\n",
4363 			status);
4364 
4365 	if (gaudi) {
4366 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4367 				HW_CAP_HBM | HW_CAP_PCI_DMA |
4368 				HW_CAP_MME | HW_CAP_TPC_MASK |
4369 				HW_CAP_HBM_DMA | HW_CAP_PLL |
4370 				HW_CAP_NIC_MASK | HW_CAP_MMU |
4371 				HW_CAP_SRAM_SCRAMBLER |
4372 				HW_CAP_HBM_SCRAMBLER |
4373 				HW_CAP_CLK_GATE);
4374 
4375 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4376 
4377 		hdev->device_cpu_is_halted = false;
4378 	}
4379 }
4380 
4381 static int gaudi_suspend(struct hl_device *hdev)
4382 {
4383 	int rc;
4384 
4385 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4386 	if (rc)
4387 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4388 
4389 	return rc;
4390 }
4391 
4392 static int gaudi_resume(struct hl_device *hdev)
4393 {
4394 	return gaudi_init_iatu(hdev);
4395 }
4396 
4397 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4398 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4399 {
4400 	int rc;
4401 
4402 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4403 			VM_DONTCOPY | VM_NORESERVE;
4404 
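	/* dma_addr carries the HOST_PHYS_BASE offset the driver applies to
	 * host memory, so subtract it to recover the raw DMA address that
	 * dma_mmap_coherent() expects
	 */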
4405 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4406 				(dma_addr - HOST_PHYS_BASE), size);
4407 	if (rc)
4408 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4409 
4410 	return rc;
4411 }
4412 
4413 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4414 {
4415 	struct cpu_dyn_regs *dyn_regs =
4416 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4417 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4418 	struct gaudi_device *gaudi = hdev->asic_specific;
4419 	bool invalid_queue = false;
4420 	int dma_id;
4421 
4422 	switch (hw_queue_id) {
4423 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4424 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4425 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4426 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4427 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4428 		break;
4429 
4430 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4431 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4432 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4433 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4434 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4435 		break;
4436 
4437 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4438 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4439 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
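		/* The CPU PQ queue ID sits between the PCI DMA and HBM DMA queue
		 * ranges, hence the extra -1 when extracting the stream index
		 */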
4440 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4441 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4445 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4446 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4448 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449 		break;
4450 
4451 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4452 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4453 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4455 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456 		break;
4457 
4458 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4459 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4460 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463 		break;
4464 
4465 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4466 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4467 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470 		break;
4471 
4472 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4473 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4474 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477 		break;
4478 
4479 	case GAUDI_QUEUE_ID_CPU_PQ:
4480 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4481 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4482 		else
4483 			invalid_queue = true;
4484 		break;
4485 
4486 	case GAUDI_QUEUE_ID_MME_0_0:
4487 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4488 		break;
4489 
4490 	case GAUDI_QUEUE_ID_MME_0_1:
4491 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4492 		break;
4493 
4494 	case GAUDI_QUEUE_ID_MME_0_2:
4495 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4496 		break;
4497 
4498 	case GAUDI_QUEUE_ID_MME_0_3:
4499 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4500 		break;
4501 
4502 	case GAUDI_QUEUE_ID_MME_1_0:
4503 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4504 		break;
4505 
4506 	case GAUDI_QUEUE_ID_MME_1_1:
4507 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4508 		break;
4509 
4510 	case GAUDI_QUEUE_ID_MME_1_2:
4511 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4512 		break;
4513 
4514 	case GAUDI_QUEUE_ID_MME_1_3:
4515 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4516 		break;
4517 
4518 	case GAUDI_QUEUE_ID_TPC_0_0:
4519 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4520 		break;
4521 
4522 	case GAUDI_QUEUE_ID_TPC_0_1:
4523 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4524 		break;
4525 
4526 	case GAUDI_QUEUE_ID_TPC_0_2:
4527 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4528 		break;
4529 
4530 	case GAUDI_QUEUE_ID_TPC_0_3:
4531 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4532 		break;
4533 
4534 	case GAUDI_QUEUE_ID_TPC_1_0:
4535 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4536 		break;
4537 
4538 	case GAUDI_QUEUE_ID_TPC_1_1:
4539 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4540 		break;
4541 
4542 	case GAUDI_QUEUE_ID_TPC_1_2:
4543 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4544 		break;
4545 
4546 	case GAUDI_QUEUE_ID_TPC_1_3:
4547 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4548 		break;
4549 
4550 	case GAUDI_QUEUE_ID_TPC_2_0:
4551 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4552 		break;
4553 
4554 	case GAUDI_QUEUE_ID_TPC_2_1:
4555 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4556 		break;
4557 
4558 	case GAUDI_QUEUE_ID_TPC_2_2:
4559 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4560 		break;
4561 
4562 	case GAUDI_QUEUE_ID_TPC_2_3:
4563 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4564 		break;
4565 
4566 	case GAUDI_QUEUE_ID_TPC_3_0:
4567 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4568 		break;
4569 
4570 	case GAUDI_QUEUE_ID_TPC_3_1:
4571 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4572 		break;
4573 
4574 	case GAUDI_QUEUE_ID_TPC_3_2:
4575 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4576 		break;
4577 
4578 	case GAUDI_QUEUE_ID_TPC_3_3:
4579 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4580 		break;
4581 
4582 	case GAUDI_QUEUE_ID_TPC_4_0:
4583 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4584 		break;
4585 
4586 	case GAUDI_QUEUE_ID_TPC_4_1:
4587 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4588 		break;
4589 
4590 	case GAUDI_QUEUE_ID_TPC_4_2:
4591 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4592 		break;
4593 
4594 	case GAUDI_QUEUE_ID_TPC_4_3:
4595 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4596 		break;
4597 
4598 	case GAUDI_QUEUE_ID_TPC_5_0:
4599 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4600 		break;
4601 
4602 	case GAUDI_QUEUE_ID_TPC_5_1:
4603 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4604 		break;
4605 
4606 	case GAUDI_QUEUE_ID_TPC_5_2:
4607 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4608 		break;
4609 
4610 	case GAUDI_QUEUE_ID_TPC_5_3:
4611 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4612 		break;
4613 
4614 	case GAUDI_QUEUE_ID_TPC_6_0:
4615 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4616 		break;
4617 
4618 	case GAUDI_QUEUE_ID_TPC_6_1:
4619 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4620 		break;
4621 
4622 	case GAUDI_QUEUE_ID_TPC_6_2:
4623 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4624 		break;
4625 
4626 	case GAUDI_QUEUE_ID_TPC_6_3:
4627 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4628 		break;
4629 
4630 	case GAUDI_QUEUE_ID_TPC_7_0:
4631 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4632 		break;
4633 
4634 	case GAUDI_QUEUE_ID_TPC_7_1:
4635 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4636 		break;
4637 
4638 	case GAUDI_QUEUE_ID_TPC_7_2:
4639 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4640 		break;
4641 
4642 	case GAUDI_QUEUE_ID_TPC_7_3:
4643 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4644 		break;
4645 
4646 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4647 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4648 			invalid_queue = true;
4649 
4650 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4652 		break;
4653 
4654 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4655 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4656 			invalid_queue = true;
4657 
4658 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4660 		break;
4661 
4662 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4663 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4664 			invalid_queue = true;
4665 
4666 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4668 		break;
4669 
4670 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4671 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4672 			invalid_queue = true;
4673 
4674 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4676 		break;
4677 
4678 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4679 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4680 			invalid_queue = true;
4681 
4682 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4684 		break;
4685 
4686 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4687 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4688 			invalid_queue = true;
4689 
4690 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4692 		break;
4693 
4694 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4695 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4696 			invalid_queue = true;
4697 
4698 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4699 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4700 		break;
4701 
4702 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4703 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4704 			invalid_queue = true;
4705 
4706 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4707 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4708 		break;
4709 
4710 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4711 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4712 			invalid_queue = true;
4713 
4714 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4715 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4716 		break;
4717 
4718 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4719 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4720 			invalid_queue = true;
4721 
4722 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4723 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4724 		break;
4725 
4726 	default:
4727 		invalid_queue = true;
4728 	}
4729 
4730 	if (invalid_queue) {
4731 		/* Should never get here */
4732 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4733 			hw_queue_id);
4734 		return;
4735 	}
4736 
4737 	db_value = pi;
4738 
4739 	/* ring the doorbell */
4740 	WREG32(db_reg_offset, db_value);
4741 
4742 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4743 		/* make sure device CPU will read latest data from host */
4744 		mb();
4745 
4746 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4747 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4748 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4749 
4750 		WREG32(irq_handler_offset,
4751 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4752 	}
4753 }
4754 
4755 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4756 				struct hl_bd *bd)
4757 {
4758 	__le64 *pbd = (__le64 *) bd;
4759 
4760 	/* The QMANs are on host memory so a simple copy suffices */
4761 	pqe[0] = pbd[0];
4762 	pqe[1] = pbd[1];
4763 }
4764 
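/*
 * The coherent-allocation and SG-mapping helpers below share one convention:
 * host memory is presented to the device at an offset of HOST_PHYS_BASE, so
 * every DMA address handed to the hardware is shifted up by HOST_PHYS_BASE
 * after allocation/mapping and shifted back down before being returned to the
 * kernel DMA API on free/unmap.
 */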
4765 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4766 					dma_addr_t *dma_handle, gfp_t flags)
4767 {
4768 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4769 						dma_handle, flags);
4770 
4771 	/* Shift to the device's base physical address of host memory */
4772 	if (kernel_addr)
4773 		*dma_handle += HOST_PHYS_BASE;
4774 
4775 	return kernel_addr;
4776 }
4777 
4778 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4779 		void *cpu_addr, dma_addr_t dma_handle)
4780 {
4781 	/* Cancel the device's base physical address of host memory */
4782 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4783 
4784 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4785 }
4786 
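/*
 * HBM scrubbing strategy: the DRAM range is covered in chunks of up to 2GB,
 * with consecutive chunks handed round-robin to the DMA cores. Each core is
 * committed in memset mode (LIN + MEM_SET) with a fixed pattern programmed as
 * its source base, and only after a full round of programming does the driver
 * poll every core's STS0 busy bit before moving on to the next set of chunks.
 */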
4787 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4788 {
4789 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4790 	u64  cur_addr = DRAM_BASE_ADDR_USER;
4791 	u32 val;
4792 	u32 chunk_size;
4793 	int rc, dma_id;
4794 
4795 	while (cur_addr < prop->dram_end_address) {
4796 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4797 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4798 
4799 			chunk_size =
4800 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4801 
4802 			dev_dbg(hdev->dev,
4803 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4804 				cur_addr, cur_addr + chunk_size);
4805 
4806 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4807 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4808 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4809 						lower_32_bits(cur_addr));
4810 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4811 						upper_32_bits(cur_addr));
4812 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4813 					chunk_size);
4814 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4815 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4816 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4817 
4818 			cur_addr += chunk_size;
4819 
4820 			if (cur_addr == prop->dram_end_address)
4821 				break;
4822 		}
4823 
4824 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4825 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4826 
4827 			rc = hl_poll_timeout(
4828 				hdev,
4829 				mmDMA0_CORE_STS0 + dma_offset,
4830 				val,
4831 				((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4832 				1000,
4833 				HBM_SCRUBBING_TIMEOUT_US);
4834 
4835 			if (rc) {
4836 				dev_err(hdev->dev,
4837 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4838 					dma_id);
4839 				return -EIO;
4840 			}
4841 		}
4842 	}
4843 
4844 	return 0;
4845 }
4846 
4847 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4848 {
4849 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4850 	struct gaudi_device *gaudi = hdev->asic_specific;
4851 	int rc = 0;
4852 	u64 val = 0;
4853 
4854 	if (!hdev->memory_scrub)
4855 		return 0;
4856 
4857 	if (!addr && !size) {
4858 		/* Wait till device is idle */
4859 		rc = hl_poll_timeout(
4860 				hdev,
4861 				mmDMA0_CORE_STS0/* dummy */,
4862 				val/* dummy */,
4863 				(hdev->asic_funcs->is_device_idle(hdev, NULL,
4864 						0, NULL)),
4865 						1000,
4866 						HBM_SCRUBBING_TIMEOUT_US);
4867 		if (rc) {
4868 			dev_err(hdev->dev, "Timed out while waiting for device to become idle\n");
4869 			return -EIO;
4870 		}
4871 
4872 		/* Scrub SRAM */
4873 		addr = prop->sram_user_base_address;
4874 		size = hdev->pldm ? 0x10000 :
4875 				(prop->sram_size - SRAM_USER_BASE_OFFSET);
4876 		val = 0x7777777777777777ull;
4877 
4878 		rc = gaudi_memset_device_memory(hdev, addr, size, val);
4879 		if (rc) {
4880 			dev_err(hdev->dev,
4881 				"Failed to clear SRAM in mem scrub all\n");
4882 			return rc;
4883 		}
4884 
4885 		mutex_lock(&gaudi->clk_gate_mutex);
4886 		hdev->asic_funcs->disable_clock_gating(hdev);
4887 
4888 		/* Scrub HBM using all DMA channels in parallel */
4889 		rc = gaudi_hbm_scrubbing(hdev);
4890 		if (rc)
4891 			dev_err(hdev->dev,
4892 				"Failed to clear HBM in mem scrub all\n");
4893 
4894 		hdev->asic_funcs->set_clock_gating(hdev);
4895 		mutex_unlock(&gaudi->clk_gate_mutex);
4896 	}
4897 
4898 	return rc;
4899 }
4900 
4901 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4902 				u32 queue_id, dma_addr_t *dma_handle,
4903 				u16 *queue_len)
4904 {
4905 	struct gaudi_device *gaudi = hdev->asic_specific;
4906 	struct gaudi_internal_qman_info *q;
4907 
4908 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4909 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4910 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4911 		return NULL;
4912 	}
4913 
4914 	q = &gaudi->internal_qmans[queue_id];
4915 	*dma_handle = q->pq_dma_addr;
4916 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4917 
4918 	return q->pq_kernel_addr;
4919 }
4920 
4921 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4922 				u16 len, u32 timeout, u64 *result)
4923 {
4924 	struct gaudi_device *gaudi = hdev->asic_specific;
4925 
4926 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4927 		if (result)
4928 			*result = 0;
4929 		return 0;
4930 	}
4931 
4932 	if (!timeout)
4933 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4934 
4935 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4936 						timeout, result);
4937 }
4938 
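/*
 * Queue self-test flow: a 4-byte fence is allocated from the host DMA pool,
 * a single MSG_PROT packet that writes GAUDI_QMAN0_FENCE_VAL to that fence is
 * submitted on the tested queue, and the fence is then polled until the value
 * shows up or the timeout expires (reported as -EIO).
 */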
4939 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4940 {
4941 	struct packet_msg_prot *fence_pkt;
4942 	dma_addr_t pkt_dma_addr;
4943 	u32 fence_val, tmp, timeout_usec;
4944 	dma_addr_t fence_dma_addr;
4945 	u32 *fence_ptr;
4946 	int rc;
4947 
4948 	if (hdev->pldm)
4949 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4950 	else
4951 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4952 
4953 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4954 
4955 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956 							&fence_dma_addr);
4957 	if (!fence_ptr) {
4958 		dev_err(hdev->dev,
4959 			"Failed to allocate memory for H/W queue %d testing\n",
4960 			hw_queue_id);
4961 		return -ENOMEM;
4962 	}
4963 
4964 	*fence_ptr = 0;
4965 
4966 	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4967 					sizeof(struct packet_msg_prot),
4968 					GFP_KERNEL, &pkt_dma_addr);
4969 	if (!fence_pkt) {
4970 		dev_err(hdev->dev,
4971 			"Failed to allocate packet for H/W queue %d testing\n",
4972 			hw_queue_id);
4973 		rc = -ENOMEM;
4974 		goto free_fence_ptr;
4975 	}
4976 
4977 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4978 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4979 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4980 
4981 	fence_pkt->ctl = cpu_to_le32(tmp);
4982 	fence_pkt->value = cpu_to_le32(fence_val);
4983 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4984 
4985 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4986 					sizeof(struct packet_msg_prot),
4987 					pkt_dma_addr);
4988 	if (rc) {
4989 		dev_err(hdev->dev,
4990 			"Failed to send fence packet to H/W queue %d\n",
4991 			hw_queue_id);
4992 		goto free_pkt;
4993 	}
4994 
4995 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4996 					1000, timeout_usec, true);
4997 
4998 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4999 
5000 	if (rc == -ETIMEDOUT) {
5001 		dev_err(hdev->dev,
5002 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5003 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5004 		rc = -EIO;
5005 	}
5006 
5007 free_pkt:
5008 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5009 					pkt_dma_addr);
5010 free_fence_ptr:
5011 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5012 					fence_dma_addr);
5013 	return rc;
5014 }
5015 
5016 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5017 {
5018 	struct gaudi_device *gaudi = hdev->asic_specific;
5019 
5020 	/*
5021 	 * check capability here as send_cpu_message() won't update the result
5022 	 * value if no capability
5023 	 */
5024 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5025 		return 0;
5026 
5027 	return hl_fw_test_cpu_queue(hdev);
5028 }
5029 
5030 static int gaudi_test_queues(struct hl_device *hdev)
5031 {
5032 	int i, rc, ret_val = 0;
5033 
5034 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5035 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5036 			rc = gaudi_test_queue(hdev, i);
5037 			if (rc)
5038 				ret_val = -EINVAL;
5039 		}
5040 	}
5041 
5042 	rc = gaudi_test_cpu_queue(hdev);
5043 	if (rc)
5044 		ret_val = -EINVAL;
5045 
5046 	return ret_val;
5047 }
5048 
5049 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5050 		gfp_t mem_flags, dma_addr_t *dma_handle)
5051 {
5052 	void *kernel_addr;
5053 
5054 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
5055 		return NULL;
5056 
5057 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5058 
5059 	/* Shift to the device's base physical address of host memory */
5060 	if (kernel_addr)
5061 		*dma_handle += HOST_PHYS_BASE;
5062 
5063 	return kernel_addr;
5064 }
5065 
5066 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5067 			dma_addr_t dma_addr)
5068 {
5069 	/* Cancel the device's base physical address of host memory */
5070 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5071 
5072 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5073 }
5074 
5075 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5076 					size_t size, dma_addr_t *dma_handle)
5077 {
5078 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5079 }
5080 
5081 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5082 						size_t size, void *vaddr)
5083 {
5084 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5085 }
5086 
5087 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5088 			int nents, enum dma_data_direction dir)
5089 {
5090 	struct scatterlist *sg;
5091 	int i;
5092 
5093 	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5094 		return -ENOMEM;
5095 
5096 	/* Shift to the device's base physical address of host memory */
5097 	for_each_sg(sgl, sg, nents, i)
5098 		sg->dma_address += HOST_PHYS_BASE;
5099 
5100 	return 0;
5101 }
5102 
5103 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5104 			int nents, enum dma_data_direction dir)
5105 {
5106 	struct scatterlist *sg;
5107 	int i;
5108 
5109 	/* Cancel the device's base physical address of host memory */
5110 	for_each_sg(sgl, sg, nents, i)
5111 		sg->dma_address -= HOST_PHYS_BASE;
5112 
5113 	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5114 }
5115 
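/*
 * The helper below pre-computes how many LIN_DMA packets a patched CB will
 * need for a given SG table: physically contiguous entries are merged as long
 * as the combined length stays within DMA_MAX_TRANSFER_SIZE, and each merged
 * run costs one packet_lin_dma descriptor. gaudi_patch_dma_packet() applies
 * the same merging rule when it actually emits the packets.
 */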
5116 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5117 					struct sg_table *sgt)
5118 {
5119 	struct scatterlist *sg, *sg_next_iter;
5120 	u32 count, dma_desc_cnt;
5121 	u64 len, len_next;
5122 	dma_addr_t addr, addr_next;
5123 
5124 	dma_desc_cnt = 0;
5125 
5126 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5127 
5128 		len = sg_dma_len(sg);
5129 		addr = sg_dma_address(sg);
5130 
5131 		if (len == 0)
5132 			break;
5133 
5134 		while ((count + 1) < sgt->nents) {
5135 			sg_next_iter = sg_next(sg);
5136 			len_next = sg_dma_len(sg_next_iter);
5137 			addr_next = sg_dma_address(sg_next_iter);
5138 
5139 			if (len_next == 0)
5140 				break;
5141 
5142 			if ((addr + len == addr_next) &&
5143 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5144 				len += len_next;
5145 				count++;
5146 				sg = sg_next_iter;
5147 			} else {
5148 				break;
5149 			}
5150 		}
5151 
5152 		dma_desc_cnt++;
5153 	}
5154 
5155 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
5156 }
5157 
5158 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5159 				struct hl_cs_parser *parser,
5160 				struct packet_lin_dma *user_dma_pkt,
5161 				u64 addr, enum dma_data_direction dir)
5162 {
5163 	struct hl_userptr *userptr;
5164 	int rc;
5165 
5166 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5167 			parser->job_userptr_list, &userptr))
5168 		goto already_pinned;
5169 
5170 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5171 	if (!userptr)
5172 		return -ENOMEM;
5173 
5174 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5175 				userptr);
5176 	if (rc)
5177 		goto free_userptr;
5178 
5179 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
5180 
5181 	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5182 					userptr->sgt->nents, dir);
5183 	if (rc) {
5184 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5185 		goto unpin_memory;
5186 	}
5187 
5188 	userptr->dma_mapped = true;
5189 	userptr->dir = dir;
5190 
5191 already_pinned:
5192 	parser->patched_cb_size +=
5193 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5194 
5195 	return 0;
5196 
5197 unpin_memory:
5198 	list_del(&userptr->job_node);
5199 	hl_unpin_host_memory(hdev, userptr);
5200 free_userptr:
5201 	kfree(userptr);
5202 	return rc;
5203 }
5204 
5205 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5206 				struct hl_cs_parser *parser,
5207 				struct packet_lin_dma *user_dma_pkt,
5208 				bool src_in_host)
5209 {
5210 	enum dma_data_direction dir;
5211 	bool skip_host_mem_pin = false, user_memset;
5212 	u64 addr;
5213 	int rc = 0;
5214 
5215 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5216 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5217 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218 
5219 	if (src_in_host) {
5220 		if (user_memset)
5221 			skip_host_mem_pin = true;
5222 
5223 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5224 		dir = DMA_TO_DEVICE;
5225 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5226 	} else {
5227 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5228 		dir = DMA_FROM_DEVICE;
5229 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5230 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5231 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5232 	}
5233 
5234 	if (skip_host_mem_pin)
5235 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5236 	else
5237 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5238 						addr, dir);
5239 
5240 	return rc;
5241 }
5242 
5243 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5244 				struct hl_cs_parser *parser,
5245 				struct packet_lin_dma *user_dma_pkt)
5246 {
5247 	bool src_in_host = false;
5248 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5249 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5250 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5251 
5252 	dev_dbg(hdev->dev, "DMA packet details:\n");
5253 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5254 				le64_to_cpu(user_dma_pkt->src_addr));
5255 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5256 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5257 
5258 	/*
5259 	 * Special handling for DMA with size 0. Bypass all validations
5260 	 * because no transactions will be done except for WR_COMP, which
5261 	 * is not a security issue
5262 	 */
5263 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5264 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5265 		return 0;
5266 	}
5267 
5268 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5269 		src_in_host = true;
5270 
5271 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5272 						src_in_host);
5273 }
5274 
5275 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5276 					struct hl_cs_parser *parser,
5277 					struct packet_load_and_exe *user_pkt)
5278 {
5279 	u32 cfg;
5280 
5281 	cfg = le32_to_cpu(user_pkt->cfg);
5282 
5283 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5284 		dev_err(hdev->dev,
5285 			"User not allowed to use Load and Execute\n");
5286 		return -EPERM;
5287 	}
5288 
5289 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5290 
5291 	return 0;
5292 }
5293 
5294 static int gaudi_validate_cb(struct hl_device *hdev,
5295 			struct hl_cs_parser *parser, bool is_mmu)
5296 {
5297 	u32 cb_parsed_length = 0;
5298 	int rc = 0;
5299 
5300 	parser->patched_cb_size = 0;
5301 
5302 	/* user_cb_size is more than 0 so the loop will always be executed */
5303 	while (cb_parsed_length < parser->user_cb_size) {
5304 		enum packet_id pkt_id;
5305 		u16 pkt_size;
5306 		struct gaudi_packet *user_pkt;
5307 
5308 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5309 
5310 		pkt_id = (enum packet_id) (
5311 				(le64_to_cpu(user_pkt->header) &
5312 				PACKET_HEADER_PACKET_ID_MASK) >>
5313 					PACKET_HEADER_PACKET_ID_SHIFT);
5314 
5315 		if (!validate_packet_id(pkt_id)) {
5316 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5317 			rc = -EINVAL;
5318 			break;
5319 		}
5320 
5321 		pkt_size = gaudi_packet_sizes[pkt_id];
5322 		cb_parsed_length += pkt_size;
5323 		if (cb_parsed_length > parser->user_cb_size) {
5324 			dev_err(hdev->dev,
5325 				"packet 0x%x is out of CB boundary\n", pkt_id);
5326 			rc = -EINVAL;
5327 			break;
5328 		}
5329 
5330 		switch (pkt_id) {
5331 		case PACKET_MSG_PROT:
5332 			dev_err(hdev->dev,
5333 				"User not allowed to use MSG_PROT\n");
5334 			rc = -EPERM;
5335 			break;
5336 
5337 		case PACKET_CP_DMA:
5338 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5339 			rc = -EPERM;
5340 			break;
5341 
5342 		case PACKET_STOP:
5343 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5344 			rc = -EPERM;
5345 			break;
5346 
5347 		case PACKET_WREG_BULK:
5348 			dev_err(hdev->dev,
5349 				"User not allowed to use WREG_BULK\n");
5350 			rc = -EPERM;
5351 			break;
5352 
5353 		case PACKET_LOAD_AND_EXE:
5354 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5355 				(struct packet_load_and_exe *) user_pkt);
5356 			break;
5357 
5358 		case PACKET_LIN_DMA:
5359 			parser->contains_dma_pkt = true;
5360 			if (is_mmu)
5361 				parser->patched_cb_size += pkt_size;
5362 			else
5363 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5364 					(struct packet_lin_dma *) user_pkt);
5365 			break;
5366 
5367 		case PACKET_WREG_32:
5368 		case PACKET_MSG_LONG:
5369 		case PACKET_MSG_SHORT:
5370 		case PACKET_REPEAT:
5371 		case PACKET_FENCE:
5372 		case PACKET_NOP:
5373 		case PACKET_ARB_POINT:
5374 			parser->patched_cb_size += pkt_size;
5375 			break;
5376 
5377 		default:
5378 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5379 				pkt_id);
5380 			rc = -EINVAL;
5381 			break;
5382 		}
5383 
5384 		if (rc)
5385 			break;
5386 	}
5387 
5388 	/*
5389 	 * The new CB should have space at the end for two MSG_PROT packets:
5390 	 * 1. A packet that will act as a completion packet
5391 	 * 2. A packet that will generate MSI-X interrupt
5392 	 */
5393 	if (parser->completion)
5394 		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5395 
5396 	return rc;
5397 }
5398 
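/*
 * gaudi_patch_dma_packet() expands a single user LIN_DMA packet into one
 * packet per merged SG run of the pinned host buffer. The engine-barrier bit
 * is cleared on every emitted packet except the first, write-completion is
 * disabled on all of them and re-enabled on the last packet only if the user
 * had requested it, so the original packet's completion semantics are kept.
 */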
5399 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5400 				struct hl_cs_parser *parser,
5401 				struct packet_lin_dma *user_dma_pkt,
5402 				struct packet_lin_dma *new_dma_pkt,
5403 				u32 *new_dma_pkt_size)
5404 {
5405 	struct hl_userptr *userptr;
5406 	struct scatterlist *sg, *sg_next_iter;
5407 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5408 	u64 len, len_next;
5409 	dma_addr_t dma_addr, dma_addr_next;
5410 	u64 device_memory_addr, addr;
5411 	enum dma_data_direction dir;
5412 	struct sg_table *sgt;
5413 	bool src_in_host = false;
5414 	bool skip_host_mem_pin = false;
5415 	bool user_memset;
5416 
5417 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5418 
5419 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5420 		src_in_host = true;
5421 
5422 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5423 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5424 
5425 	if (src_in_host) {
5426 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5427 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5428 		dir = DMA_TO_DEVICE;
5429 		if (user_memset)
5430 			skip_host_mem_pin = true;
5431 	} else {
5432 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5433 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5434 		dir = DMA_FROM_DEVICE;
5435 	}
5436 
5437 	if ((!skip_host_mem_pin) &&
5438 		(!hl_userptr_is_pinned(hdev, addr,
5439 					le32_to_cpu(user_dma_pkt->tsize),
5440 					parser->job_userptr_list, &userptr))) {
5441 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5442 				addr, user_dma_pkt->tsize);
5443 		return -EFAULT;
5444 	}
5445 
5446 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5447 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5448 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5449 		return 0;
5450 	}
5451 
5452 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5453 
5454 	sgt = userptr->sgt;
5455 	dma_desc_cnt = 0;
5456 
5457 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5458 		len = sg_dma_len(sg);
5459 		dma_addr = sg_dma_address(sg);
5460 
5461 		if (len == 0)
5462 			break;
5463 
5464 		while ((count + 1) < sgt->nents) {
5465 			sg_next_iter = sg_next(sg);
5466 			len_next = sg_dma_len(sg_next_iter);
5467 			dma_addr_next = sg_dma_address(sg_next_iter);
5468 
5469 			if (len_next == 0)
5470 				break;
5471 
5472 			if ((dma_addr + len == dma_addr_next) &&
5473 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5474 				len += len_next;
5475 				count++;
5476 				sg = sg_next_iter;
5477 			} else {
5478 				break;
5479 			}
5480 		}
5481 
5482 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5483 		if (likely(dma_desc_cnt))
5484 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5485 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5486 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5487 		new_dma_pkt->tsize = cpu_to_le32(len);
5488 
5489 		if (dir == DMA_TO_DEVICE) {
5490 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5491 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5492 		} else {
5493 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5494 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5495 		}
5496 
5497 		if (!user_memset)
5498 			device_memory_addr += len;
5499 		dma_desc_cnt++;
5500 		new_dma_pkt++;
5501 	}
5502 
5503 	if (!dma_desc_cnt) {
5504 		dev_err(hdev->dev,
5505 			"No SG entries found when patching DMA packet\n");
5506 		return -EFAULT;
5507 	}
5508 
5509 	/* Fix the last dma packet - wrcomp must be as user set it */
5510 	new_dma_pkt--;
5511 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5512 
5513 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5514 
5515 	return 0;
5516 }
5517 
5518 static int gaudi_patch_cb(struct hl_device *hdev,
5519 				struct hl_cs_parser *parser)
5520 {
5521 	u32 cb_parsed_length = 0;
5522 	u32 cb_patched_cur_length = 0;
5523 	int rc = 0;
5524 
5525 	/* user_cb_size is more than 0 so the loop will always be executed */
5526 	while (cb_parsed_length < parser->user_cb_size) {
5527 		enum packet_id pkt_id;
5528 		u16 pkt_size;
5529 		u32 new_pkt_size = 0;
5530 		struct gaudi_packet *user_pkt, *kernel_pkt;
5531 
5532 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5533 		kernel_pkt = parser->patched_cb->kernel_address +
5534 					cb_patched_cur_length;
5535 
5536 		pkt_id = (enum packet_id) (
5537 				(le64_to_cpu(user_pkt->header) &
5538 				PACKET_HEADER_PACKET_ID_MASK) >>
5539 					PACKET_HEADER_PACKET_ID_SHIFT);
5540 
5541 		if (!validate_packet_id(pkt_id)) {
5542 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5543 			rc = -EINVAL;
5544 			break;
5545 		}
5546 
5547 		pkt_size = gaudi_packet_sizes[pkt_id];
5548 		cb_parsed_length += pkt_size;
5549 		if (cb_parsed_length > parser->user_cb_size) {
5550 			dev_err(hdev->dev,
5551 				"packet 0x%x is out of CB boundary\n", pkt_id);
5552 			rc = -EINVAL;
5553 			break;
5554 		}
5555 
5556 		switch (pkt_id) {
5557 		case PACKET_LIN_DMA:
5558 			rc = gaudi_patch_dma_packet(hdev, parser,
5559 					(struct packet_lin_dma *) user_pkt,
5560 					(struct packet_lin_dma *) kernel_pkt,
5561 					&new_pkt_size);
5562 			cb_patched_cur_length += new_pkt_size;
5563 			break;
5564 
5565 		case PACKET_MSG_PROT:
5566 			dev_err(hdev->dev,
5567 				"User not allowed to use MSG_PROT\n");
5568 			rc = -EPERM;
5569 			break;
5570 
5571 		case PACKET_CP_DMA:
5572 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5573 			rc = -EPERM;
5574 			break;
5575 
5576 		case PACKET_STOP:
5577 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5578 			rc = -EPERM;
5579 			break;
5580 
5581 		case PACKET_WREG_32:
5582 		case PACKET_WREG_BULK:
5583 		case PACKET_MSG_LONG:
5584 		case PACKET_MSG_SHORT:
5585 		case PACKET_REPEAT:
5586 		case PACKET_FENCE:
5587 		case PACKET_NOP:
5588 		case PACKET_ARB_POINT:
5589 		case PACKET_LOAD_AND_EXE:
5590 			memcpy(kernel_pkt, user_pkt, pkt_size);
5591 			cb_patched_cur_length += pkt_size;
5592 			break;
5593 
5594 		default:
5595 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5596 				pkt_id);
5597 			rc = -EINVAL;
5598 			break;
5599 		}
5600 
5601 		if (rc)
5602 			break;
5603 	}
5604 
5605 	return rc;
5606 }
5607 
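/*
 * In the MMU flow the user CB is copied verbatim into a kernel-owned patched
 * CB (with room reserved for the two trailing MSG_PROT packets when a
 * completion is requested) and then validated in place; LIN_DMA packets are
 * accepted without host-memory pinning since is_mmu is passed as true to
 * gaudi_validate_cb().
 */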
5608 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5609 		struct hl_cs_parser *parser)
5610 {
5611 	u64 patched_cb_handle;
5612 	u32 patched_cb_size;
5613 	struct hl_cb *user_cb;
5614 	int rc;
5615 
5616 	/*
5617 	 * The new CB should have space at the end for two MSG_PROT pkt:
5618 	 * 1. A packet that will act as a completion packet
5619 	 * 2. A packet that will generate MSI interrupt
5620 	 */
5621 	if (parser->completion)
5622 		parser->patched_cb_size = parser->user_cb_size +
5623 				sizeof(struct packet_msg_prot) * 2;
5624 	else
5625 		parser->patched_cb_size = parser->user_cb_size;
5626 
5627 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5628 				parser->patched_cb_size, false, false,
5629 				&patched_cb_handle);
5630 
5631 	if (rc) {
5632 		dev_err(hdev->dev,
5633 			"Failed to allocate patched CB for DMA CS %d\n",
5634 			rc);
5635 		return rc;
5636 	}
5637 
5638 	patched_cb_handle >>= PAGE_SHIFT;
5639 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5640 				(u32) patched_cb_handle);
5641 	/* hl_cb_get should never fail */
5642 	if (!parser->patched_cb) {
5643 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5644 			(u32) patched_cb_handle);
5645 		rc = -EFAULT;
5646 		goto out;
5647 	}
5648 
5649 	/*
5650 	 * The check that parser->user_cb_size <= parser->user_cb->size was done
5651 	 * in validate_queue_index().
5652 	 */
5653 	memcpy(parser->patched_cb->kernel_address,
5654 		parser->user_cb->kernel_address,
5655 		parser->user_cb_size);
5656 
5657 	patched_cb_size = parser->patched_cb_size;
5658 
5659 	/* Validate patched CB instead of user CB */
5660 	user_cb = parser->user_cb;
5661 	parser->user_cb = parser->patched_cb;
5662 	rc = gaudi_validate_cb(hdev, parser, true);
5663 	parser->user_cb = user_cb;
5664 
5665 	if (rc) {
5666 		hl_cb_put(parser->patched_cb);
5667 		goto out;
5668 	}
5669 
5670 	if (patched_cb_size != parser->patched_cb_size) {
5671 		dev_err(hdev->dev, "user CB size mismatch\n");
5672 		hl_cb_put(parser->patched_cb);
5673 		rc = -EINVAL;
5674 		goto out;
5675 	}
5676 
5677 out:
5678 	/*
5679 	 * Always call cb destroy here because we still have 1 reference
5680 	 * to it by calling cb_get earlier. After the job is completed,
5681 	 * cb_put will release it, but here we want to remove it from the
5682 	 * idr
5683 	 */
5684 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5685 					patched_cb_handle << PAGE_SHIFT);
5686 
5687 	return rc;
5688 }
5689 
5690 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5691 		struct hl_cs_parser *parser)
5692 {
5693 	u64 patched_cb_handle;
5694 	int rc;
5695 
5696 	rc = gaudi_validate_cb(hdev, parser, false);
5697 
5698 	if (rc)
5699 		goto free_userptr;
5700 
5701 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5702 				parser->patched_cb_size, false, false,
5703 				&patched_cb_handle);
5704 	if (rc) {
5705 		dev_err(hdev->dev,
5706 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5707 		goto free_userptr;
5708 	}
5709 
5710 	patched_cb_handle >>= PAGE_SHIFT;
5711 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5712 				(u32) patched_cb_handle);
5713 	/* hl_cb_get should never fail here */
5714 	if (!parser->patched_cb) {
5715 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5716 				(u32) patched_cb_handle);
5717 		rc = -EFAULT;
5718 		goto out;
5719 	}
5720 
5721 	rc = gaudi_patch_cb(hdev, parser);
5722 
5723 	if (rc)
5724 		hl_cb_put(parser->patched_cb);
5725 
5726 out:
5727 	/*
5728 	 * Always call cb destroy here because we still have 1 reference
5729 	 * to it by calling cb_get earlier. After the job is completed,
5730 	 * cb_put will release it, but here we want to remove it from the
5731 	 * idr
5732 	 */
5733 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5734 				patched_cb_handle << PAGE_SHIFT);
5735 
5736 free_userptr:
5737 	if (rc)
5738 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5739 	return rc;
5740 }
5741 
5742 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5743 					struct hl_cs_parser *parser)
5744 {
5745 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5746 	struct gaudi_device *gaudi = hdev->asic_specific;
5747 	u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5748 		((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5749 
5750 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5751 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5752 			(!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5753 		dev_err(hdev->dev, "h/w queue %d is disabled\n",
5754 				parser->hw_queue_id);
5755 		return -EINVAL;
5756 	}
5757 
5758 	/* For internal queue jobs just check if CB address is valid */
5759 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5760 					parser->user_cb_size,
5761 					asic_prop->sram_user_base_address,
5762 					asic_prop->sram_end_address))
5763 		return 0;
5764 
5765 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5766 					parser->user_cb_size,
5767 					asic_prop->dram_user_base_address,
5768 					asic_prop->dram_end_address))
5769 		return 0;
5770 
5771 	/* PMMU and HPMMU addresses are equal, check only one of them */
5772 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5773 					parser->user_cb_size,
5774 					asic_prop->pmmu.start_addr,
5775 					asic_prop->pmmu.end_addr))
5776 		return 0;
5777 
5778 	dev_err(hdev->dev,
5779 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5780 		parser->user_cb, parser->user_cb_size);
5781 
5782 	return -EFAULT;
5783 }
5784 
5785 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5786 {
5787 	struct gaudi_device *gaudi = hdev->asic_specific;
5788 
5789 	if (parser->queue_type == QUEUE_TYPE_INT)
5790 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5791 
5792 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5793 		return gaudi_parse_cb_mmu(hdev, parser);
5794 	else
5795 		return gaudi_parse_cb_no_mmu(hdev, parser);
5796 }
5797 
5798 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5799 					void *kernel_address, u32 len,
5800 					u64 cq_addr, u32 cq_val, u32 msi_vec,
5801 					bool eb)
5802 {
5803 	struct gaudi_device *gaudi = hdev->asic_specific;
5804 	struct packet_msg_prot *cq_pkt;
5805 	u64 msi_addr;
5806 	u32 tmp;
5807 
5808 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5809 
5810 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5811 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5812 
5813 	if (eb)
5814 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5815 
5816 	cq_pkt->ctl = cpu_to_le32(tmp);
5817 	cq_pkt->value = cpu_to_le32(cq_val);
5818 	cq_pkt->addr = cpu_to_le64(cq_addr);
5819 
5820 	cq_pkt++;
5821 
5822 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5823 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5824 	cq_pkt->ctl = cpu_to_le32(tmp);
5825 	cq_pkt->value = cpu_to_le32(1);
5826 
5827 	if (gaudi->multi_msi_mode)
5828 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5829 	else
5830 		msi_addr = mmPCIE_CORE_MSI_REQ;
5831 
5832 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5833 }
5834 
5835 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5836 {
5837 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5838 }
5839 
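/*
 * Device-memory memset below is implemented with a single LIN_DMA packet in
 * memset mode: the 64-bit fill value travels in src_addr, the target device
 * address in dst_addr, and the packet runs as a kernel job on DMA channel 0
 * (QMAN0), with the DMA error-cause register checked before and after.
 * A minimal usage sketch (illustrative values only):
 *
 *	rc = gaudi_memset_device_memory(hdev, prop->sram_user_base_address,
 *					SZ_4K, 0x7777777777777777ull);
 */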
5840 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5841 					u32 size, u64 val)
5842 {
5843 	struct packet_lin_dma *lin_dma_pkt;
5844 	struct hl_cs_job *job;
5845 	u32 cb_size, ctl, err_cause;
5846 	struct hl_cb *cb;
5847 	u64 id;
5848 	int rc;
5849 
5850 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5851 	if (!cb)
5852 		return -EFAULT;
5853 
5854 	lin_dma_pkt = cb->kernel_address;
5855 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5856 	cb_size = sizeof(*lin_dma_pkt);
5857 
5858 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5859 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5860 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5861 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5862 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5863 
5864 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5865 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5866 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5867 	lin_dma_pkt->tsize = cpu_to_le32(size);
5868 
5869 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5870 	if (!job) {
5871 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5872 		rc = -ENOMEM;
5873 		goto release_cb;
5874 	}
5875 
5876 	/* Verify DMA is OK */
5877 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5878 	if (err_cause && !hdev->init_done) {
5879 		dev_dbg(hdev->dev,
5880 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5881 			err_cause);
5882 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5883 	}
5884 
5885 	job->id = 0;
5886 	job->user_cb = cb;
5887 	atomic_inc(&job->user_cb->cs_cnt);
5888 	job->user_cb_size = cb_size;
5889 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5890 	job->patched_cb = job->user_cb;
5891 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5892 
5893 	hl_debugfs_add_job(hdev, job);
5894 
5895 	rc = gaudi_send_job_on_qman0(hdev, job);
5896 	hl_debugfs_remove_job(hdev, job);
5897 	kfree(job);
5898 	atomic_dec(&cb->cs_cnt);
5899 
5900 	/* Verify DMA is OK */
5901 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5902 	if (err_cause) {
5903 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5904 		rc = -EIO;
5905 		if (!hdev->init_done) {
5906 			dev_dbg(hdev->dev,
5907 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5908 				err_cause);
5909 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5910 		}
5911 	}
5912 
5913 release_cb:
5914 	id = cb->id;
5915 	hl_cb_put(cb);
5916 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5917 
5918 	return rc;
5919 }
5920 
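/*
 * Register-block memset builds one MSG_LONG packet per register write
 * (EB/RB/MB set) into a kernel CB, caps the CB size at 2MB, and executes it
 * as a kernel job on QMAN0, same as the device-memory memset above.
 */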
5921 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5922 					u32 num_regs, u32 val)
5923 {
5924 	struct packet_msg_long *pkt;
5925 	struct hl_cs_job *job;
5926 	u32 cb_size, ctl;
5927 	struct hl_cb *cb;
5928 	int i, rc;
5929 
5930 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5931 
5932 	if (cb_size > SZ_2M) {
5933 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M / SZ_1M);
5934 		return -ENOMEM;
5935 	}
5936 
5937 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5938 	if (!cb)
5939 		return -EFAULT;
5940 
5941 	pkt = cb->kernel_address;
5942 
5943 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5944 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5945 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5946 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5947 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5948 
5949 	for (i = 0; i < num_regs ; i++, pkt++) {
5950 		pkt->ctl = cpu_to_le32(ctl);
5951 		pkt->value = cpu_to_le32(val);
5952 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5953 	}
5954 
5955 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5956 	if (!job) {
5957 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5958 		rc = -ENOMEM;
5959 		goto release_cb;
5960 	}
5961 
5962 	job->id = 0;
5963 	job->user_cb = cb;
5964 	atomic_inc(&job->user_cb->cs_cnt);
5965 	job->user_cb_size = cb_size;
5966 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5967 	job->patched_cb = job->user_cb;
5968 	job->job_cb_size = cb_size;
5969 
5970 	hl_debugfs_add_job(hdev, job);
5971 
5972 	rc = gaudi_send_job_on_qman0(hdev, job);
5973 	hl_debugfs_remove_job(hdev, job);
5974 	kfree(job);
5975 	atomic_dec(&cb->cs_cnt);
5976 
5977 release_cb:
5978 	hl_cb_put(cb);
5979 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5980 
5981 	return rc;
5982 }
5983 
5984 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5985 {
5986 	u64 base_addr;
5987 	u32 num_regs;
5988 	int rc;
5989 
5990 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5991 	num_regs = NUM_OF_SOB_IN_BLOCK;
5992 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993 	if (rc) {
5994 		dev_err(hdev->dev, "failed resetting SM registers");
5995 		return -ENOMEM;
5996 	}
5997 
5998 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5999 	num_regs = NUM_OF_SOB_IN_BLOCK;
6000 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001 	if (rc) {
6002 		dev_err(hdev->dev, "failed resetting SM registers");
6003 		return -ENOMEM;
6004 	}
6005 
6006 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6007 	num_regs = NUM_OF_SOB_IN_BLOCK;
6008 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6009 	if (rc) {
6010 		dev_err(hdev->dev, "failed resetting SM registers");
6011 		return -ENOMEM;
6012 	}
6013 
6014 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6015 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
6016 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6017 	if (rc) {
6018 		dev_err(hdev->dev, "failed resetting SM registers");
6019 		return -ENOMEM;
6020 	}
6021 
6022 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6023 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
6024 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6025 	if (rc) {
6026 		dev_err(hdev->dev, "failed resetting SM registers");
6027 		return -ENOMEM;
6028 	}
6029 
6030 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6031 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
6032 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6033 	if (rc) {
6034 		dev_err(hdev->dev, "failed resetting SM registers");
6035 		return -ENOMEM;
6036 	}
6037 
6038 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6039 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6040 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6041 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6042 	if (rc) {
6043 		dev_err(hdev->dev, "failed resetting SM registers");
6044 		return -ENOMEM;
6045 	}
6046 
6047 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6048 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6049 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6050 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6051 	if (rc) {
6052 		dev_err(hdev->dev, "failed resetting SM registers");
6053 		return -ENOMEM;
6054 	}
6055 
6056 	return 0;
6057 }
6058 
6059 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6060 {
6061 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6062 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6063 	int i;
6064 
6065 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6066 		u64 sob_addr = CFG_BASE +
6067 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6068 				(i * sob_delta);
6069 		u32 dma_offset = i * DMA_CORE_OFFSET;
6070 
6071 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6072 				lower_32_bits(sob_addr));
6073 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6074 				upper_32_bits(sob_addr));
6075 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6076 
6077 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6078 		 * modified by the user for SRAM reduction
6079 		 */
6080 		if (i > 1)
6081 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6082 								0x00000001);
6083 	}
6084 }
6085 
6086 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6087 {
6088 	u32 qman_offset;
6089 	int i;
6090 
6091 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6092 		qman_offset = i * DMA_QMAN_OFFSET;
6093 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6094 	}
6095 
6096 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6097 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6098 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6099 	}
6100 
6101 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6102 		qman_offset = i * TPC_QMAN_OFFSET;
6103 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6104 	}
6105 
6106 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6107 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6108 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6109 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6110 	}
6111 }
6112 
6113 static int gaudi_restore_user_registers(struct hl_device *hdev)
6114 {
6115 	int rc;
6116 
6117 	rc = gaudi_restore_sm_registers(hdev);
6118 	if (rc)
6119 		return rc;
6120 
6121 	gaudi_restore_dma_registers(hdev);
6122 	gaudi_restore_qm_registers(hdev);
6123 
6124 	return 0;
6125 }
6126 
6127 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6128 {
6129 	return 0;
6130 }
6131 
6132 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6133 {
6134 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6135 	struct gaudi_device *gaudi = hdev->asic_specific;
6136 	u64 addr = prop->mmu_pgt_addr;
6137 	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6138 
6139 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6140 		return 0;
6141 
6142 	return gaudi_memset_device_memory(hdev, addr, size, 0);
6143 }
6144 
6145 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6146 {
6147 
6148 }
6149 
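/*
 * The debugfs accessors below all decode the target address the same way:
 * configuration space goes through RREG32/WREG32 (refused while clock gating
 * covers the engines in GAUDI_CLK_GATE_DEBUGFS_MASK), SRAM goes through the
 * SRAM PCI BAR, DRAM is reached by moving the HBM BAR window over the
 * requested region, and host physical addresses are touched directly only
 * for user addresses when no IOMMU is present.
 */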
6150 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6151 			bool user_address, u32 *val)
6152 {
6153 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6154 	struct gaudi_device *gaudi = hdev->asic_specific;
6155 	u64 hbm_bar_addr, host_phys_end;
6156 	int rc = 0;
6157 
6158 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6159 
6160 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6161 
6162 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6163 				(hdev->clock_gating_mask &
6164 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6165 
6166 			dev_err_ratelimited(hdev->dev,
6167 				"Can't read register - clock gating is enabled!\n");
6168 			rc = -EFAULT;
6169 		} else {
6170 			*val = RREG32(addr - CFG_BASE);
6171 		}
6172 
6173 	} else if ((addr >= SRAM_BASE_ADDR) &&
6174 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6175 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6176 				(addr - SRAM_BASE_ADDR));
6177 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6178 		u64 bar_base_addr = DRAM_PHYS_BASE +
6179 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6180 
6181 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6182 		if (hbm_bar_addr != U64_MAX) {
6183 			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6184 						(addr - bar_base_addr));
6185 
6186 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6187 						hbm_bar_addr);
6188 		}
6189 		if (hbm_bar_addr == U64_MAX)
6190 			rc = -EIO;
6191 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6192 			user_address && !iommu_present(&pci_bus_type)) {
6193 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6194 	} else {
6195 		rc = -EFAULT;
6196 	}
6197 
6198 	return rc;
6199 }
6200 
6201 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6202 			bool user_address, u32 val)
6203 {
6204 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6205 	struct gaudi_device *gaudi = hdev->asic_specific;
6206 	u64 hbm_bar_addr, host_phys_end;
6207 	int rc = 0;
6208 
6209 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6210 
6211 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6212 
6213 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6214 				(hdev->clock_gating_mask &
6215 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6216 
6217 			dev_err_ratelimited(hdev->dev,
6218 				"Can't write register - clock gating is enabled!\n");
6219 			rc = -EFAULT;
6220 		} else {
6221 			WREG32(addr - CFG_BASE, val);
6222 		}
6223 
6224 	} else if ((addr >= SRAM_BASE_ADDR) &&
6225 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6226 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6227 					(addr - SRAM_BASE_ADDR));
6228 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6229 		u64 bar_base_addr = DRAM_PHYS_BASE +
6230 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6231 
6232 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6233 		if (hbm_bar_addr != U64_MAX) {
6234 			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6235 						(addr - bar_base_addr));
6236 
6237 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6238 						hbm_bar_addr);
6239 		}
6240 		if (hbm_bar_addr == U64_MAX)
6241 			rc = -EIO;
6242 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6243 			user_address && !iommu_present(&pci_bus_type)) {
6244 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6245 	} else {
6246 		rc = -EFAULT;
6247 	}
6248 
6249 	return rc;
6250 }
6251 
6252 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6253 				bool user_address, u64 *val)
6254 {
6255 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6256 	struct gaudi_device *gaudi = hdev->asic_specific;
6257 	u64 hbm_bar_addr, host_phys_end;
6258 	int rc = 0;
6259 
6260 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6261 
6262 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6263 
6264 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6265 				(hdev->clock_gating_mask &
6266 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6267 
6268 			dev_err_ratelimited(hdev->dev,
6269 				"Can't read register - clock gating is enabled!\n");
6270 			rc = -EFAULT;
6271 		} else {
6272 			u32 val_l = RREG32(addr - CFG_BASE);
6273 			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6274 
6275 			*val = (((u64) val_h) << 32) | val_l;
6276 		}
6277 
6278 	} else if ((addr >= SRAM_BASE_ADDR) &&
6279 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6280 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6281 				(addr - SRAM_BASE_ADDR));
6282 	} else if (addr <=
6283 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6284 		u64 bar_base_addr = DRAM_PHYS_BASE +
6285 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6286 
6287 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6288 		if (hbm_bar_addr != U64_MAX) {
6289 			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6290 						(addr - bar_base_addr));
6291 
6292 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6293 						hbm_bar_addr);
6294 		}
6295 		if (hbm_bar_addr == U64_MAX)
6296 			rc = -EIO;
6297 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6298 			user_address && !iommu_present(&pci_bus_type)) {
6299 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6300 	} else {
6301 		rc = -EFAULT;
6302 	}
6303 
6304 	return rc;
6305 }
6306 
6307 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6308 				bool user_address, u64 val)
6309 {
6310 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6311 	struct gaudi_device *gaudi = hdev->asic_specific;
6312 	u64 hbm_bar_addr, host_phys_end;
6313 	int rc = 0;
6314 
6315 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6316 
6317 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6318 
6319 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6320 				(hdev->clock_gating_mask &
6321 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6322 
6323 			dev_err_ratelimited(hdev->dev,
6324 				"Can't write register - clock gating is enabled!\n");
6325 			rc = -EFAULT;
6326 		} else {
6327 			WREG32(addr - CFG_BASE, lower_32_bits(val));
6328 			WREG32(addr + sizeof(u32) - CFG_BASE,
6329 				upper_32_bits(val));
6330 		}
6331 
6332 	} else if ((addr >= SRAM_BASE_ADDR) &&
6333 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6334 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6335 					(addr - SRAM_BASE_ADDR));
6336 	} else if (addr <=
6337 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6338 		u64 bar_base_addr = DRAM_PHYS_BASE +
6339 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6340 
6341 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6342 		if (hbm_bar_addr != U64_MAX) {
6343 			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6344 						(addr - bar_base_addr));
6345 
6346 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6347 						hbm_bar_addr);
6348 		}
6349 		if (hbm_bar_addr == U64_MAX)
6350 			rc = -EIO;
6351 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6352 			user_address && !iommu_present(&pci_bus_type)) {
6353 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6354 	} else {
6355 		rc = -EFAULT;
6356 	}
6357 
6358 	return rc;
6359 }
6360 
6361 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6362 					u32 size_to_dma, dma_addr_t dma_addr)
6363 {
6364 	u32 err_cause, val;
6365 	u64 dma_offset;
6366 	int rc;
6367 
6368 	dma_offset = dma_id * DMA_CORE_OFFSET;
6369 
6370 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6371 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6372 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6373 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6374 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6375 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6376 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6377 
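	/* wait (up to 1s) for the DMA core to clear its BUSY status */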
6378 	rc = hl_poll_timeout(
6379 		hdev,
6380 		mmDMA0_CORE_STS0 + dma_offset,
6381 		val,
6382 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6383 		0,
6384 		1000000);
6385 
6386 	if (rc) {
6387 		dev_err(hdev->dev,
6388 			"DMA %d timed-out during reading of 0x%llx\n",
6389 			dma_id, addr);
6390 		return -EIO;
6391 	}
6392 
6393 	/* Verify DMA is OK */
6394 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6395 	if (err_cause) {
6396 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6397 		dev_dbg(hdev->dev,
6398 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6399 			err_cause);
6400 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6401 
6402 		return -EIO;
6403 	}
6404 
6405 	return 0;
6406 }
6407 
6408 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6409 				void *blob_addr)
6410 {
6411 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6412 	struct gaudi_device *gaudi = hdev->asic_specific;
6413 	u64 dma_offset, qm_offset;
6414 	dma_addr_t dma_addr;
6415 	void *kernel_addr;
6416 	bool is_eng_idle;
6417 	int rc = 0, dma_id;
6418 
6419 	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6420 						hdev, SZ_2M,
6421 						&dma_addr,
6422 						GFP_KERNEL | __GFP_ZERO);
6423 
6424 	if (!kernel_addr)
6425 		return -ENOMEM;
6426 
6427 	mutex_lock(&gaudi->clk_gate_mutex);
6428 
6429 	hdev->asic_funcs->disable_clock_gating(hdev);
6430 
6431 	hdev->asic_funcs->hw_queues_lock(hdev);
6432 
6433 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6434 	dma_offset = dma_id * DMA_CORE_OFFSET;
6435 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6436 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6437 	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6438 
6439 	if (!is_eng_idle) {
6440 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6441 		dma_offset = dma_id * DMA_CORE_OFFSET;
6442 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6443 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6444 		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6445 
6446 		if (!is_eng_idle) {
6447 			dev_err_ratelimited(hdev->dev,
6448 				"Can't read via DMA because it is BUSY\n");
6449 			rc = -EAGAIN;
6450 			goto out;
6451 		}
6452 	}
6453 
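	/* Temporarily stop the QMAN's command processors (CP_STOP) while the
	 * DMA core is driven directly; the saved CFG1 value is restored after
	 * the transfer loop.
	 */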
6454 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6455 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6456 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6457 
6458 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6459 	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6460 	 * ASID
6461 	 */
6462 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6463 
6464 	/* Verify DMA is OK */
6465 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466 	if (err_cause) {
6467 		dev_dbg(hdev->dev,
6468 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6469 			err_cause);
6470 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6471 	}
6472 
6473 	pos = 0;
6474 	size_left = size;
6475 	size_to_dma = SZ_2M;
6476 
6477 	while (size_left > 0) {
6478 
6479 		if (size_left < SZ_2M)
6480 			size_to_dma = size_left;
6481 
6482 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6483 						dma_addr);
6484 		if (rc)
6485 			break;
6486 
6487 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6488 
6489 		if (size_left <= SZ_2M)
6490 			break;
6491 
6492 		pos += SZ_2M;
6493 		addr += SZ_2M;
6494 		size_left -= SZ_2M;
6495 	}
6496 
6497 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6498 	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6499 	 * ASID
6500 	 */
6501 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6502 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6503 
6504 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6505 
6506 out:
6507 	hdev->asic_funcs->hw_queues_unlock(hdev);
6508 
6509 	hdev->asic_funcs->set_clock_gating(hdev);
6510 
6511 	mutex_unlock(&gaudi->clk_gate_mutex);
6512 
6513 	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6514 						dma_addr);
6515 
6516 	return rc;
6517 }
6518 
6519 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6520 {
6521 	struct gaudi_device *gaudi = hdev->asic_specific;
6522 
6523 	if (hdev->hard_reset_pending)
6524 		return U64_MAX;
6525 
6526 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6527 			(addr - gaudi->hbm_bar_cur_addr));
6528 }
6529 
6530 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6531 {
6532 	struct gaudi_device *gaudi = hdev->asic_specific;
6533 
6534 	if (hdev->hard_reset_pending)
6535 		return;
6536 
6537 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6538 			(addr - gaudi->hbm_bar_cur_addr));
6539 }
6540 
6541 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6542 {
6543 	/* mask to zero the MMBP and ASID bits */
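	/* (~0x7FF clears bits [10:0], which hold the ASID field and the MMBP
	 * bit)
	 */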
6544 	WREG32_AND(reg, ~0x7FF);
6545 	WREG32_OR(reg, asid);
6546 }
6547 
6548 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6549 {
6550 	struct gaudi_device *gaudi = hdev->asic_specific;
6551 
6552 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6553 		return;
6554 
6555 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6556 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6557 		return;
6558 	}
6559 
6560 	mutex_lock(&gaudi->clk_gate_mutex);
6561 
6562 	hdev->asic_funcs->disable_clock_gating(hdev);
6563 
6564 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569 
6570 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6571 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6572 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6573 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6574 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575 
6576 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6577 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6578 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6579 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6580 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581 
6582 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6583 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6584 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6585 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6586 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587 
6588 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593 
6594 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6595 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6596 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6597 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6598 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6599 
6600 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605 
6606 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6607 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6608 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6609 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6610 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6611 
6612 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6613 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6614 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6615 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6616 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6617 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6618 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6619 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6620 
6621 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6622 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6623 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6624 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6625 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6626 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6627 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6628 
6629 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6630 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6631 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6632 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6633 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6634 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6635 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6636 
6637 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6638 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6639 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6640 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6641 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6642 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6643 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6644 
6645 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6646 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6647 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6648 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6649 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6650 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6651 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6652 
6653 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6654 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6655 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6656 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6657 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6658 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6659 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6660 
6661 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6662 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6663 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6664 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6665 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6666 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6667 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6668 
6669 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6670 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6671 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6672 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6673 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6674 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6675 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6676 
6677 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6678 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6679 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6680 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6681 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6682 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6683 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6684 
6685 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6686 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6687 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6688 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6689 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6690 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6691 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6692 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6693 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6694 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6695 
6696 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6697 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6698 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6699 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6700 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6701 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6702 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6703 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6704 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6705 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6706 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6707 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6708 
6709 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6710 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6711 				asid);
6712 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6713 				asid);
6714 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6715 				asid);
6716 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6717 				asid);
6718 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6719 				asid);
6720 	}
6721 
6722 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6723 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6724 				asid);
6725 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6726 				asid);
6727 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6728 				asid);
6729 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6730 				asid);
6731 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6732 				asid);
6733 	}
6734 
6735 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6736 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6737 				asid);
6738 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6739 				asid);
6740 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6741 				asid);
6742 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6743 				asid);
6744 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6745 				asid);
6746 	}
6747 
6748 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6749 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6750 				asid);
6751 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6752 				asid);
6753 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6754 				asid);
6755 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6756 				asid);
6757 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6758 				asid);
6759 	}
6760 
6761 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6762 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6763 				asid);
6764 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6765 				asid);
6766 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6767 				asid);
6768 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6769 				asid);
6770 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6771 				asid);
6772 	}
6773 
6774 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6775 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6776 				asid);
6777 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6778 				asid);
6779 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6780 				asid);
6781 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6782 				asid);
6783 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6784 				asid);
6785 	}
6786 
6787 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6788 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6789 				asid);
6790 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6791 				asid);
6792 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6793 				asid);
6794 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6795 				asid);
6796 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6797 				asid);
6798 	}
6799 
6800 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6801 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6802 				asid);
6803 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6804 				asid);
6805 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6806 				asid);
6807 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6808 				asid);
6809 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6810 				asid);
6811 	}
6812 
6813 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6814 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6815 				asid);
6816 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6817 				asid);
6818 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6819 				asid);
6820 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6821 				asid);
6822 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6823 				asid);
6824 	}
6825 
6826 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6827 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6828 				asid);
6829 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6830 				asid);
6831 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6832 				asid);
6833 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6834 				asid);
6835 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6836 				asid);
6837 	}
6838 
6839 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6840 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6841 
6842 	hdev->asic_funcs->set_clock_gating(hdev);
6843 
6844 	mutex_unlock(&gaudi->clk_gate_mutex);
6845 }
6846 
6847 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6848 		struct hl_cs_job *job)
6849 {
6850 	struct packet_msg_prot *fence_pkt;
6851 	u32 *fence_ptr;
6852 	dma_addr_t fence_dma_addr;
6853 	struct hl_cb *cb;
6854 	u32 tmp, timeout, dma_offset;
6855 	int rc;
6856 
6857 	if (hdev->pldm)
6858 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6859 	else
6860 		timeout = HL_DEVICE_TIMEOUT_USEC;
6861 
6862 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6863 		dev_err_ratelimited(hdev->dev,
6864 			"Can't send driver job on QMAN0 because the device is not idle\n");
6865 		return -EBUSY;
6866 	}
6867 
6868 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6869 							&fence_dma_addr);
6870 	if (!fence_ptr) {
6871 		dev_err(hdev->dev,
6872 			"Failed to allocate fence memory for QMAN0\n");
6873 		return -ENOMEM;
6874 	}
6875 
6876 	cb = job->patched_cb;
6877 
6878 	fence_pkt = cb->kernel_address +
6879 			job->job_cb_size - sizeof(struct packet_msg_prot);
6880 
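	/* Build a MSG_PROT packet at the end of the patched CB; it writes
	 * GAUDI_QMAN0_FENCE_VAL to the fence buffer, which is polled below to
	 * detect completion.
	 */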
6881 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6882 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6883 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6884 
6885 	fence_pkt->ctl = cpu_to_le32(tmp);
6886 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6887 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6888 
6889 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6890 
6891 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6892 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6893 
6894 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6895 					job->job_cb_size, cb->bus_address);
6896 	if (rc) {
6897 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6898 		goto free_fence_ptr;
6899 	}
6900 
6901 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6902 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6903 				timeout, true);
6904 
6905 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6906 
6907 	if (rc == -ETIMEDOUT) {
6908 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6909 		goto free_fence_ptr;
6910 	}
6911 
6912 free_fence_ptr:
6913 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6914 
6915 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6916 					fence_dma_addr);
6917 	return rc;
6918 }
6919 
6920 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6921 {
6922 	if (event_type >= GAUDI_EVENT_SIZE)
6923 		goto event_not_supported;
6924 
6925 	if (!gaudi_irq_map_table[event_type].valid)
6926 		goto event_not_supported;
6927 
6928 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6929 
6930 	return;
6931 
6932 event_not_supported:
6933 	snprintf(desc, size, "N/A");
6934 }
6935 
6936 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6937 							u32 x_y, bool is_write)
6938 {
6939 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6940 
6941 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6942 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6943 
6944 	switch (x_y) {
6945 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6946 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6947 		dma_id[0] = 0;
6948 		dma_id[1] = 2;
6949 		break;
6950 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6951 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6952 		dma_id[0] = 1;
6953 		dma_id[1] = 3;
6954 		break;
6955 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6956 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6957 		dma_id[0] = 4;
6958 		dma_id[1] = 6;
6959 		break;
6960 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6961 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6962 		dma_id[0] = 5;
6963 		dma_id[1] = 7;
6964 		break;
6965 	default:
6966 		goto unknown_initiator;
6967 	}
6968 
6969 	for (i = 0 ; i < 2 ; i++) {
6970 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6971 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6972 	}
6973 
6974 	switch (x_y) {
6975 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6976 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6977 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6978 			return "DMA0";
6979 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6980 			return "DMA2";
6981 		else
6982 			return "DMA0 or DMA2";
6983 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6984 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6985 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6986 			return "DMA1";
6987 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6988 			return "DMA3";
6989 		else
6990 			return "DMA1 or DMA3";
6991 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6992 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6993 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6994 			return "DMA4";
6995 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6996 			return "DMA6";
6997 		else
6998 			return "DMA4 or DMA6";
6999 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7000 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7001 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7002 			return "DMA5";
7003 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7004 			return "DMA7";
7005 		else
7006 			return "DMA5 or DMA7";
7007 	}
7008 
7009 unknown_initiator:
7010 	return "unknown initiator";
7011 }
7012 
7013 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7014 							bool is_write)
7015 {
7016 	u32 val, x_y, axi_id;
7017 
7018 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7019 				RREG32(mmMMU_UP_RAZWI_READ_ID);
7020 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7021 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7022 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7023 			RAZWI_INITIATOR_AXI_ID_SHIFT);
7024 
7025 	switch (x_y) {
7026 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7027 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7028 			return "TPC0";
7029 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7030 			return "NIC0";
7031 		break;
7032 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
7033 		return "TPC1";
7034 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7035 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7036 		return "MME0";
7037 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7038 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7039 		return "MME1";
7040 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
7041 		return "TPC2";
7042 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7043 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7044 			return "TPC3";
7045 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7046 			return "PCI";
7047 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7048 			return "CPU";
7049 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7050 			return "PSOC";
7051 		break;
7052 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7053 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7054 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7055 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7056 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7057 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7058 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7059 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7060 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7061 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7062 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7063 			return "TPC4";
7064 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7065 			return "NIC1";
7066 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7067 			return "NIC2";
7068 		break;
7069 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
7070 		return "TPC5";
7071 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7072 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7073 		return "MME2";
7074 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7075 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7076 		return "MME3";
7077 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
7078 		return "TPC6";
7079 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7080 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7081 			return "TPC7";
7082 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7083 			return "NIC4";
7084 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7085 			return "NIC5";
7086 		break;
7087 	default:
7088 		break;
7089 	}
7090 
7091 	dev_err(hdev->dev,
7092 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7093 		val,
7094 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7095 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7096 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7097 			RAZWI_INITIATOR_AXI_ID_MASK);
7098 
7099 	return "unknown initiator";
7100 }
7101 
7102 static void gaudi_print_razwi_info(struct hl_device *hdev)
7103 {
7104 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7105 		dev_err_ratelimited(hdev->dev,
7106 			"RAZWI event caused by illegal write of %s\n",
7107 			gaudi_get_razwi_initiator_name(hdev, true));
7108 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7109 	}
7110 
7111 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7112 		dev_err_ratelimited(hdev->dev,
7113 			"RAZWI event caused by illegal read of %s\n",
7114 			gaudi_get_razwi_initiator_name(hdev, false));
7115 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7116 	}
7117 }
7118 
7119 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7120 {
7121 	struct gaudi_device *gaudi = hdev->asic_specific;
7122 	u64 addr;
7123 	u32 val;
7124 
7125 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7126 		return;
7127 
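	/* The capture register holds the valid bit and VA bits [49:32]; the
	 * low 32 bits of the faulting VA are read from the separate _VA
	 * register.
	 */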
7128 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7129 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7130 		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7131 		addr <<= 32;
7132 		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7133 
7134 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7135 					addr);
7136 
7137 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7138 	}
7139 
7140 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7141 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7142 		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7143 		addr <<= 32;
7144 		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7145 
7146 		dev_err_ratelimited(hdev->dev,
7147 				"MMU access error on va 0x%llx\n", addr);
7148 
7149 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7150 	}
7151 }
7152 
7153 /*
7154  *  +-------------------+------------------------------------------------------+
7155  *  | Configuration Reg |                     Description                      |
7156  *  |      Address      |                                                      |
7157  *  +-------------------+------------------------------------------------------+
7158  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7159  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7160  *  |                   |0xF34 memory wrappers 63:32                           |
7161  *  |                   |0xF38 memory wrappers 95:64                           |
7162  *  |                   |0xF3C memory wrappers 127:96                          |
7163  *  +-------------------+------------------------------------------------------+
7164  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7165  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7166  *  |                   |0xF44 memory wrappers 63:32                           |
7167  *  |                   |0xF48 memory wrappers 95:64                           |
7168  *  |                   |0xF4C memory wrappers 127:96                          |
7169  *  +-------------------+------------------------------------------------------+
7170  */
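/*
 * gaudi_extract_ecc_info() below scans these registers with __ffs(): a set
 * bit b in the i-th indication register maps to memory wrapper (32 * i + b).
 */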
7171 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7172 		struct ecc_info_extract_params *params, u64 *ecc_address,
7173 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7174 {
7175 	struct gaudi_device *gaudi = hdev->asic_specific;
7176 	u32 i, num_mem_regs, reg, err_bit;
7177 	u64 err_addr, err_word = 0;
7178 	int rc = 0;
7179 
7180 	num_mem_regs = params->num_memories / 32 +
7181 			((params->num_memories % 32) ? 1 : 0);
7182 
7183 	if (params->block_address >= CFG_BASE)
7184 		params->block_address -= CFG_BASE;
7185 
7186 	if (params->derr)
7187 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7188 	else
7189 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7190 
7191 	if (params->disable_clock_gating) {
7192 		mutex_lock(&gaudi->clk_gate_mutex);
7193 		hdev->asic_funcs->disable_clock_gating(hdev);
7194 	}
7195 
7196 	/* Set invalid wrapper index */
7197 	*memory_wrapper_idx = 0xFF;
7198 
7199 	/* Iterate through memory wrappers, a single bit must be set */
7200 	for (i = 0 ; i < num_mem_regs ; i++) {
7201 		err_word = RREG32(err_addr + i * 4);
7203 		if (err_word) {
7204 			err_bit = __ffs(err_word);
7205 			*memory_wrapper_idx = err_bit + (32 * i);
7206 			break;
7207 		}
7208 	}
7209 
7210 	if (*memory_wrapper_idx == 0xFF) {
7211 		dev_err(hdev->dev, "ECC error information cannot be found\n");
7212 		rc = -EINVAL;
7213 		goto enable_clk_gate;
7214 	}
7215 
7216 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7217 			*memory_wrapper_idx);
7218 
7219 	*ecc_address =
7220 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7221 	*ecc_syndrom =
7222 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7223 
7224 	/* Clear error indication */
7225 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7226 	if (params->derr)
7227 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7228 	else
7229 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7230 
7231 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7232 
7233 enable_clk_gate:
7234 	if (params->disable_clock_gating) {
7235 		hdev->asic_funcs->set_clock_gating(hdev);
7236 
7237 		mutex_unlock(&gaudi->clk_gate_mutex);
7238 	}
7239 
7240 	return rc;
7241 }
7242 
7243 /*
7244  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7245  *
7246  * @idx: the current pi/ci value
7247  * @q_len: the queue length (power of 2)
7248  *
7249  * @return the cyclically decremented index
7250  */
7251 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7252 {
7253 	u32 mask = q_len - 1;
7254 
7255 	/*
7256 	 * modular decrement is equivalent to adding (q_len - 1);
7257 	 * later we take the LSBs to make sure the value is in the
7258 	 * range [0, q_len - 1]
7259 	 */
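	/* e.g. for q_len = 8: idx 0 wraps to 7, and idx 3 becomes
	 * (3 + 7) & 0x7 = 2
	 */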
7260 	return (idx + q_len - 1) & mask;
7261 }
7262 
7263 /**
7264  * gaudi_print_sw_config_stream_data - print SW config stream data
7265  *
7266  * @hdev: pointer to the habanalabs device structure
7267  * @stream: the QMAN's stream
7268  * @qman_base: base address of QMAN registers block
7269  */
7270 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7271 						u64 qman_base)
7272 {
7273 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7274 	u32 cq_ptr_lo_off, size;
7275 
7276 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7277 
7278 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7279 						stream * cq_ptr_lo_off;
7280 	cq_ptr_hi = cq_ptr_lo +
7281 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7282 	cq_tsize = cq_ptr_lo +
7283 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7284 
7285 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7286 	size = RREG32(cq_tsize);
7287 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7288 							stream, cq_ptr, size);
7289 }
7290 
7291 /**
7292  * gaudi_print_last_pqes_on_err - print last PQEs on error
7293  *
7294  * @hdev: pointer to the habanalabs device structure
7295  * @qid_base: first QID of the QMAN (out of 4 streams)
7296  * @stream: the QMAN's stream
7297  * @qman_base: base address of QMAN registers block
7298  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7299  */
7300 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7301 						u32 stream, u64 qman_base,
7302 						bool pr_sw_conf)
7303 {
7304 	u32 ci, qm_ci_stream_off, queue_len;
7305 	struct hl_hw_queue *q;
7306 	u64 pq_ci;
7307 	int i;
7308 
7309 	q = &hdev->kernel_queues[qid_base + stream];
7310 
7311 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7312 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7313 						stream * qm_ci_stream_off;
7314 
7315 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7316 					q->int_queue_len : HL_QUEUE_LENGTH;
7317 
7318 	hdev->asic_funcs->hw_queues_lock(hdev);
7319 
7320 	if (pr_sw_conf)
7321 		gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7322 
7323 	ci = RREG32(pq_ci);
7324 
7325 	/* we should start printing from ci - 1 */
7326 	ci = gaudi_queue_idx_dec(ci, queue_len);
7327 
7328 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7329 		struct hl_bd *bd;
7330 		u64 addr;
7331 		u32 len;
7332 
7333 		bd = q->kernel_address;
7334 		bd += ci;
7335 
7336 		len = le32_to_cpu(bd->len);
7337 		/* len 0 means an uninitialized entry - break */
7338 		if (!len)
7339 			break;
7340 
7341 		addr = le64_to_cpu(bd->ptr);
7342 
7343 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7344 							stream, ci, addr, len);
7345 
7346 		/* get previous ci, wrap if needed */
7347 		ci = gaudi_queue_idx_dec(ci, queue_len);
7348 	}
7349 
7350 	hdev->asic_funcs->hw_queues_unlock(hdev);
7351 }
7352 
7353 /**
7354  * print_qman_data_on_err - extract QMAN data on error
7355  *
7356  * @hdev: pointer to the habanalabs device structure
7357  * @qid_base: first QID of the QMAN (out of 4 streams)
7358  * @stream: the QMAN's stream
7359  * @qman_base: base address of QMAN registers block
7360  *
7361  * This function attempts to extract as much data as possible on a QMAN error.
7362  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7363  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7364  */
7365 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7366 						u32 stream, u64 qman_base)
7367 {
7368 	u32 i;
7369 
7370 	if (stream != QMAN_STREAMS) {
7371 		gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7372 									true);
7373 		return;
7374 	}
7375 
7376 	gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7377 
7378 	for (i = 0; i < QMAN_STREAMS; i++)
7379 		gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7380 									false);
7381 }
7382 
7383 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7384 					  const char *qm_name,
7385 					  u64 qman_base,
7386 					  u32 qid_base)
7387 {
7388 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7389 	u64 glbl_sts_addr, arb_err_addr;
7390 	char reg_desc[32];
7391 
7392 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7393 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7394 
7395 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7396 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7397 		glbl_sts_clr_val = 0;
7398 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7399 
7400 		if (!glbl_sts_val)
7401 			continue;
7402 
7403 		if (i == QMAN_STREAMS)
7404 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7405 		else
7406 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7407 
7408 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7409 			if (glbl_sts_val & BIT(j)) {
7410 				dev_err_ratelimited(hdev->dev,
7411 						"%s %s. err cause: %s\n",
7412 						qm_name, reg_desc,
7413 						gaudi_qman_error_cause[j]);
7414 				glbl_sts_clr_val |= BIT(j);
7415 			}
7416 		}
7417 
7418 		/* Write 1 to clear errors */
7419 		if (!hdev->stop_on_err)
7420 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7421 		else
7422 			print_qman_data_on_err(hdev, qid_base, i, qman_base);
7423 	}
7424 
7425 	arb_err_val = RREG32(arb_err_addr);
7426 
7427 	if (!arb_err_val)
7428 		return;
7429 
7430 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7431 		if (arb_err_val & BIT(j)) {
7432 			dev_err_ratelimited(hdev->dev,
7433 					"%s ARB_ERR. err cause: %s\n",
7434 					qm_name,
7435 					gaudi_qman_arb_error_cause[j]);
7436 		}
7437 	}
7438 }
7439 
7440 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7441 		struct hl_eq_sm_sei_data *sei_data)
7442 {
7443 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7444 
7445 	/* Flip the bits as the enum is ordered in the opposite way */
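	/* (0 <-> 3 and 1 <-> 2, e.g. GAUDI_EVENT_DMA_IF_SEI_0 selects
	 * gaudi_sync_manager_names[3])
	 */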
7446 	index = (index ^ 0x3) & 0x3;
7447 
7448 	switch (sei_data->sei_cause) {
7449 	case SM_SEI_SO_OVERFLOW:
7450 		dev_err_ratelimited(hdev->dev,
7451 			"%s SEI Error: SOB Group %u overflow/underflow",
7452 			gaudi_sync_manager_names[index],
7453 			le32_to_cpu(sei_data->sei_log));
7454 		break;
7455 	case SM_SEI_LBW_4B_UNALIGNED:
7456 		dev_err_ratelimited(hdev->dev,
7457 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7458 			gaudi_sync_manager_names[index],
7459 			le32_to_cpu(sei_data->sei_log));
7460 		break;
7461 	case SM_SEI_AXI_RESPONSE_ERR:
7462 		dev_err_ratelimited(hdev->dev,
7463 			"%s SEI Error: AXI ID %u response error",
7464 			gaudi_sync_manager_names[index],
7465 			le32_to_cpu(sei_data->sei_log));
7466 		break;
7467 	default:
7468 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7469 				le32_to_cpu(sei_data->sei_log));
7470 		break;
7471 	}
7472 }
7473 
7474 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7475 		struct hl_eq_ecc_data *ecc_data)
7476 {
7477 	struct ecc_info_extract_params params;
7478 	u64 ecc_address = 0, ecc_syndrom = 0;
7479 	u8 index, memory_wrapper_idx = 0;
7480 	bool extract_info_from_fw;
7481 	int rc;
7482 
7483 	if (hdev->asic_prop.fw_security_enabled) {
7484 		extract_info_from_fw = true;
7485 		goto extract_ecc_info;
7486 	}
7487 
7488 	switch (event_type) {
7489 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7490 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7491 		extract_info_from_fw = true;
7492 		break;
7493 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7494 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7495 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7496 		params.num_memories = 90;
7497 		params.derr = false;
7498 		params.disable_clock_gating = true;
7499 		extract_info_from_fw = false;
7500 		break;
7501 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7502 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7503 		params.block_address =
7504 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7505 		params.num_memories = 90;
7506 		params.derr = true;
7507 		params.disable_clock_gating = true;
7508 		extract_info_from_fw = false;
7509 		break;
7510 	case GAUDI_EVENT_MME0_ACC_SERR:
7511 	case GAUDI_EVENT_MME1_ACC_SERR:
7512 	case GAUDI_EVENT_MME2_ACC_SERR:
7513 	case GAUDI_EVENT_MME3_ACC_SERR:
7514 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7515 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7516 		params.num_memories = 128;
7517 		params.derr = false;
7518 		params.disable_clock_gating = true;
7519 		extract_info_from_fw = false;
7520 		break;
7521 	case GAUDI_EVENT_MME0_ACC_DERR:
7522 	case GAUDI_EVENT_MME1_ACC_DERR:
7523 	case GAUDI_EVENT_MME2_ACC_DERR:
7524 	case GAUDI_EVENT_MME3_ACC_DERR:
7525 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7526 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7527 		params.num_memories = 128;
7528 		params.derr = true;
7529 		params.disable_clock_gating = true;
7530 		extract_info_from_fw = false;
7531 		break;
7532 	case GAUDI_EVENT_MME0_SBAB_SERR:
7533 	case GAUDI_EVENT_MME1_SBAB_SERR:
7534 	case GAUDI_EVENT_MME2_SBAB_SERR:
7535 	case GAUDI_EVENT_MME3_SBAB_SERR:
7536 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7537 		params.block_address =
7538 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7539 		params.num_memories = 33;
7540 		params.derr = false;
7541 		params.disable_clock_gating = true;
7542 		extract_info_from_fw = false;
7543 		break;
7544 	case GAUDI_EVENT_MME0_SBAB_DERR:
7545 	case GAUDI_EVENT_MME1_SBAB_DERR:
7546 	case GAUDI_EVENT_MME2_SBAB_DERR:
7547 	case GAUDI_EVENT_MME3_SBAB_DERR:
7548 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7549 		params.block_address =
7550 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7551 		params.num_memories = 33;
7552 		params.derr = true;
7553 		params.disable_clock_gating = true;
7554 		extract_info_from_fw = false;
7555 		break;
7556 	default:
7557 		return;
7558 	}
7559 
7560 extract_ecc_info:
7561 	if (extract_info_from_fw) {
7562 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7563 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7564 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7565 	} else {
7566 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7567 				&ecc_syndrom, &memory_wrapper_idx);
7568 		if (rc)
7569 			return;
7570 	}
7571 
7572 	dev_err(hdev->dev,
7573 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7574 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7575 }
7576 
7577 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7578 {
7579 	u64 qman_base;
7580 	char desc[32];
7581 	u32 qid_base;
7582 	u8 index;
7583 
7584 	switch (event_type) {
7585 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7586 		index = event_type - GAUDI_EVENT_TPC0_QM;
7587 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7588 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7589 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7590 		break;
7591 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7592 		index = event_type - GAUDI_EVENT_MME0_QM;
7593 		qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7594 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7595 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7596 		break;
7597 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7598 		index = event_type - GAUDI_EVENT_DMA0_QM;
7599 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7600 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
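		/* (the CPU PQ queue ID is located between the DMA1 and DMA2
		 * queue IDs, so DMA engines 2-7 are shifted up by one)
		 */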
7601 		if (index > 1)
7602 			qid_base++;
7603 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7604 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7605 		break;
7606 	case GAUDI_EVENT_NIC0_QM0:
7607 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7608 		qman_base = mmNIC0_QM0_BASE;
7609 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7610 		break;
7611 	case GAUDI_EVENT_NIC0_QM1:
7612 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7613 		qman_base = mmNIC0_QM1_BASE;
7614 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7615 		break;
7616 	case GAUDI_EVENT_NIC1_QM0:
7617 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7618 		qman_base = mmNIC1_QM0_BASE;
7619 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7620 		break;
7621 	case GAUDI_EVENT_NIC1_QM1:
7622 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7623 		qman_base = mmNIC1_QM1_BASE;
7624 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7625 		break;
7626 	case GAUDI_EVENT_NIC2_QM0:
7627 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7628 		qman_base = mmNIC2_QM0_BASE;
7629 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7630 		break;
7631 	case GAUDI_EVENT_NIC2_QM1:
7632 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7633 		qman_base = mmNIC2_QM1_BASE;
7634 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7635 		break;
7636 	case GAUDI_EVENT_NIC3_QM0:
7637 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7638 		qman_base = mmNIC3_QM0_BASE;
7639 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7640 		break;
7641 	case GAUDI_EVENT_NIC3_QM1:
7642 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7643 		qman_base = mmNIC3_QM1_BASE;
7644 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7645 		break;
7646 	case GAUDI_EVENT_NIC4_QM0:
7647 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7648 		qman_base = mmNIC4_QM0_BASE;
7649 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7650 		break;
7651 	case GAUDI_EVENT_NIC4_QM1:
7652 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7653 		qman_base = mmNIC4_QM1_BASE;
7654 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7655 		break;
7656 	default:
7657 		return;
7658 	}
7659 
7660 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7661 }
7662 
7663 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7664 					bool razwi)
7665 {
7666 	char desc[64] = "";
7667 
7668 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7669 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7670 		event_type, desc);
7671 
7672 	if (razwi) {
7673 		gaudi_print_razwi_info(hdev);
7674 		gaudi_print_mmu_error_info(hdev);
7675 	}
7676 }
7677 
7678 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7679 					struct cpucp_pkt_sync_err *sync_err)
7680 {
7681 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7682 
7683 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7684 			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7685 }
7686 
7687 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7688 					struct hl_eq_fw_alive *fw_alive)
7689 {
7690 	dev_err(hdev->dev,
7691 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7692 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7693 		"Minor" : "Critical", fw_alive->process_id,
7694 		fw_alive->thread_id, fw_alive->uptime_seconds);
7695 }
7696 
7697 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7698 {
7699 	struct gaudi_device *gaudi = hdev->asic_specific;
7700 
7701 	/* Unmask all IRQs since some could have been received
7702 	 * during the soft reset
7703 	 */
7704 	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7705 }
7706 
7707 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7708 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7709 {
7710 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7711 	int rc = 0;
7712 
7713 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7714 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7715 		if (!hbm_ecc_data) {
7716 			dev_err(hdev->dev, "No FW ECC data\n");
7717 			return 0;
7718 		}
7719 
7720 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7721 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7722 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7723 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7724 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7725 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7726 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7727 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7728 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7729 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7730 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7731 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7732 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7733 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7734 
7735 		dev_err(hdev->dev,
7736 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7737 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7738 		dev_err(hdev->dev,
7739 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7740 			device, ch, hbm_ecc_data->first_addr, type,
7741 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7742 			hbm_ecc_data->dec_cnt);
7743 		return 0;
7744 	}
7745 
7746 	if (hdev->asic_prop.fw_security_enabled) {
7747 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7748 		return 0;
7749 	}
7750 
7751 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7752 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7753 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7754 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7755 		if (val) {
7756 			rc = -EIO;
7757 			dev_err(hdev->dev,
7758 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7759 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7760 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7761 				(val >> 4) & 0x1);
7762 
7763 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7764 			dev_err(hdev->dev,
7765 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7766 				device, ch * 2,
7767 				RREG32(base + ch * 0x1000 + 0x064),
7768 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7769 				(val2 & 0xFF0000) >> 16,
7770 				(val2 & 0xFF000000) >> 24);
7771 		}
7772 
7773 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7774 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7775 		if (val) {
7776 			rc = -EIO;
7777 			dev_err(hdev->dev,
7778 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7779 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7780 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7781 				(val >> 4) & 0x1);
7782 
7783 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7784 			dev_err(hdev->dev,
7785 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7786 				device, ch * 2 + 1,
7787 				RREG32(base + ch * 0x1000 + 0x074),
7788 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7789 				(val2 & 0xFF0000) >> 16,
7790 				(val2 & 0xFF000000) >> 24);
7791 		}
7792 
7793 		/* Clear interrupts */
7794 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7795 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7796 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7797 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7798 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7799 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7800 	}
7801 
7802 	val  = RREG32(base + 0x8F30);
7803 	val2 = RREG32(base + 0x8F34);
7804 	if (val | val2) {
7805 		rc = -EIO;
7806 		dev_err(hdev->dev,
7807 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7808 			device, val, val2);
7809 	}
7810 	val  = RREG32(base + 0x8F40);
7811 	val2 = RREG32(base + 0x8F44);
7812 	if (val | val2) {
7813 		rc = -EIO;
7814 		dev_err(hdev->dev,
7815 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7816 			device, val, val2);
7817 	}
7818 
7819 	return rc;
7820 }
7821 
7822 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7823 {
7824 	switch (hbm_event_type) {
7825 	case GAUDI_EVENT_HBM0_SPI_0:
7826 	case GAUDI_EVENT_HBM0_SPI_1:
7827 		return 0;
7828 	case GAUDI_EVENT_HBM1_SPI_0:
7829 	case GAUDI_EVENT_HBM1_SPI_1:
7830 		return 1;
7831 	case GAUDI_EVENT_HBM2_SPI_0:
7832 	case GAUDI_EVENT_HBM2_SPI_1:
7833 		return 2;
7834 	case GAUDI_EVENT_HBM3_SPI_0:
7835 	case GAUDI_EVENT_HBM3_SPI_1:
7836 		return 3;
7837 	default:
7838 		break;
7839 	}
7840 
7841 	/* Should never happen */
7842 	return 0;
7843 }
7844 
7845 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7846 					char *interrupt_name)
7847 {
7848 	struct gaudi_device *gaudi = hdev->asic_specific;
7849 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7850 	bool soft_reset_required = false;
7851 
7852 	/* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7853 	 * gating, which cannot be done by CPU-CP, so the driver reads and
7854 	 * clears them instead.
7855 	 */
7856 
7857 	mutex_lock(&gaudi->clk_gate_mutex);
7858 
7859 	hdev->asic_funcs->disable_clock_gating(hdev);
7860 
7861 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7862 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7863 
7864 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7865 		if (tpc_interrupts_cause & BIT(i)) {
7866 			dev_err_ratelimited(hdev->dev,
7867 					"TPC%d_%s interrupt cause: %s\n",
7868 					tpc_id, interrupt_name,
7869 					gaudi_tpc_interrupts_cause[i]);
7870 			/* If this is QM error, we need to soft-reset */
7871 			if (i == 15)
7872 				soft_reset_required = true;
7873 		}
7874 
7875 	/* Clear interrupts */
7876 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7877 
7878 	hdev->asic_funcs->set_clock_gating(hdev);
7879 
7880 	mutex_unlock(&gaudi->clk_gate_mutex);
7881 
7882 	return soft_reset_required;
7883 }
7884 
7885 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7886 {
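	/* The event enum appears to lay out two DEC-related entries per TPC,
	 * so the distance from TPC0_DEC divided by two gives the TPC index.
	 */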
7887 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7888 }
7889 
7890 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7891 {
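	/* KRN_ERR events appear to be spaced six enum entries apart per TPC,
	 * hence the division by six.
	 */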
7892 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7893 }
7894 
7895 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7896 					u16 event_type)
7897 {
7898 	switch (event_type) {
7899 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7900 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7901 		dev_info_ratelimited(hdev->dev,
7902 			"Clock throttling due to power consumption\n");
7903 		break;
7904 
7905 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7906 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7907 		dev_info_ratelimited(hdev->dev,
7908 			"Power envelop is safe, back to optimal clock\n");
7909 		break;
7910 
7911 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7912 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7913 		dev_info_ratelimited(hdev->dev,
7914 			"Clock throttling due to overheating\n");
7915 		break;
7916 
7917 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7918 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7919 		dev_info_ratelimited(hdev->dev,
7920 			"Thermal envelop is safe, back to optimal clock\n");
7921 		break;
7922 
7923 	default:
7924 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7925 			event_type);
7926 		break;
7927 	}
7928 }
7929 
7930 static void gaudi_handle_eqe(struct hl_device *hdev,
7931 				struct hl_eq_entry *eq_entry)
7932 {
7933 	struct gaudi_device *gaudi = hdev->asic_specific;
7934 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7935 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7936 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7937 	bool reset_required;
7938 	u8 cause;
7939 	int rc;
7940 
7941 	if (event_type >= GAUDI_EVENT_SIZE) {
7942 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7943 				event_type, GAUDI_EVENT_SIZE - 1);
7944 		return;
7945 	}
7946 
7947 	gaudi->events_stat[event_type]++;
7948 	gaudi->events_stat_aggregate[event_type]++;
7949 
7950 	switch (event_type) {
7951 	case GAUDI_EVENT_PCIE_CORE_DERR:
7952 	case GAUDI_EVENT_PCIE_IF_DERR:
7953 	case GAUDI_EVENT_PCIE_PHY_DERR:
7954 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7955 	case GAUDI_EVENT_MME0_ACC_DERR:
7956 	case GAUDI_EVENT_MME0_SBAB_DERR:
7957 	case GAUDI_EVENT_MME1_ACC_DERR:
7958 	case GAUDI_EVENT_MME1_SBAB_DERR:
7959 	case GAUDI_EVENT_MME2_ACC_DERR:
7960 	case GAUDI_EVENT_MME2_SBAB_DERR:
7961 	case GAUDI_EVENT_MME3_ACC_DERR:
7962 	case GAUDI_EVENT_MME3_SBAB_DERR:
7963 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7964 		fallthrough;
7965 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7966 	case GAUDI_EVENT_PSOC_MEM_DERR:
7967 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7968 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7969 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7970 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7971 	case GAUDI_EVENT_MMU_DERR:
7972 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7973 		gaudi_print_irq_info(hdev, event_type, true);
7974 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7975 		goto reset_device;
7976 
7977 	case GAUDI_EVENT_GIC500:
7978 	case GAUDI_EVENT_AXI_ECC:
7979 	case GAUDI_EVENT_L2_RAM_ECC:
7980 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7981 		gaudi_print_irq_info(hdev, event_type, false);
7982 		goto reset_device;
7983 
7984 	case GAUDI_EVENT_HBM0_SPI_0:
7985 	case GAUDI_EVENT_HBM1_SPI_0:
7986 	case GAUDI_EVENT_HBM2_SPI_0:
7987 	case GAUDI_EVENT_HBM3_SPI_0:
7988 		gaudi_print_irq_info(hdev, event_type, false);
7989 		gaudi_hbm_read_interrupts(hdev,
7990 				gaudi_hbm_event_to_dev(event_type),
7991 				&eq_entry->hbm_ecc_data);
7992 		goto reset_device;
7993 
7994 	case GAUDI_EVENT_HBM0_SPI_1:
7995 	case GAUDI_EVENT_HBM1_SPI_1:
7996 	case GAUDI_EVENT_HBM2_SPI_1:
7997 	case GAUDI_EVENT_HBM3_SPI_1:
7998 		gaudi_print_irq_info(hdev, event_type, false);
7999 		gaudi_hbm_read_interrupts(hdev,
8000 				gaudi_hbm_event_to_dev(event_type),
8001 				&eq_entry->hbm_ecc_data);
8002 		hl_fw_unmask_irq(hdev, event_type);
8003 		break;
8004 
8005 	case GAUDI_EVENT_TPC0_DEC:
8006 	case GAUDI_EVENT_TPC1_DEC:
8007 	case GAUDI_EVENT_TPC2_DEC:
8008 	case GAUDI_EVENT_TPC3_DEC:
8009 	case GAUDI_EVENT_TPC4_DEC:
8010 	case GAUDI_EVENT_TPC5_DEC:
8011 	case GAUDI_EVENT_TPC6_DEC:
8012 	case GAUDI_EVENT_TPC7_DEC:
8013 		gaudi_print_irq_info(hdev, event_type, true);
8014 		reset_required = gaudi_tpc_read_interrupts(hdev,
8015 					tpc_dec_event_to_tpc_id(event_type),
8016 					"AXI_SLV_DEC_Error");
8017 		if (reset_required) {
8018 			dev_err(hdev->dev, "reset required due to %s\n",
8019 				gaudi_irq_map_table[event_type].name);
8020 
8021 			hl_device_reset(hdev, 0);
8022 		} else {
8023 			hl_fw_unmask_irq(hdev, event_type);
8024 		}
8025 		break;
8026 
8027 	case GAUDI_EVENT_TPC0_KRN_ERR:
8028 	case GAUDI_EVENT_TPC1_KRN_ERR:
8029 	case GAUDI_EVENT_TPC2_KRN_ERR:
8030 	case GAUDI_EVENT_TPC3_KRN_ERR:
8031 	case GAUDI_EVENT_TPC4_KRN_ERR:
8032 	case GAUDI_EVENT_TPC5_KRN_ERR:
8033 	case GAUDI_EVENT_TPC6_KRN_ERR:
8034 	case GAUDI_EVENT_TPC7_KRN_ERR:
8035 		gaudi_print_irq_info(hdev, event_type, true);
8036 		reset_required = gaudi_tpc_read_interrupts(hdev,
8037 					tpc_krn_event_to_tpc_id(event_type),
8038 					"KRN_ERR");
8039 		if (reset_required) {
8040 			dev_err(hdev->dev, "reset required due to %s\n",
8041 				gaudi_irq_map_table[event_type].name);
8042 
8043 			hl_device_reset(hdev, 0);
8044 		} else {
8045 			hl_fw_unmask_irq(hdev, event_type);
8046 		}
8047 		break;
8048 
8049 	case GAUDI_EVENT_PCIE_CORE_SERR:
8050 	case GAUDI_EVENT_PCIE_IF_SERR:
8051 	case GAUDI_EVENT_PCIE_PHY_SERR:
8052 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8053 	case GAUDI_EVENT_MME0_ACC_SERR:
8054 	case GAUDI_EVENT_MME0_SBAB_SERR:
8055 	case GAUDI_EVENT_MME1_ACC_SERR:
8056 	case GAUDI_EVENT_MME1_SBAB_SERR:
8057 	case GAUDI_EVENT_MME2_ACC_SERR:
8058 	case GAUDI_EVENT_MME2_SBAB_SERR:
8059 	case GAUDI_EVENT_MME3_ACC_SERR:
8060 	case GAUDI_EVENT_MME3_SBAB_SERR:
8061 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8062 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
8063 	case GAUDI_EVENT_PSOC_MEM_SERR:
8064 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8065 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8066 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8067 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8068 		fallthrough;
8069 	case GAUDI_EVENT_MMU_SERR:
8070 		gaudi_print_irq_info(hdev, event_type, true);
8071 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8072 		hl_fw_unmask_irq(hdev, event_type);
8073 		break;
8074 
8075 	case GAUDI_EVENT_PCIE_DEC:
8076 	case GAUDI_EVENT_MME0_WBC_RSP:
8077 	case GAUDI_EVENT_MME0_SBAB0_RSP:
8078 	case GAUDI_EVENT_MME1_WBC_RSP:
8079 	case GAUDI_EVENT_MME1_SBAB0_RSP:
8080 	case GAUDI_EVENT_MME2_WBC_RSP:
8081 	case GAUDI_EVENT_MME2_SBAB0_RSP:
8082 	case GAUDI_EVENT_MME3_WBC_RSP:
8083 	case GAUDI_EVENT_MME3_SBAB0_RSP:
8084 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
8085 	case GAUDI_EVENT_PSOC_AXI_DEC:
8086 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
8087 	case GAUDI_EVENT_MMU_PAGE_FAULT:
8088 	case GAUDI_EVENT_MMU_WR_PERM:
8089 	case GAUDI_EVENT_RAZWI_OR_ADC:
8090 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8091 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8092 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8093 		fallthrough;
8094 	case GAUDI_EVENT_NIC0_QM0:
8095 	case GAUDI_EVENT_NIC0_QM1:
8096 	case GAUDI_EVENT_NIC1_QM0:
8097 	case GAUDI_EVENT_NIC1_QM1:
8098 	case GAUDI_EVENT_NIC2_QM0:
8099 	case GAUDI_EVENT_NIC2_QM1:
8100 	case GAUDI_EVENT_NIC3_QM0:
8101 	case GAUDI_EVENT_NIC3_QM1:
8102 	case GAUDI_EVENT_NIC4_QM0:
8103 	case GAUDI_EVENT_NIC4_QM1:
8104 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8105 		gaudi_print_irq_info(hdev, event_type, true);
8106 		gaudi_handle_qman_err(hdev, event_type);
8107 		hl_fw_unmask_irq(hdev, event_type);
8108 		break;
8109 
8110 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8111 		gaudi_print_irq_info(hdev, event_type, true);
8112 		goto reset_device;
8113 
8114 	case GAUDI_EVENT_TPC0_BMON_SPMU:
8115 	case GAUDI_EVENT_TPC1_BMON_SPMU:
8116 	case GAUDI_EVENT_TPC2_BMON_SPMU:
8117 	case GAUDI_EVENT_TPC3_BMON_SPMU:
8118 	case GAUDI_EVENT_TPC4_BMON_SPMU:
8119 	case GAUDI_EVENT_TPC5_BMON_SPMU:
8120 	case GAUDI_EVENT_TPC6_BMON_SPMU:
8121 	case GAUDI_EVENT_TPC7_BMON_SPMU:
8122 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8123 		gaudi_print_irq_info(hdev, event_type, false);
8124 		hl_fw_unmask_irq(hdev, event_type);
8125 		break;
8126 
8127 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8128 		gaudi_print_irq_info(hdev, event_type, false);
8129 		gaudi_print_sm_sei_info(hdev, event_type,
8130 					&eq_entry->sm_sei_data);
8131 		rc = hl_state_dump(hdev);
8132 		if (rc)
8133 			dev_err(hdev->dev,
8134 				"Error during system state dump %d\n", rc);
8135 		hl_fw_unmask_irq(hdev, event_type);
8136 		break;
8137 
8138 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8139 		gaudi_print_clk_change_info(hdev, event_type);
8140 		hl_fw_unmask_irq(hdev, event_type);
8141 		break;
8142 
8143 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
8144 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8145 		dev_err(hdev->dev,
8146 			"Received high temp H/W interrupt %d (cause %d)\n",
8147 			event_type, cause);
8148 		break;
8149 
8150 	case GAUDI_EVENT_DEV_RESET_REQ:
8151 		gaudi_print_irq_info(hdev, event_type, false);
8152 		goto reset_device;
8153 
8154 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8155 		gaudi_print_irq_info(hdev, event_type, false);
8156 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8157 		goto reset_device;
8158 
8159 	case GAUDI_EVENT_FW_ALIVE_S:
8160 		gaudi_print_irq_info(hdev, event_type, false);
8161 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8162 		goto reset_device;
8163 
8164 	default:
8165 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8166 				event_type);
8167 		break;
8168 	}
8169 
8170 	return;
8171 
8172 reset_device:
8173 	if (hdev->asic_prop.fw_security_enabled)
8174 		hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
8175 	else if (hdev->hard_reset_on_fw_events)
8176 		hl_device_reset(hdev, HL_RESET_HARD);
8177 	else
8178 		hl_fw_unmask_irq(hdev, event_type);
8179 }
8180 
8181 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8182 					u32 *size)
8183 {
8184 	struct gaudi_device *gaudi = hdev->asic_specific;
8185 
8186 	if (aggregate) {
8187 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
8188 		return gaudi->events_stat_aggregate;
8189 	}
8190 
8191 	*size = (u32) sizeof(gaudi->events_stat);
8192 	return gaudi->events_stat;
8193 }
8194 
8195 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8196 					u32 flags)
8197 {
8198 	struct gaudi_device *gaudi = hdev->asic_specific;
8199 	u32 status, timeout_usec;
8200 	int rc;
8201 
8202 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8203 		hdev->hard_reset_pending)
8204 		return 0;
8205 
8206 	if (hdev->pldm)
8207 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8208 	else
8209 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8210 
8211 	/* L0 & L1 invalidation */
8212 	WREG32(mmSTLB_INV_PS, 3);
8213 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8214 	WREG32(mmSTLB_INV_PS, 2);
8215 
8216 	rc = hl_poll_timeout(
8217 		hdev,
8218 		mmSTLB_INV_PS,
8219 		status,
8220 		!status,
8221 		1000,
8222 		timeout_usec);
8223 
8224 	WREG32(mmSTLB_INV_SET, 0);
8225 
8226 	if (rc) {
8227 		dev_err_ratelimited(hdev->dev,
8228 					"MMU cache invalidation timeout\n");
8229 		hl_device_reset(hdev, HL_RESET_HARD);
8230 	}
8231 
8232 	return rc;
8233 }
8234 
8235 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8236 						bool is_hard, u32 flags,
8237 						u32 asid, u64 va, u64 size)
8238 {
8239 	/* Treat as invalidate all because there is no range invalidation
8240 	 * in Gaudi
8241 	 */
8242 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8243 }
8244 
8245 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8246 					u32 asid, u64 phys_addr)
8247 {
8248 	u32 status, timeout_usec;
8249 	int rc;
8250 
8251 	if (hdev->pldm)
8252 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8253 	else
8254 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8255 
8256 	WREG32(MMU_ASID, asid);
8257 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8258 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
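	/* Setting the MSB kicks off the hop0 address update; the same bit is
	 * polled below until the HW clears it.
	 */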
8259 	WREG32(MMU_BUSY, 0x80000000);
8260 
8261 	rc = hl_poll_timeout(
8262 		hdev,
8263 		MMU_BUSY,
8264 		status,
8265 		!(status & 0x80000000),
8266 		1000,
8267 		timeout_usec);
8268 
8269 	if (rc) {
8270 		dev_err(hdev->dev,
8271 			"Timeout during MMU hop0 config of asid %d\n", asid);
8272 		return rc;
8273 	}
8274 
8275 	return 0;
8276 }
8277 
8278 static int gaudi_send_heartbeat(struct hl_device *hdev)
8279 {
8280 	struct gaudi_device *gaudi = hdev->asic_specific;
8281 
8282 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8283 		return 0;
8284 
8285 	return hl_fw_send_heartbeat(hdev);
8286 }
8287 
8288 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8289 {
8290 	struct gaudi_device *gaudi = hdev->asic_specific;
8291 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8292 	int rc;
8293 
8294 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8295 		return 0;
8296 
8297 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8298 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8299 					mmCPU_BOOT_ERR1);
8300 	if (rc)
8301 		return rc;
8302 
8303 	if (!strlen(prop->cpucp_info.card_name))
8304 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8305 				CARD_NAME_MAX_LEN);
8306 
8307 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8308 
8309 	set_default_power_values(hdev);
8310 
8311 	hdev->max_power = prop->max_power_default;
8312 
8313 	return 0;
8314 }
8315 
8316 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8317 					u8 mask_len, struct seq_file *s)
8318 {
8319 	struct gaudi_device *gaudi = hdev->asic_specific;
8320 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8321 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8322 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8323 	unsigned long *mask = (unsigned long *)mask_arr;
8324 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8325 	bool is_idle = true, is_eng_idle, is_slave;
8326 	u64 offset;
8327 	int i, dma_id, port;
8328 
8329 	mutex_lock(&gaudi->clk_gate_mutex);
8330 
8331 	hdev->asic_funcs->disable_clock_gating(hdev);
8332 
8333 	if (s)
8334 		seq_puts(s,
8335 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8336 			"---  -------  ------------  ----------  -------------\n");
8337 
8338 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8339 		dma_id = gaudi_dma_assignment[i];
8340 		offset = dma_id * DMA_QMAN_OFFSET;
8341 
8342 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8343 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8344 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8345 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8346 				IS_DMA_IDLE(dma_core_sts0);
8347 		is_idle &= is_eng_idle;
8348 
8349 		if (mask && !is_eng_idle)
8350 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8351 		if (s)
8352 			seq_printf(s, fmt, dma_id,
8353 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8354 				qm_cgm_sts, dma_core_sts0);
8355 	}
8356 
8357 	if (s)
8358 		seq_puts(s,
8359 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8360 			"---  -------  ------------  ----------  ----------\n");
8361 
8362 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8363 		offset = i * TPC_QMAN_OFFSET;
8364 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8365 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8366 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8367 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8368 				IS_TPC_IDLE(tpc_cfg_sts);
8369 		is_idle &= is_eng_idle;
8370 
8371 		if (mask && !is_eng_idle)
8372 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8373 		if (s)
8374 			seq_printf(s, fmt, i,
8375 				is_eng_idle ? "Y" : "N",
8376 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8377 	}
8378 
8379 	if (s)
8380 		seq_puts(s,
8381 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8382 			"---  -------  ------------  ----------  -----------\n");
8383 
8384 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8385 		offset = i * MME_QMAN_OFFSET;
8386 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8387 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8388 
8389 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8390 		is_slave = i % 2;
8391 		if (!is_slave) {
8392 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8393 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8394 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8395 		}
8396 
8397 		is_idle &= is_eng_idle;
8398 
8399 		if (mask && !is_eng_idle)
8400 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8401 		if (s) {
8402 			if (!is_slave)
8403 				seq_printf(s, fmt, i,
8404 					is_eng_idle ? "Y" : "N",
8405 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8406 			else
8407 				seq_printf(s, mme_slave_fmt, i,
8408 					is_eng_idle ? "Y" : "N", "-",
8409 					"-", mme_arch_sts);
8410 		}
8411 	}
8412 
8413 	if (s)
8414 		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8415 				"---  -------  ------------  ----------\n");
8416 
8417 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8418 		offset = i * NIC_MACRO_QMAN_OFFSET;
8419 		port = 2 * i;
8420 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8421 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8422 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8423 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8424 			is_idle &= is_eng_idle;
8425 
8426 			if (mask && !is_eng_idle)
8427 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8428 			if (s)
8429 				seq_printf(s, nic_fmt, port,
8430 						is_eng_idle ? "Y" : "N",
8431 						qm_glbl_sts0, qm_cgm_sts);
8432 		}
8433 
8434 		port = 2 * i + 1;
8435 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8436 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8437 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8438 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8439 			is_idle &= is_eng_idle;
8440 
8441 			if (mask && !is_eng_idle)
8442 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8443 			if (s)
8444 				seq_printf(s, nic_fmt, port,
8445 						is_eng_idle ? "Y" : "N",
8446 						qm_glbl_sts0, qm_cgm_sts);
8447 		}
8448 	}
8449 
8450 	if (s)
8451 		seq_puts(s, "\n");
8452 
8453 	hdev->asic_funcs->set_clock_gating(hdev);
8454 
8455 	mutex_unlock(&gaudi->clk_gate_mutex);
8456 
8457 	return is_idle;
8458 }
8459 
8460 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8461 	__acquires(&gaudi->hw_queues_lock)
8462 {
8463 	struct gaudi_device *gaudi = hdev->asic_specific;
8464 
8465 	spin_lock(&gaudi->hw_queues_lock);
8466 }
8467 
8468 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8469 	__releases(&gaudi->hw_queues_lock)
8470 {
8471 	struct gaudi_device *gaudi = hdev->asic_specific;
8472 
8473 	spin_unlock(&gaudi->hw_queues_lock);
8474 }
8475 
8476 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8477 {
8478 	return hdev->pdev->device;
8479 }
8480 
8481 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8482 				size_t max_size)
8483 {
8484 	struct gaudi_device *gaudi = hdev->asic_specific;
8485 
8486 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8487 		return 0;
8488 
8489 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8490 }
8491 
8492 /*
8493  * this function should be used only during initialization and/or after reset,
8494  * when there are no active users.
8495  */
8496 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8497 				u32 tpc_id)
8498 {
8499 	struct gaudi_device *gaudi = hdev->asic_specific;
8500 	u64 kernel_timeout;
8501 	u32 status, offset;
8502 	int rc;
8503 
8504 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8505 
8506 	if (hdev->pldm)
8507 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8508 	else
8509 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8510 
8511 	mutex_lock(&gaudi->clk_gate_mutex);
8512 
8513 	hdev->asic_funcs->disable_clock_gating(hdev);
8514 
8515 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8516 			lower_32_bits(tpc_kernel));
8517 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8518 			upper_32_bits(tpc_kernel));
8519 
8520 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8521 			lower_32_bits(tpc_kernel));
8522 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8523 			upper_32_bits(tpc_kernel));
8524 	/* set a valid LUT pointer, content is of no significance */
8525 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8526 			lower_32_bits(tpc_kernel));
8527 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8528 			upper_32_bits(tpc_kernel));
8529 
8530 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8531 			lower_32_bits(CFG_BASE +
8532 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8533 
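	/* The command bits below request an icache invalidate plus a 64KB
	 * prefetch from the kernel base address programmed above.
	 */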
8534 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8535 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8536 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8537 	/* wait a bit for the engine to start executing */
8538 	usleep_range(1000, 1500);
8539 
8540 	/* wait until engine has finished executing */
8541 	rc = hl_poll_timeout(
8542 		hdev,
8543 		mmTPC0_CFG_STATUS + offset,
8544 		status,
8545 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8546 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8547 		1000,
8548 		kernel_timeout);
8549 
8550 	if (rc) {
8551 		dev_err(hdev->dev,
8552 			"Timeout while waiting for TPC%d icache prefetch\n",
8553 			tpc_id);
8554 		hdev->asic_funcs->set_clock_gating(hdev);
8555 		mutex_unlock(&gaudi->clk_gate_mutex);
8556 		return -EIO;
8557 	}
8558 
8559 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8560 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8561 
8562 	/* wait a bit for the engine to start executing */
8563 	usleep_range(1000, 1500);
8564 
8565 	/* wait until engine has finished executing */
8566 	rc = hl_poll_timeout(
8567 		hdev,
8568 		mmTPC0_CFG_STATUS + offset,
8569 		status,
8570 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8571 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8572 		1000,
8573 		kernel_timeout);
8574 
8575 	if (rc) {
8576 		dev_err(hdev->dev,
8577 			"Timeout while waiting for TPC%d vector pipe\n",
8578 			tpc_id);
8579 		hdev->asic_funcs->set_clock_gating(hdev);
8580 		mutex_unlock(&gaudi->clk_gate_mutex);
8581 		return -EIO;
8582 	}
8583 
8584 	rc = hl_poll_timeout(
8585 		hdev,
8586 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8587 		status,
8588 		(status == 0),
8589 		1000,
8590 		kernel_timeout);
8591 
8592 	hdev->asic_funcs->set_clock_gating(hdev);
8593 	mutex_unlock(&gaudi->clk_gate_mutex);
8594 
8595 	if (rc) {
8596 		dev_err(hdev->dev,
8597 			"Timeout while waiting for TPC%d kernel to execute\n",
8598 			tpc_id);
8599 		return -EIO;
8600 	}
8601 
8602 	return 0;
8603 }
8604 
8605 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8606 		struct hl_ctx *ctx)
8607 {
8608 	struct gaudi_device *gaudi = hdev->asic_specific;
8609 	int min_alloc_order, rc, collective_cb_size;
8610 
8611 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8612 		return 0;
8613 
8614 	hdev->internal_cb_pool_virt_addr =
8615 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8616 					HOST_SPACE_INTERNAL_CB_SZ,
8617 					&hdev->internal_cb_pool_dma_addr,
8618 					GFP_KERNEL | __GFP_ZERO);
8619 
8620 	if (!hdev->internal_cb_pool_virt_addr)
8621 		return -ENOMEM;
8622 
8623 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8624 			sizeof(struct packet_fence);
8625 	min_alloc_order = ilog2(collective_cb_size);
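	/* The pool granularity is derived from the size of one collective CB:
	 * five short messages plus a fence packet.
	 */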
8626 
8627 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8628 	if (!hdev->internal_cb_pool) {
8629 		dev_err(hdev->dev,
8630 			"Failed to create internal CB pool\n");
8631 		rc = -ENOMEM;
8632 		goto free_internal_cb_pool;
8633 	}
8634 
8635 	rc = gen_pool_add(hdev->internal_cb_pool,
8636 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8637 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8638 	if (rc) {
8639 		dev_err(hdev->dev,
8640 			"Failed to add memory to internal CB pool\n");
8641 		rc = -EFAULT;
8642 		goto destroy_internal_cb_pool;
8643 	}
8644 
8645 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8646 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8647 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8648 
8649 	if (!hdev->internal_cb_va_base) {
8650 		rc = -ENOMEM;
8651 		goto destroy_internal_cb_pool;
8652 	}
8653 
8654 	mutex_lock(&ctx->mmu_lock);
8655 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8656 			hdev->internal_cb_pool_dma_addr,
8657 			HOST_SPACE_INTERNAL_CB_SZ);
8658 
8659 	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8660 	mutex_unlock(&ctx->mmu_lock);
8661 
8662 	if (rc)
8663 		goto unreserve_internal_cb_pool;
8664 
8665 	return 0;
8666 
8667 unreserve_internal_cb_pool:
8668 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8669 			HOST_SPACE_INTERNAL_CB_SZ);
8670 destroy_internal_cb_pool:
8671 	gen_pool_destroy(hdev->internal_cb_pool);
8672 free_internal_cb_pool:
8673 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
8674 			HOST_SPACE_INTERNAL_CB_SZ,
8675 			hdev->internal_cb_pool_virt_addr,
8676 			hdev->internal_cb_pool_dma_addr);
8677 
8678 	return rc;
8679 }
8680 
8681 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8682 		struct hl_ctx *ctx)
8683 {
8684 	struct gaudi_device *gaudi = hdev->asic_specific;
8685 
8686 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8687 		return;
8688 
8689 	mutex_lock(&ctx->mmu_lock);
8690 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8691 			HOST_SPACE_INTERNAL_CB_SZ);
8692 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8693 			HOST_SPACE_INTERNAL_CB_SZ);
8694 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8695 	mutex_unlock(&ctx->mmu_lock);
8696 
8697 	gen_pool_destroy(hdev->internal_cb_pool);
8698 
8699 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
8700 			HOST_SPACE_INTERNAL_CB_SZ,
8701 			hdev->internal_cb_pool_virt_addr,
8702 			hdev->internal_cb_pool_dma_addr);
8703 }
8704 
8705 static int gaudi_ctx_init(struct hl_ctx *ctx)
8706 {
8707 	int rc;
8708 
8709 	if (ctx->asid == HL_KERNEL_ASID_ID)
8710 		return 0;
8711 
8712 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8713 	if (rc)
8714 		return rc;
8715 
8716 	rc = gaudi_restore_user_registers(ctx->hdev);
8717 	if (rc)
8718 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8719 
8720 	return rc;
8721 }
8722 
8723 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8724 {
8725 	if (ctx->asid == HL_KERNEL_ASID_ID)
8726 		return;
8727 
8728 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8729 }
8730 
8731 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8732 {
8733 	return gaudi_cq_assignment[cq_idx];
8734 }
8735 
8736 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8737 {
8738 	return sizeof(struct packet_msg_short) +
8739 			sizeof(struct packet_msg_prot) * 2;
8740 }
8741 
8742 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8743 {
8744 	return sizeof(struct packet_msg_short) * 4 +
8745 			sizeof(struct packet_fence) +
8746 			sizeof(struct packet_msg_prot) * 2;
8747 }
8748 
8749 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8750 {
8751 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8752 }
8753 
8754 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8755 				u32 size, bool eb)
8756 {
8757 	struct hl_cb *cb = (struct hl_cb *) data;
8758 	struct packet_msg_short *pkt;
8759 	u32 value, ctl, pkt_size = sizeof(*pkt);
8760 
8761 	pkt = cb->kernel_address + size;
8762 	memset(pkt, 0, pkt_size);
8763 
8764 	/* Inc by 1, Mode ADD */
8765 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8766 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8767 
8768 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8769 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8770 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8771 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8772 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8773 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8774 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8775 
8776 	pkt->value = cpu_to_le32(value);
8777 	pkt->ctl = cpu_to_le32(ctl);
8778 
8779 	return size + pkt_size;
8780 }
8781 
8782 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8783 					u16 addr)
8784 {
8785 	u32 ctl, pkt_size = sizeof(*pkt);
8786 
8787 	memset(pkt, 0, pkt_size);
8788 
8789 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8790 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8791 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8792 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8793 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8794 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8795 
8796 	pkt->value = cpu_to_le32(value);
8797 	pkt->ctl = cpu_to_le32(ctl);
8798 
8799 	return pkt_size;
8800 }
8801 
8802 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8803 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8804 		u16 sob_val, u16 mon_id)
8805 {
8806 	u64 monitor_base;
8807 	u32 ctl, value, pkt_size = sizeof(*pkt);
8808 	u16 msg_addr_offset;
8809 	u8 mask;
8810 
8811 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8812 		dev_err(hdev->dev,
8813 			"sob_base %u (mask %#x) is not valid\n",
8814 			sob_base, sob_mask);
8815 		return 0;
8816 	}
8817 
8818 	/*
8819 	 * monitor_base should be the content of the base0 address registers,
8820 	 * so it will be added to the msg short offsets
8821 	 */
8822 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8823 
8824 	msg_addr_offset =
8825 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8826 				monitor_base;
8827 
8828 	memset(pkt, 0, pkt_size);
8829 
8830 	/* Monitor config packet: bind the monitor to a sync object */
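	/* Sync objects are grouped eight per sync group, so sob_base / 8 below
	 * is the group id the monitor is armed on, and the mask selects the
	 * objects within that group.
	 */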
8831 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8832 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8833 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8834 			0); /* GREATER OR EQUAL*/
8835 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8836 
8837 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8838 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8839 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8840 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8841 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8842 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8843 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8844 
8845 	pkt->value = cpu_to_le32(value);
8846 	pkt->ctl = cpu_to_le32(ctl);
8847 
8848 	return pkt_size;
8849 }
8850 
8851 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8852 {
8853 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8854 
8855 	memset(pkt, 0, pkt_size);
8856 
8857 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8858 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8859 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8860 
8861 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8862 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8863 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8864 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8865 
8866 	pkt->cfg = cpu_to_le32(cfg);
8867 	pkt->ctl = cpu_to_le32(ctl);
8868 
8869 	return pkt_size;
8870 }
8871 
8872 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8873 {
8874 	u32 offset, nic_index;
8875 
8876 	switch (queue_id) {
8877 	case GAUDI_QUEUE_ID_DMA_0_0:
8878 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8879 		break;
8880 	case GAUDI_QUEUE_ID_DMA_0_1:
8881 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8882 		break;
8883 	case GAUDI_QUEUE_ID_DMA_0_2:
8884 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8885 		break;
8886 	case GAUDI_QUEUE_ID_DMA_0_3:
8887 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8888 		break;
8889 	case GAUDI_QUEUE_ID_DMA_1_0:
8890 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8891 		break;
8892 	case GAUDI_QUEUE_ID_DMA_1_1:
8893 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8894 		break;
8895 	case GAUDI_QUEUE_ID_DMA_1_2:
8896 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8897 		break;
8898 	case GAUDI_QUEUE_ID_DMA_1_3:
8899 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8900 		break;
8901 	case GAUDI_QUEUE_ID_DMA_5_0:
8902 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8903 		break;
8904 	case GAUDI_QUEUE_ID_DMA_5_1:
8905 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8906 		break;
8907 	case GAUDI_QUEUE_ID_DMA_5_2:
8908 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8909 		break;
8910 	case GAUDI_QUEUE_ID_DMA_5_3:
8911 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8912 		break;
8913 	case GAUDI_QUEUE_ID_TPC_7_0:
8914 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8915 		break;
8916 	case GAUDI_QUEUE_ID_TPC_7_1:
8917 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8918 		break;
8919 	case GAUDI_QUEUE_ID_TPC_7_2:
8920 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8921 		break;
8922 	case GAUDI_QUEUE_ID_TPC_7_3:
8923 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8924 		break;
8925 	case GAUDI_QUEUE_ID_NIC_0_0:
8926 	case GAUDI_QUEUE_ID_NIC_1_0:
8927 	case GAUDI_QUEUE_ID_NIC_2_0:
8928 	case GAUDI_QUEUE_ID_NIC_3_0:
8929 	case GAUDI_QUEUE_ID_NIC_4_0:
8930 	case GAUDI_QUEUE_ID_NIC_5_0:
8931 	case GAUDI_QUEUE_ID_NIC_6_0:
8932 	case GAUDI_QUEUE_ID_NIC_7_0:
8933 	case GAUDI_QUEUE_ID_NIC_8_0:
8934 	case GAUDI_QUEUE_ID_NIC_9_0:
8935 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
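		/* Each NIC port exposes four streams (hence the divide by four
		 * above); ports are paired into macros, so bit 0 of nic_index
		 * selects the engine QMAN within the macro and the remaining
		 * bits select the macro block.
		 */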
8936 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8937 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8938 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8939 		break;
8940 	case GAUDI_QUEUE_ID_NIC_0_1:
8941 	case GAUDI_QUEUE_ID_NIC_1_1:
8942 	case GAUDI_QUEUE_ID_NIC_2_1:
8943 	case GAUDI_QUEUE_ID_NIC_3_1:
8944 	case GAUDI_QUEUE_ID_NIC_4_1:
8945 	case GAUDI_QUEUE_ID_NIC_5_1:
8946 	case GAUDI_QUEUE_ID_NIC_6_1:
8947 	case GAUDI_QUEUE_ID_NIC_7_1:
8948 	case GAUDI_QUEUE_ID_NIC_8_1:
8949 	case GAUDI_QUEUE_ID_NIC_9_1:
8950 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8951 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8952 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8953 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8954 		break;
8955 	case GAUDI_QUEUE_ID_NIC_0_2:
8956 	case GAUDI_QUEUE_ID_NIC_1_2:
8957 	case GAUDI_QUEUE_ID_NIC_2_2:
8958 	case GAUDI_QUEUE_ID_NIC_3_2:
8959 	case GAUDI_QUEUE_ID_NIC_4_2:
8960 	case GAUDI_QUEUE_ID_NIC_5_2:
8961 	case GAUDI_QUEUE_ID_NIC_6_2:
8962 	case GAUDI_QUEUE_ID_NIC_7_2:
8963 	case GAUDI_QUEUE_ID_NIC_8_2:
8964 	case GAUDI_QUEUE_ID_NIC_9_2:
8965 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8966 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8967 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8968 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8969 		break;
8970 	case GAUDI_QUEUE_ID_NIC_0_3:
8971 	case GAUDI_QUEUE_ID_NIC_1_3:
8972 	case GAUDI_QUEUE_ID_NIC_2_3:
8973 	case GAUDI_QUEUE_ID_NIC_3_3:
8974 	case GAUDI_QUEUE_ID_NIC_4_3:
8975 	case GAUDI_QUEUE_ID_NIC_5_3:
8976 	case GAUDI_QUEUE_ID_NIC_6_3:
8977 	case GAUDI_QUEUE_ID_NIC_7_3:
8978 	case GAUDI_QUEUE_ID_NIC_8_3:
8979 	case GAUDI_QUEUE_ID_NIC_9_3:
8980 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8981 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8982 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8983 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8984 		break;
8985 	default:
8986 		return -EINVAL;
8987 	}
8988 
8989 	*addr = CFG_BASE + offset;
8990 
8991 	return 0;
8992 }
8993 
8994 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8995 {
8996 	u64 monitor_base;
8997 	u32 size = 0;
8998 	u16 msg_addr_offset;
8999 
9000 	/*
9001 	 * monitor_base should be the content of the base0 address registers,
9002 	 * so it will be added to the msg short offsets
9003 	 */
9004 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9005 
9006 	/* First monitor config packet: low address of the sync */
9007 	msg_addr_offset =
9008 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9009 				monitor_base;
9010 
9011 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9012 					msg_addr_offset);
9013 
9014 	/* Second monitor config packet: high address of the sync */
9015 	msg_addr_offset =
9016 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9017 				monitor_base;
9018 
9019 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9020 					msg_addr_offset);
9021 
9022 	/*
9023 	 * Third monitor config packet: the payload, i.e. what to write when the
9024 	 * sync triggers
9025 	 */
9026 	msg_addr_offset =
9027 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9028 				monitor_base;
9029 
9030 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9031 
9032 	return size;
9033 }
9034 
9035 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9036 				struct hl_gen_wait_properties *prop)
9037 {
9038 	struct hl_cb *cb = (struct hl_cb *) prop->data;
9039 	void *buf = cb->kernel_address;
9040 	u64 fence_addr = 0;
9041 	u32 size = prop->size;
9042 
9043 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9044 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9045 				prop->q_idx);
9046 		return 0;
9047 	}
9048 
9049 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9050 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9051 			prop->sob_mask, prop->sob_val, prop->mon_id);
9052 	size += gaudi_add_fence_pkt(buf + size);
9053 
9054 	return size;
9055 }
9056 
9057 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9058 {
9059 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9060 
9061 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9062 		hw_sob->sob_id);
9063 
9064 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9065 			hw_sob->sob_id * 4, 0);
9066 
9067 	kref_init(&hw_sob->kref);
9068 }
9069 
9070 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9071 {
9072 	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9073 							HL_POWER9_HOST_MAGIC) {
9074 		hdev->power9_64bit_dma_enable = 1;
9075 		hdev->dma_mask = 64;
9076 	} else {
9077 		hdev->power9_64bit_dma_enable = 0;
9078 		hdev->dma_mask = 48;
9079 	}
9080 }
9081 
9082 static u64 gaudi_get_device_time(struct hl_device *hdev)
9083 {
9084 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9085 
9086 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9087 }
9088 
9089 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9090 				u32 *block_size, u32 *block_id)
9091 {
9092 	return -EPERM;
9093 }
9094 
9095 static int gaudi_block_mmap(struct hl_device *hdev,
9096 				struct vm_area_struct *vma,
9097 				u32 block_id, u32 block_size)
9098 {
9099 	return -EPERM;
9100 }
9101 
9102 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9103 {
9104 	struct cpu_dyn_regs *dyn_regs =
9105 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9106 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9107 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9108 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
9109 
9110 	WREG32(irq_handler_offset,
9111 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9112 }
9113 
9114 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9115 {
9116 	switch (pll_idx) {
9117 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
9118 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
9119 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
9120 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
9121 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
9122 	case HL_GAUDI_MME_PLL: return MME_PLL;
9123 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
9124 	case HL_GAUDI_IF_PLL: return IF_PLL;
9125 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9126 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
9127 	default: return -EINVAL;
9128 	}
9129 }
9130 
9131 static int gaudi_add_sync_to_engine_map_entry(
9132 	struct hl_sync_to_engine_map *map, u32 reg_value,
9133 	enum hl_sync_engine_type engine_type, u32 engine_id)
9134 {
9135 	struct hl_sync_to_engine_map_entry *entry;
9136 
9137 	/* The reg value is a partial address of the sync object and is used
9138 	 * as a unique identifier, so the CFG base must first be subtracted
9139 	 * from it.
9140 	 */
9141 	if (reg_value == 0 || reg_value == 0xffffffff)
9142 		return 0;
9143 	reg_value -= (u32)CFG_BASE;
9144 
9145 	/* create a new hash entry */
9146 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9147 	if (!entry)
9148 		return -ENOMEM;
9149 	entry->engine_type = engine_type;
9150 	entry->engine_id = engine_id;
9151 	entry->sync_id = reg_value;
9152 	hash_add(map->tb, &entry->node, reg_value);
9153 
9154 	return 0;
9155 }
9156 
9157 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9158 				struct hl_sync_to_engine_map *map)
9159 {
9160 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9161 	struct gaudi_device *gaudi = hdev->asic_specific;
9162 	int i, j, rc;
9163 	u32 reg_value;
9164 
9165 	/* Iterate over TPC engines */
9166 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9167 		/* TPC registers must be accessed with clock gating disabled */
9168 		mutex_lock(&gaudi->clk_gate_mutex);
9169 		hdev->asic_funcs->disable_clock_gating(hdev);
9170 
9171 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9172 					sds->props[SP_NEXT_TPC] * i);
9173 
9174 		/* We can reenable clock_gating */
9175 		hdev->asic_funcs->set_clock_gating(hdev);
9176 		mutex_unlock(&gaudi->clk_gate_mutex);
9177 
9178 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9179 							ENGINE_TPC, i);
9180 		if (rc)
9181 			goto free_sync_to_engine_map;
9182 	}
9183 
9184 	/* Iterate over MME engines */
9185 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9186 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9187 			/* MME registers must be accessed with clock gating
9188 			 * disabled
9189 			 */
9190 			mutex_lock(&gaudi->clk_gate_mutex);
9191 			hdev->asic_funcs->disable_clock_gating(hdev);
9192 
9193 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9194 						sds->props[SP_NEXT_MME] * i +
9195 						j * sizeof(u32));
9196 
9197 			/* We can reenable clock_gating */
9198 			hdev->asic_funcs->set_clock_gating(hdev);
9199 			mutex_unlock(&gaudi->clk_gate_mutex);
9200 
9201 			rc = gaudi_add_sync_to_engine_map_entry(
9202 				map, reg_value, ENGINE_MME,
9203 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9204 			if (rc)
9205 				goto free_sync_to_engine_map;
9206 		}
9207 	}
9208 
9209 	/* Iterate over DMA engines */
9210 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9211 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9212 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
9213 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9214 							ENGINE_DMA, i);
9215 		if (rc)
9216 			goto free_sync_to_engine_map;
9217 	}
9218 
9219 	return 0;
9220 
9221 free_sync_to_engine_map:
9222 	hl_state_dump_free_sync_to_engine_map(map);
9223 
9224 	return rc;
9225 }
9226 
9227 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9228 {
9229 	return FIELD_GET(
9230 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9231 		mon->status);
9232 }
9233 
9234 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9235 {
9236 	const size_t max_write = 10;
9237 	u32 gid, mask, sob;
9238 	int i, offset;
9239 
9240 	/* Sync object ID is calculated as follows:
9241 	 * (8 * group_id + cleared bits in mask)
9242 	 */
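	/* For example, with group id 3 and mask 0b11110101 (bits 1 and 3
	 * cleared), the monitored sync objects are 3 * 8 + 1 = 25 and
	 * 3 * 8 + 3 = 27.
	 */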
9243 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9244 			mon->arm_data);
9245 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9246 			mon->arm_data);
9247 
9248 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9249 		max_write; mask >>= 1, i++) {
9250 		if (!(mask & 1)) {
9251 			sob = gid * MONITOR_MAX_SOBS + i;
9252 
9253 			if (offset > 0)
9254 				offset += snprintf(sobs + offset, max_write,
9255 							", ");
9256 
9257 			offset += snprintf(sobs + offset, max_write, "%u", sob);
9258 		}
9259 	}
9260 }
9261 
9262 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9263 				struct hl_device *hdev,
9264 				struct hl_mon_state_dump *mon)
9265 {
9266 	const char *name;
9267 	char scratch_buf1[BIN_REG_STRING_SIZE],
9268 		scratch_buf2[BIN_REG_STRING_SIZE];
9269 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9270 
9271 	name = hl_state_dump_get_monitor_name(hdev, mon);
9272 	if (!name)
9273 		name = "";
9274 
9275 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9276 
9277 	return hl_snprintf_resize(
9278 		buf, size, offset,
9279 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9280 		mon->id, name,
9281 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9282 				mon->arm_data),
9283 		hl_format_as_binary(
9284 			scratch_buf1, sizeof(scratch_buf1),
9285 			FIELD_GET(
9286 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9287 				mon->arm_data)),
9288 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9289 				mon->arm_data),
9290 		mon->wr_data,
9291 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9292 		hl_format_as_binary(
9293 			scratch_buf2, sizeof(scratch_buf2),
9294 			FIELD_GET(
9295 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9296 				mon->status)),
9297 		monitored_sobs);
9298 }
9299 
9300 
9301 static int gaudi_print_fences_single_engine(
9302 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9303 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9304 	size_t *size, size_t *offset)
9305 {
9306 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9307 	int rc = -ENOMEM, i;
9308 	u32 *statuses, *fences;
9309 
9310 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9311 			sizeof(*statuses), GFP_KERNEL);
9312 	if (!statuses)
9313 		goto out;
9314 
9315 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9316 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9317 			 sizeof(*fences), GFP_KERNEL);
9318 	if (!fences)
9319 		goto free_status;
9320 
9321 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9322 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9323 
9324 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9325 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9326 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9327 
9328 	/* The actual print */
9329 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9330 		u32 fence_id;
9331 		u64 fence_cnt, fence_rdata;
9332 		const char *engine_name;
9333 
9334 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9335 			statuses[i]))
9336 			continue;
9337 
9338 		fence_id =
9339 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
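		/* fence_cnt is the CFG-space address of this stream's
		 * CP_FENCE<id>_CNT register; the matching RDATA register sits
		 * at a fixed distance from it, computed from the two property
		 * offsets below.
		 */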
9340 		fence_cnt = base_offset + CFG_BASE +
9341 			sizeof(u32) *
9342 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9343 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9344 				sds->props[SP_FENCE0_RDATA_OFFSET];
9345 		engine_name = hl_sync_engine_to_string(engine_type);
9346 
9347 		rc = hl_snprintf_resize(
9348 			buf, size, offset,
9349 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9350 			engine_name, engine_id,
9351 			i, fence_id,
9352 			fence_cnt, engine_name, engine_id, fence_id, i,
9353 			fence_rdata, engine_name, engine_id, fence_id, i,
9354 			fences[fence_id],
9355 			statuses[i]);
9356 		if (rc)
9357 			goto free_fences;
9358 	}
9359 
9360 	rc = 0;
9361 
9362 free_fences:
9363 	kfree(fences);
9364 free_status:
9365 	kfree(statuses);
9366 out:
9367 	return rc;
9368 }
9369 
9370 
9371 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9372 	.monitor_valid = gaudi_monitor_valid,
9373 	.print_single_monitor = gaudi_print_single_monitor,
9374 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9375 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9376 };
9377 
9378 static void gaudi_state_dump_init(struct hl_device *hdev)
9379 {
9380 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9381 	int i;
9382 
9383 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9384 		hash_add(sds->so_id_to_str_tb,
9385 			&gaudi_so_id_to_str[i].node,
9386 			gaudi_so_id_to_str[i].id);
9387 
9388 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9389 		hash_add(sds->monitor_id_to_str_tb,
9390 			&gaudi_monitor_id_to_str[i].node,
9391 			gaudi_monitor_id_to_str[i].id);
9392 
9393 	sds->props = gaudi_state_dump_specs_props;
9394 
9395 	sds->sync_namager_names = gaudi_sync_manager_names;
9396 
9397 	sds->funcs = gaudi_state_dump_funcs;
9398 }
9399 
9400 static u32 *gaudi_get_stream_master_qid_arr(void)
9401 {
9402 	return gaudi_stream_master;
9403 }
9404 
9405 static const struct hl_asic_funcs gaudi_funcs = {
9406 	.early_init = gaudi_early_init,
9407 	.early_fini = gaudi_early_fini,
9408 	.late_init = gaudi_late_init,
9409 	.late_fini = gaudi_late_fini,
9410 	.sw_init = gaudi_sw_init,
9411 	.sw_fini = gaudi_sw_fini,
9412 	.hw_init = gaudi_hw_init,
9413 	.hw_fini = gaudi_hw_fini,
9414 	.halt_engines = gaudi_halt_engines,
9415 	.suspend = gaudi_suspend,
9416 	.resume = gaudi_resume,
9417 	.mmap = gaudi_mmap,
9418 	.ring_doorbell = gaudi_ring_doorbell,
9419 	.pqe_write = gaudi_pqe_write,
9420 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9421 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9422 	.scrub_device_mem = gaudi_scrub_device_mem,
9423 	.get_int_queue_base = gaudi_get_int_queue_base,
9424 	.test_queues = gaudi_test_queues,
9425 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9426 	.asic_dma_pool_free = gaudi_dma_pool_free,
9427 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9428 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9429 	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9430 	.cs_parser = gaudi_cs_parser,
9431 	.asic_dma_map_sg = gaudi_dma_map_sg,
9432 	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9433 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9434 	.update_eq_ci = gaudi_update_eq_ci,
9435 	.context_switch = gaudi_context_switch,
9436 	.restore_phase_topology = gaudi_restore_phase_topology,
9437 	.debugfs_read32 = gaudi_debugfs_read32,
9438 	.debugfs_write32 = gaudi_debugfs_write32,
9439 	.debugfs_read64 = gaudi_debugfs_read64,
9440 	.debugfs_write64 = gaudi_debugfs_write64,
9441 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9442 	.add_device_attr = gaudi_add_device_attr,
9443 	.handle_eqe = gaudi_handle_eqe,
9444 	.set_pll_profile = gaudi_set_pll_profile,
9445 	.get_events_stat = gaudi_get_events_stat,
9446 	.read_pte = gaudi_read_pte,
9447 	.write_pte = gaudi_write_pte,
9448 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9449 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9450 	.send_heartbeat = gaudi_send_heartbeat,
9451 	.set_clock_gating = gaudi_set_clock_gating,
9452 	.disable_clock_gating = gaudi_disable_clock_gating,
9453 	.debug_coresight = gaudi_debug_coresight,
9454 	.is_device_idle = gaudi_is_device_idle,
9455 	.soft_reset_late_init = gaudi_soft_reset_late_init,
9456 	.hw_queues_lock = gaudi_hw_queues_lock,
9457 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9458 	.get_pci_id = gaudi_get_pci_id,
9459 	.get_eeprom_data = gaudi_get_eeprom_data,
9460 	.send_cpu_message = gaudi_send_cpu_message,
9461 	.pci_bars_map = gaudi_pci_bars_map,
9462 	.init_iatu = gaudi_init_iatu,
9463 	.rreg = hl_rreg,
9464 	.wreg = hl_wreg,
9465 	.halt_coresight = gaudi_halt_coresight,
9466 	.ctx_init = gaudi_ctx_init,
9467 	.ctx_fini = gaudi_ctx_fini,
9468 	.get_clk_rate = gaudi_get_clk_rate,
9469 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9470 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9471 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9472 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9473 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9474 	.gen_signal_cb = gaudi_gen_signal_cb,
9475 	.gen_wait_cb = gaudi_gen_wait_cb,
9476 	.reset_sob = gaudi_reset_sob,
9477 	.reset_sob_group = gaudi_reset_sob_group,
9478 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9479 	.get_device_time = gaudi_get_device_time,
9480 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9481 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9482 	.scramble_addr = hl_mmu_scramble_addr,
9483 	.descramble_addr = hl_mmu_descramble_addr,
9484 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9485 	.get_hw_block_id = gaudi_get_hw_block_id,
9486 	.hw_block_mmap = gaudi_block_mmap,
9487 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9488 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9489 	.init_firmware_loader = gaudi_init_firmware_loader,
9490 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9491 	.state_dump_init = gaudi_state_dump_init,
9492 	.get_sob_addr = gaudi_get_sob_addr,
9493 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9494 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9495 };
9496 
9497 /**
9498  * gaudi_set_asic_funcs - set GAUDI function pointers
9499  *
9500  * @hdev: pointer to hl_device structure
9501  *
9502  */
9503 void gaudi_set_asic_funcs(struct hl_device *hdev)
9504 {
9505 	hdev->asic_funcs = &gaudi_funcs;
9506 }
9507