1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
24 
25 /*
26  * Gaudi security scheme:
27  *
28  * 1. Host is protected by:
29  *        - Range registers
30  *        - MMU
31  *
32  * 2. DDR is protected by:
33  *        - Range registers (protect the first 512MB)
34  *
35  * 3. Configuration is protected by:
36  *        - Range registers
37  *        - Protection bits
38  *
39  * MMU is always enabled.
40  *
41  * QMAN DMA channels 0,1,5 (PCI DMA):
42  *     - DMA is not secured.
43  *     - PQ and CQ are secured.
44  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
45  *                      because of TDMA (tensor DMA). Hence, WREG is never
46  *                      secured.
47  *
48  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49  * channel 0 to be secured, execute the DMA and change it back to not secured.
50  * Currently, the driver doesn't use the DMA while there are compute jobs
51  * running.
52  *
53  * The current use cases for the driver to use the DMA are:
54  *     - Clear SRAM on context switch (happens on context switch when device is
55  *       idle)
56  *     - MMU page tables area clear (happens on init)
57  *
58  * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60  * CQ, CP and the engine are not secured
61  *
62  */
63 
64 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
65 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
66 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
67 
68 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
69 
70 #define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
74 
75 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
83 
84 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
85 
86 #define GAUDI_MAX_STRING_LEN		20
87 
88 #define GAUDI_CB_POOL_CB_CNT		512
89 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
90 
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
92 
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
94 
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
96 
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
98 
99 #define GAUDI_ARB_WDT_TIMEOUT		0x1000000
100 
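/* Engines covered by the debugfs clock-gating mask: MME 0, MME 2 and TPC 0-7 */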
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
102 		BIT(GAUDI_ENGINE_ID_MME_0) |\
103 		BIT(GAUDI_ENGINE_ID_MME_2) |\
104 		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105 
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110 		"gaudi cpu eq"
111 };
112 
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116 	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122 };
123 
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
126 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
127 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
128 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
129 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
130 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
131 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
132 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
133 	[8] = GAUDI_QUEUE_ID_DMA_5_0,
134 	[9] = GAUDI_QUEUE_ID_DMA_5_1,
135 	[10] = GAUDI_QUEUE_ID_DMA_5_2,
136 	[11] = GAUDI_QUEUE_ID_DMA_5_3
137 };
138 
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
141 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
142 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
143 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
144 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
145 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
146 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
147 	[PACKET_FENCE]		= sizeof(struct packet_fence),
148 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
149 	[PACKET_NOP]		= sizeof(struct packet_nop),
150 	[PACKET_STOP]		= sizeof(struct packet_stop),
151 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
152 	[PACKET_WAIT]		= sizeof(struct packet_wait),
153 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
154 };
155 
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158 	switch (id) {
159 	case PACKET_WREG_32:
160 	case PACKET_WREG_BULK:
161 	case PACKET_MSG_LONG:
162 	case PACKET_MSG_SHORT:
163 	case PACKET_CP_DMA:
164 	case PACKET_REPEAT:
165 	case PACKET_MSG_PROT:
166 	case PACKET_FENCE:
167 	case PACKET_LIN_DMA:
168 	case PACKET_NOP:
169 	case PACKET_STOP:
170 	case PACKET_ARB_POINT:
171 	case PACKET_WAIT:
172 	case PACKET_LOAD_AND_EXE:
173 		return true;
174 	default:
175 		return false;
176 	}
177 }
178 
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 	"tpc_address_exceed_slm",
182 	"tpc_div_by_0",
183 	"tpc_spu_mac_overflow",
184 	"tpc_spu_addsub_overflow",
185 	"tpc_spu_abs_overflow",
186 	"tpc_spu_fp_dst_nan_inf",
187 	"tpc_spu_fp_dst_denorm",
188 	"tpc_vpu_mac_overflow",
189 	"tpc_vpu_addsub_overflow",
190 	"tpc_vpu_abs_overflow",
191 	"tpc_vpu_fp_dst_nan_inf",
192 	"tpc_vpu_fp_dst_denorm",
193 	"tpc_assertions",
194 	"tpc_illegal_instruction",
195 	"tpc_pc_wrap_around",
196 	"tpc_qm_sw_err",
197 	"tpc_hbw_rresp_err",
198 	"tpc_hbw_bresp_err",
199 	"tpc_lbw_rresp_err",
200 	"tpc_lbw_bresp_err"
201 };
202 
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 	"PQ AXI HBW error",
206 	"CQ AXI HBW error",
207 	"CP AXI HBW error",
208 	"CP error due to undefined OPCODE",
209 	"CP encountered STOP OPCODE",
210 	"CP AXI LBW error",
211 	"CP WRREG32 or WRBULK returned error",
212 	"N/A",
213 	"FENCE 0 inc over max value and clipped",
214 	"FENCE 1 inc over max value and clipped",
215 	"FENCE 2 inc over max value and clipped",
216 	"FENCE 3 inc over max value and clipped",
217 	"FENCE 0 dec under min value and clipped",
218 	"FENCE 1 dec under min value and clipped",
219 	"FENCE 2 dec under min value and clipped",
220 	"FENCE 3 dec under min value and clipped"
221 };
222 
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 	"Choice push while full error",
226 	"Choice Q watchdog error",
227 	"MSG AXI LBW returned with error"
228 };
229 
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
305 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
306 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
307 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
308 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
309 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
310 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
311 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
312 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
313 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
314 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
315 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
316 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
317 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
318 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
319 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
320 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
321 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
322 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
323 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
324 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
325 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
326 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
327 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
328 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
329 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
330 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
331 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
332 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
333 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
334 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
335 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
336 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
337 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
338 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
339 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
340 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
341 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
342 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
343 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345 
346 struct ecc_info_extract_params {
347 	u64 block_address;
348 	u32 num_memories;
349 	bool derr;
350 	bool disable_clock_gating;
351 };
352 
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354 								u64 phys_addr);
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356 					struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358 					u32 size, u64 val);
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360 				u32 tpc_id);
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365 
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
367 {
368 	struct asic_fixed_properties *prop = &hdev->asic_prop;
369 	u32 num_sync_stream_queues = 0;
370 	int i;
371 
372 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373 	prop->hw_queues_props = kcalloc(prop->max_queues,
374 			sizeof(struct hw_queue_properties),
375 			GFP_KERNEL);
376 
377 	if (!prop->hw_queues_props)
378 		return -ENOMEM;
379 
380 	for (i = 0 ; i < prop->max_queues ; i++) {
381 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383 			prop->hw_queues_props[i].driver_only = 0;
384 			prop->hw_queues_props[i].requires_kernel_cb = 1;
385 			prop->hw_queues_props[i].supports_sync_stream = 1;
386 			num_sync_stream_queues++;
387 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389 			prop->hw_queues_props[i].driver_only = 1;
390 			prop->hw_queues_props[i].requires_kernel_cb = 0;
391 			prop->hw_queues_props[i].supports_sync_stream = 0;
392 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394 			prop->hw_queues_props[i].driver_only = 0;
395 			prop->hw_queues_props[i].requires_kernel_cb = 0;
396 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397 			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398 			prop->hw_queues_props[i].driver_only = 0;
399 			prop->hw_queues_props[i].requires_kernel_cb = 0;
400 			prop->hw_queues_props[i].supports_sync_stream = 0;
401 		}
402 	}
403 
404 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405 	prop->sync_stream_first_sob = 0;
406 	prop->sync_stream_first_mon = 0;
407 	prop->dram_base_address = DRAM_PHYS_BASE;
408 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
409 	prop->dram_end_address = prop->dram_base_address +
410 					prop->dram_size;
411 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412 
413 	prop->sram_base_address = SRAM_BASE_ADDR;
414 	prop->sram_size = SRAM_SIZE;
415 	prop->sram_end_address = prop->sram_base_address +
416 					prop->sram_size;
417 	prop->sram_user_base_address = prop->sram_base_address +
418 					SRAM_USER_BASE_OFFSET;
419 
420 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421 	if (hdev->pldm)
422 		prop->mmu_pgt_size = 0x800000; /* 8MB */
423 	else
424 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425 	prop->mmu_pte_size = HL_PTE_SIZE;
426 	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428 	prop->dram_page_size = PAGE_SIZE_2MB;
429 
430 	prop->pmmu.hop0_shift = HOP0_SHIFT;
431 	prop->pmmu.hop1_shift = HOP1_SHIFT;
432 	prop->pmmu.hop2_shift = HOP2_SHIFT;
433 	prop->pmmu.hop3_shift = HOP3_SHIFT;
434 	prop->pmmu.hop4_shift = HOP4_SHIFT;
435 	prop->pmmu.hop0_mask = HOP0_MASK;
436 	prop->pmmu.hop1_mask = HOP1_MASK;
437 	prop->pmmu.hop2_mask = HOP2_MASK;
438 	prop->pmmu.hop3_mask = HOP3_MASK;
439 	prop->pmmu.hop4_mask = HOP4_MASK;
440 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
441 	prop->pmmu.end_addr =
442 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443 	prop->pmmu.page_size = PAGE_SIZE_4KB;
444 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445 
446 	/* PMMU and HPMMU are the same except for the page size */
447 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449 
450 	/* shifts and masks are the same in PMMU and DMMU */
451 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
454 	prop->dmmu.page_size = PAGE_SIZE_2MB;
455 
456 	prop->cfg_size = CFG_SIZE;
457 	prop->max_asid = MAX_ASID;
458 	prop->num_of_events = GAUDI_EVENT_SIZE;
459 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460 
461 	prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462 
463 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465 
466 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468 
469 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470 					CARD_NAME_MAX_LEN);
471 
472 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473 
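	/*
	 * The first HL_RSVD_SOBS sync objects and HL_RSVD_MONS monitors of each
	 * sync-stream capable queue are reserved for the driver.
	 */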
474 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475 			num_sync_stream_queues * HL_RSVD_SOBS;
476 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477 			num_sync_stream_queues * HL_RSVD_MONS;
478 
479 	return 0;
480 }
481 
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
483 {
484 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
485 	bool is_wc[3] = {false, false, true};
486 	int rc;
487 
488 	rc = hl_pci_bars_map(hdev, name, is_wc);
489 	if (rc)
490 		return rc;
491 
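	/*
	 * The CFG BAR is mapped to SPI_FLASH_BASE_ADDR (see gaudi_init_iatu()),
	 * so the register space at CFG_BASE starts at this offset within it.
	 */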
492 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
494 
495 	return 0;
496 }
497 
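/*
 * Move the HBM BAR (inbound iATU region 2 / BAR4) so that @addr is accessible
 * through it. Returns the previous BAR base address, or U64_MAX on failure.
 */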
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499 {
500 	struct gaudi_device *gaudi = hdev->asic_specific;
501 	struct hl_inbound_pci_region pci_region;
502 	u64 old_addr = addr;
503 	int rc;
504 
505 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506 		return old_addr;
507 
508 	/* Inbound Region 2 - Bar 4 - Point to HBM */
509 	pci_region.mode = PCI_BAR_MATCH_MODE;
510 	pci_region.bar = HBM_BAR_ID;
511 	pci_region.addr = addr;
512 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513 	if (rc)
514 		return U64_MAX;
515 
516 	if (gaudi) {
517 		old_addr = gaudi->hbm_bar_cur_addr;
518 		gaudi->hbm_bar_cur_addr = addr;
519 	}
520 
521 	return old_addr;
522 }
523 
524 static int gaudi_init_iatu(struct hl_device *hdev)
525 {
526 	struct hl_inbound_pci_region inbound_region;
527 	struct hl_outbound_pci_region outbound_region;
528 	int rc;
529 
530 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531 	inbound_region.mode = PCI_BAR_MATCH_MODE;
532 	inbound_region.bar = SRAM_BAR_ID;
533 	inbound_region.addr = SRAM_BASE_ADDR;
534 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535 	if (rc)
536 		goto done;
537 
538 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539 	inbound_region.mode = PCI_BAR_MATCH_MODE;
540 	inbound_region.bar = CFG_BAR_ID;
541 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
542 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543 	if (rc)
544 		goto done;
545 
546 	/* Inbound Region 2 - Bar 4 - Point to HBM */
547 	inbound_region.mode = PCI_BAR_MATCH_MODE;
548 	inbound_region.bar = HBM_BAR_ID;
549 	inbound_region.addr = DRAM_PHYS_BASE;
550 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551 	if (rc)
552 		goto done;
553 
554 	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555 
556 	/* Outbound Region 0 - Point to Host */
557 	outbound_region.addr = HOST_PHYS_BASE;
558 	outbound_region.size = HOST_PHYS_SIZE;
559 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560 
561 done:
562 	return rc;
563 }
564 
565 static int gaudi_early_init(struct hl_device *hdev)
566 {
567 	struct asic_fixed_properties *prop = &hdev->asic_prop;
568 	struct pci_dev *pdev = hdev->pdev;
569 	int rc;
570 
571 	rc = gaudi_get_fixed_properties(hdev);
572 	if (rc) {
573 		dev_err(hdev->dev, "Failed to get fixed properties\n");
574 		return rc;
575 	}
576 
577 	/* Check BAR sizes */
578 	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579 		dev_err(hdev->dev,
580 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581 			SRAM_BAR_ID,
582 			(unsigned long long) pci_resource_len(pdev,
583 							SRAM_BAR_ID),
584 			SRAM_BAR_SIZE);
585 		rc = -ENODEV;
586 		goto free_queue_props;
587 	}
588 
589 	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590 		dev_err(hdev->dev,
591 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592 			CFG_BAR_ID,
593 			(unsigned long long) pci_resource_len(pdev,
594 								CFG_BAR_ID),
595 			CFG_BAR_SIZE);
596 		rc = -ENODEV;
597 		goto free_queue_props;
598 	}
599 
600 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601 
602 	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
603 			mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
604 	if (rc)
605 		goto free_queue_props;
606 
607 	/* GAUDI Firmware does not yet support security */
608 	prop->fw_security_disabled = true;
609 	dev_info(hdev->dev, "firmware-level security is disabled\n");
610 
611 	return 0;
612 
613 free_queue_props:
614 	kfree(hdev->asic_prop.hw_queues_props);
615 	return rc;
616 }
617 
618 static int gaudi_early_fini(struct hl_device *hdev)
619 {
620 	kfree(hdev->asic_prop.hw_queues_props);
621 	hl_pci_fini(hdev);
622 
623 	return 0;
624 }
625 
626 /**
627  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
628  *
629  * @hdev: pointer to hl_device structure
630  *
631  */
632 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
633 {
634 	struct asic_fixed_properties *prop = &hdev->asic_prop;
635 	u32 trace_freq = 0;
636 	u32 pll_clk = 0;
637 	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
638 	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
639 	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
640 	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
641 	u32 od = RREG32(mmPSOC_CPU_PLL_OD);
642 
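	/*
	 * The timestamp frequency is either the PLL reference clock or the PLL
	 * output, each optionally divided by (div_fctr + 1).
	 */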
643 	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
644 		if (div_sel == DIV_SEL_REF_CLK)
645 			trace_freq = PLL_REF_CLK;
646 		else
647 			trace_freq = PLL_REF_CLK / (div_fctr + 1);
648 	} else if (div_sel == DIV_SEL_PLL_CLK ||
649 					div_sel == DIV_SEL_DIVIDED_PLL) {
650 		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
651 		if (div_sel == DIV_SEL_PLL_CLK)
652 			trace_freq = pll_clk;
653 		else
654 			trace_freq = pll_clk / (div_fctr + 1);
655 	} else {
656 		dev_warn(hdev->dev,
657 			"Received invalid div select value: %d", div_sel);
658 	}
659 
660 	prop->psoc_timestamp_frequency = trace_freq;
661 	prop->psoc_pci_pll_nr = nr;
662 	prop->psoc_pci_pll_nf = nf;
663 	prop->psoc_pci_pll_od = od;
664 	prop->psoc_pci_pll_div_factor = div_fctr;
665 }
666 
667 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
668 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
669 {
670 	struct asic_fixed_properties *prop = &hdev->asic_prop;
671 	struct packet_lin_dma *init_tpc_mem_pkt;
672 	struct hl_cs_job *job;
673 	struct hl_cb *cb;
674 	u64 dst_addr;
675 	u32 cb_size, ctl;
676 	u8 tpc_id;
677 	int rc;
678 
679 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
680 	if (!cb)
681 		return -EFAULT;
682 
683 	init_tpc_mem_pkt = cb->kernel_address;
684 	cb_size = sizeof(*init_tpc_mem_pkt);
685 	memset(init_tpc_mem_pkt, 0, cb_size);
686 
687 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
688 
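	/* Build the LIN_DMA control word: opcode, linear mode and RB/MB barriers */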
689 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
690 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
691 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
692 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
693 
694 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
695 
696 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
697 	dst_addr = (prop->sram_user_base_address &
698 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
699 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
700 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
701 
702 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
703 	if (!job) {
704 		dev_err(hdev->dev, "Failed to allocate a new job\n");
705 		rc = -ENOMEM;
706 		goto release_cb;
707 	}
708 
709 	job->id = 0;
710 	job->user_cb = cb;
711 	job->user_cb->cs_cnt++;
712 	job->user_cb_size = cb_size;
713 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
714 	job->patched_cb = job->user_cb;
715 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
716 
717 	hl_debugfs_add_job(hdev, job);
718 
719 	rc = gaudi_send_job_on_qman0(hdev, job);
720 
721 	if (rc)
722 		goto free_job;
723 
724 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
725 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
726 		if (rc)
727 			break;
728 	}
729 
730 free_job:
731 	hl_userptr_delete_list(hdev, &job->userptr_list);
732 	hl_debugfs_remove_job(hdev, job);
733 	kfree(job);
734 	cb->cs_cnt--;
735 
736 release_cb:
737 	hl_cb_put(cb);
738 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
739 
740 	return rc;
741 }
742 
743 /*
744  * gaudi_init_tpc_mem() - Initialize TPC memories.
745  * @hdev: Pointer to hl_device structure.
746  *
747  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
748  *
749  * Return: 0 for success, negative value for error.
750  */
751 static int gaudi_init_tpc_mem(struct hl_device *hdev)
752 {
753 	const struct firmware *fw;
754 	size_t fw_size;
755 	void *cpu_addr;
756 	dma_addr_t dma_handle;
757 	int rc;
758 
759 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
760 	if (rc) {
761 		dev_err(hdev->dev, "Firmware file %s is not found!\n",
762 				GAUDI_TPC_FW_FILE);
763 		goto out;
764 	}
765 
766 	fw_size = fw->size;
767 	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
768 			&dma_handle, GFP_KERNEL | __GFP_ZERO);
769 	if (!cpu_addr) {
770 		dev_err(hdev->dev,
771 			"Failed to allocate %zu of dma memory for TPC kernel\n",
772 			fw_size);
773 		rc = -ENOMEM;
774 		goto out;
775 	}
776 
777 	memcpy(cpu_addr, fw->data, fw_size);
778 
779 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
780 
781 	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
782 			dma_handle);
783 
784 out:
785 	release_firmware(fw);
786 	return rc;
787 }
788 
789 static int gaudi_late_init(struct hl_device *hdev)
790 {
791 	struct gaudi_device *gaudi = hdev->asic_specific;
792 	int rc;
793 
794 	rc = gaudi->cpucp_info_get(hdev);
795 	if (rc) {
796 		dev_err(hdev->dev, "Failed to get cpucp info\n");
797 		return rc;
798 	}
799 
800 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
801 	if (rc) {
802 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
803 		return rc;
804 	}
805 
806 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
807 
808 	gaudi_fetch_psoc_frequency(hdev);
809 
810 	rc = gaudi_mmu_clear_pgt_range(hdev);
811 	if (rc) {
812 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
813 		goto disable_pci_access;
814 	}
815 
816 	rc = gaudi_init_tpc_mem(hdev);
817 	if (rc) {
818 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
819 		goto disable_pci_access;
820 	}
821 
822 	return 0;
823 
824 disable_pci_access:
825 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
826 
827 	return rc;
828 }
829 
830 static void gaudi_late_fini(struct hl_device *hdev)
831 {
832 	const struct hwmon_channel_info **channel_info_arr;
833 	int i = 0;
834 
835 	if (!hdev->hl_chip_info->info)
836 		return;
837 
838 	channel_info_arr = hdev->hl_chip_info->info;
839 
840 	while (channel_info_arr[i]) {
841 		kfree(channel_info_arr[i]->config);
842 		kfree(channel_info_arr[i]);
843 		i++;
844 	}
845 
846 	kfree(channel_info_arr);
847 
848 	hdev->hl_chip_info->info = NULL;
849 }
850 
851 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
852 {
853 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
854 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
855 	int i, j, rc = 0;
856 
857 	/*
858 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
859 	 * to '1' when accessing the host.
860 	 * Bits 49:39 of the full host address are saved for a later
861 	 * configuration of the HW to perform extension to 50 bits.
862 	 * Because there is a single HW register that holds the extension bits,
863 	 * these bits must be identical across the entire allocated range.
864 	 */
865 
866 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
867 		virt_addr_arr[i] =
868 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
869 						HL_CPU_ACCESSIBLE_MEM_SIZE,
870 						&dma_addr_arr[i],
871 						GFP_KERNEL | __GFP_ZERO);
872 		if (!virt_addr_arr[i]) {
873 			rc = -ENOMEM;
874 			goto free_dma_mem_arr;
875 		}
876 
877 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
878 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
879 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
880 			break;
881 	}
882 
883 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
884 		dev_err(hdev->dev,
885 			"MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
886 		rc = -EFAULT;
887 		goto free_dma_mem_arr;
888 	}
889 
890 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
891 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
892 	hdev->cpu_pci_msb_addr =
893 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
894 
895 	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
896 
897 free_dma_mem_arr:
898 	for (j = 0 ; j < i ; j++)
899 		hdev->asic_funcs->asic_dma_free_coherent(hdev,
900 						HL_CPU_ACCESSIBLE_MEM_SIZE,
901 						virt_addr_arr[j],
902 						dma_addr_arr[j]);
903 
904 	return rc;
905 }
906 
907 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
908 {
909 	struct gaudi_device *gaudi = hdev->asic_specific;
910 	struct gaudi_internal_qman_info *q;
911 	u32 i;
912 
913 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
914 		q = &gaudi->internal_qmans[i];
915 		if (!q->pq_kernel_addr)
916 			continue;
917 		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
918 							q->pq_kernel_addr,
919 							q->pq_dma_addr);
920 	}
921 }
922 
923 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
924 {
925 	struct gaudi_device *gaudi = hdev->asic_specific;
926 	struct gaudi_internal_qman_info *q;
927 	int rc, i;
928 
929 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
930 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
931 			continue;
932 
933 		q = &gaudi->internal_qmans[i];
934 
935 		switch (i) {
936 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
937 		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
938 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
939 			break;
940 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
941 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
942 			break;
943 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
944 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
945 			break;
946 		default:
947 			dev_err(hdev->dev, "Bad internal queue index %d", i);
948 			rc = -EINVAL;
949 			goto free_internal_qmans_pq_mem;
950 		}
951 
952 		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
953 						hdev, q->pq_size,
954 						&q->pq_dma_addr,
955 						GFP_KERNEL | __GFP_ZERO);
956 		if (!q->pq_kernel_addr) {
957 			rc = -ENOMEM;
958 			goto free_internal_qmans_pq_mem;
959 		}
960 	}
961 
962 	return 0;
963 
964 free_internal_qmans_pq_mem:
965 	gaudi_free_internal_qmans_pq_mem(hdev);
966 	return rc;
967 }
968 
969 static int gaudi_sw_init(struct hl_device *hdev)
970 {
971 	struct gaudi_device *gaudi;
972 	u32 i, event_id = 0;
973 	int rc;
974 
975 	/* Allocate device structure */
976 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
977 	if (!gaudi)
978 		return -ENOMEM;
979 
980 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
981 		if (gaudi_irq_map_table[i].valid) {
982 			if (event_id == GAUDI_EVENT_SIZE) {
983 				dev_err(hdev->dev,
984 					"Event array exceeds the limit of %u events\n",
985 					GAUDI_EVENT_SIZE);
986 				rc = -EINVAL;
987 				goto free_gaudi_device;
988 			}
989 
990 			gaudi->events[event_id++] =
991 					gaudi_irq_map_table[i].fc_id;
992 		}
993 	}
994 
995 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
996 
997 	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
998 
999 	hdev->asic_specific = gaudi;
1000 
1001 	/* Create DMA pool for small allocations */
1002 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1003 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1004 	if (!hdev->dma_pool) {
1005 		dev_err(hdev->dev, "failed to create DMA pool\n");
1006 		rc = -ENOMEM;
1007 		goto free_gaudi_device;
1008 	}
1009 
1010 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1011 	if (rc)
1012 		goto free_dma_pool;
1013 
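	/* 32-byte minimum allocation granularity, usable from any NUMA node */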
1014 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1015 	if (!hdev->cpu_accessible_dma_pool) {
1016 		dev_err(hdev->dev,
1017 			"Failed to create CPU accessible DMA pool\n");
1018 		rc = -ENOMEM;
1019 		goto free_cpu_dma_mem;
1020 	}
1021 
1022 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1023 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1024 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1025 	if (rc) {
1026 		dev_err(hdev->dev,
1027 			"Failed to add memory to CPU accessible DMA pool\n");
1028 		rc = -EFAULT;
1029 		goto free_cpu_accessible_dma_pool;
1030 	}
1031 
1032 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1033 	if (rc)
1034 		goto free_cpu_accessible_dma_pool;
1035 
1036 	spin_lock_init(&gaudi->hw_queues_lock);
1037 	mutex_init(&gaudi->clk_gate_mutex);
1038 
1039 	hdev->supports_sync_stream = true;
1040 	hdev->supports_coresight = true;
1041 
1042 	return 0;
1043 
1044 free_cpu_accessible_dma_pool:
1045 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1046 free_cpu_dma_mem:
1047 	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1048 				hdev->cpu_pci_msb_addr);
1049 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1050 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1051 			hdev->cpu_accessible_dma_mem,
1052 			hdev->cpu_accessible_dma_address);
1053 free_dma_pool:
1054 	dma_pool_destroy(hdev->dma_pool);
1055 free_gaudi_device:
1056 	kfree(gaudi);
1057 	return rc;
1058 }
1059 
1060 static int gaudi_sw_fini(struct hl_device *hdev)
1061 {
1062 	struct gaudi_device *gaudi = hdev->asic_specific;
1063 
1064 	gaudi_free_internal_qmans_pq_mem(hdev);
1065 
1066 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1067 
1068 	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1069 					hdev->cpu_pci_msb_addr);
1070 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1071 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1072 			hdev->cpu_accessible_dma_mem,
1073 			hdev->cpu_accessible_dma_address);
1074 
1075 	dma_pool_destroy(hdev->dma_pool);
1076 
1077 	mutex_destroy(&gaudi->clk_gate_mutex);
1078 
1079 	kfree(gaudi);
1080 
1081 	return 0;
1082 }
1083 
1084 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1085 {
1086 	struct hl_device *hdev = arg;
1087 	int i;
1088 
1089 	if (hdev->disabled)
1090 		return IRQ_HANDLED;
1091 
1092 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1093 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1094 
1095 	hl_irq_handler_eq(irq, &hdev->event_queue);
1096 
1097 	return IRQ_HANDLED;
1098 }
1099 
1100 /*
1101  * For backward compatibility, new MSI interrupts should be set after the
1102  * existing CPU and NIC interrupts.
1103  */
1104 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1105 				bool cpu_eq)
1106 {
1107 	int msi_vec;
1108 
1109 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1110 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1111 				GAUDI_EVENT_QUEUE_MSI_IDX);
1112 
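	/* Vectors above the CPU EQ index skip the CPU EQ and the per-NIC vectors */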
1113 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1114 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1115 
1116 	return pci_irq_vector(hdev->pdev, msi_vec);
1117 }
1118 
1119 static int gaudi_enable_msi_single(struct hl_device *hdev)
1120 {
1121 	int rc, irq;
1122 
1123 	dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1124 
1125 	irq = gaudi_pci_irq_vector(hdev, 0, false);
1126 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1127 			"gaudi single msi", hdev);
1128 	if (rc)
1129 		dev_err(hdev->dev,
1130 			"Failed to request single MSI IRQ\n");
1131 
1132 	return rc;
1133 }
1134 
1135 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1136 {
1137 	int cq_cnt = hdev->asic_prop.completion_queues_count;
1138 	int rc, i, irq_cnt_init, irq;
1139 
1140 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1141 		irq = gaudi_pci_irq_vector(hdev, i, false);
1142 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1143 				&hdev->completion_queue[i]);
1144 		if (rc) {
1145 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1146 			goto free_irqs;
1147 		}
1148 	}
1149 
1150 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1151 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1152 				&hdev->event_queue);
1153 	if (rc) {
1154 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1155 		goto free_irqs;
1156 	}
1157 
1158 	return 0;
1159 
1160 free_irqs:
1161 	for (i = 0 ; i < irq_cnt_init ; i++)
1162 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
1163 				&hdev->completion_queue[i]);
1164 	return rc;
1165 }
1166 
1167 static int gaudi_enable_msi(struct hl_device *hdev)
1168 {
1169 	struct gaudi_device *gaudi = hdev->asic_specific;
1170 	int rc;
1171 
1172 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1173 		return 0;
1174 
1175 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1176 					PCI_IRQ_MSI);
1177 	if (rc < 0) {
1178 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1179 		return rc;
1180 	}
1181 
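	/* Fall back to a single shared MSI if fewer vectors than needed were granted */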
1182 	if (rc < NUMBER_OF_INTERRUPTS) {
1183 		gaudi->multi_msi_mode = false;
1184 		rc = gaudi_enable_msi_single(hdev);
1185 	} else {
1186 		gaudi->multi_msi_mode = true;
1187 		rc = gaudi_enable_msi_multi(hdev);
1188 	}
1189 
1190 	if (rc)
1191 		goto free_pci_irq_vectors;
1192 
1193 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
1194 
1195 	return 0;
1196 
1197 free_pci_irq_vectors:
1198 	pci_free_irq_vectors(hdev->pdev);
1199 	return rc;
1200 }
1201 
1202 static void gaudi_sync_irqs(struct hl_device *hdev)
1203 {
1204 	struct gaudi_device *gaudi = hdev->asic_specific;
1205 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1206 
1207 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1208 		return;
1209 
1210 	/* Wait for all pending IRQs to be finished */
1211 	if (gaudi->multi_msi_mode) {
1212 		for (i = 0 ; i < cq_cnt ; i++)
1213 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1214 
1215 		synchronize_irq(gaudi_pci_irq_vector(hdev,
1216 						GAUDI_EVENT_QUEUE_MSI_IDX,
1217 						true));
1218 	} else {
1219 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1220 	}
1221 }
1222 
1223 static void gaudi_disable_msi(struct hl_device *hdev)
1224 {
1225 	struct gaudi_device *gaudi = hdev->asic_specific;
1226 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1227 
1228 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1229 		return;
1230 
1231 	gaudi_sync_irqs(hdev);
1232 
1233 	if (gaudi->multi_msi_mode) {
1234 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1235 						true);
1236 		free_irq(irq, &hdev->event_queue);
1237 
1238 		for (i = 0 ; i < cq_cnt ; i++) {
1239 			irq = gaudi_pci_irq_vector(hdev, i, false);
1240 			free_irq(irq, &hdev->completion_queue[i]);
1241 		}
1242 	} else {
1243 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1244 	}
1245 
1246 	pci_free_irq_vectors(hdev->pdev);
1247 
1248 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1249 }
1250 
1251 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1252 {
1253 	struct gaudi_device *gaudi = hdev->asic_specific;
1254 
1255 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1256 		return;
1257 
1258 	if (!hdev->sram_scrambler_enable)
1259 		return;
1260 
1261 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1262 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1263 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1264 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1265 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1266 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1267 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1268 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1270 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1272 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1274 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1276 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277 
1278 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1279 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1280 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1281 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1282 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1283 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1284 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1285 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1287 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1289 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1291 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1293 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294 
1295 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1296 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1297 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1298 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1299 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1300 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1301 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1302 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1304 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1306 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1308 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1310 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311 
1312 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1313 }
1314 
1315 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1316 {
1317 	struct gaudi_device *gaudi = hdev->asic_specific;
1318 
1319 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1320 		return;
1321 
1322 	if (!hdev->dram_scrambler_enable)
1323 		return;
1324 
1325 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1326 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1327 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1328 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1329 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1330 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1331 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1332 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1334 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1336 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1338 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1340 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341 
1342 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1343 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1344 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1345 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1346 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1347 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1348 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1349 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1351 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1353 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1355 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1357 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358 
1359 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1360 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1361 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1362 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1363 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1364 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1365 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1366 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1368 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1370 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1372 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1374 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375 
1376 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1377 }
1378 
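/*
 * Program the E2E (end-to-end) credit sizes for HBM and PCI traffic on all
 * SIF/NIF routers and DMA IF channels, then enable E2E on each of them.
 */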
1379 static void gaudi_init_e2e(struct hl_device *hdev)
1380 {
1381 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1382 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1383 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1384 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1385 
1386 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1387 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1388 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1389 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1390 
1391 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1392 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1393 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1394 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1395 
1396 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1397 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1398 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1399 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1400 
1401 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1402 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1403 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1404 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1405 
1406 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1407 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1408 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1409 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1410 
1411 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1412 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1413 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1414 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1415 
1416 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1417 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1418 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1419 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1420 
1421 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1422 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1423 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1424 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1425 
1426 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1427 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1428 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1429 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1430 
1431 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1432 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1433 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1434 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1435 
1436 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1437 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1438 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1439 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1440 
1441 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1442 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1443 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1444 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1445 
1446 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1447 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1448 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1449 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1450 
1451 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1452 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1453 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1454 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1455 
1456 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1457 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1458 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1459 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1460 
1461 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1462 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1463 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1464 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1465 
1466 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1467 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1468 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1469 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1470 
1471 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1472 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1473 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1474 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1475 
1476 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1477 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1478 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1479 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1480 
1481 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1482 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1483 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1484 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1485 
1486 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1487 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1488 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1489 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1490 
1491 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1492 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1493 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1494 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1495 
1496 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1497 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1498 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1499 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1500 
1501 	if (!hdev->dram_scrambler_enable) {
1502 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1503 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1504 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1505 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1506 
1507 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1508 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1509 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1510 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1511 
1512 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1513 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1514 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1515 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1516 
1517 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1518 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1519 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1520 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1521 
1522 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1523 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1524 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1525 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1526 
1527 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1528 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1529 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1530 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1531 
1532 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1533 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1534 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1535 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1536 
1537 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1538 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1539 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1540 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1541 
1542 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1543 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1544 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1545 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1546 
1547 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1548 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1549 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1550 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1551 
1552 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1553 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1554 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1555 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1556 
1557 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1558 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1559 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1560 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1561 
1562 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1563 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1564 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1565 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1566 
1567 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1568 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1569 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1570 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1571 
1572 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1573 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1574 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1575 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1576 
1577 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1578 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1579 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1580 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1581 
1582 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1583 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1584 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1585 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1586 
1587 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1588 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1589 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1590 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1591 
1592 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1593 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1594 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1595 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1596 
1597 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1598 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1599 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1600 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1601 
1602 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1603 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1604 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1605 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1606 
1607 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1608 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1609 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1610 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1611 
1612 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1613 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1614 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1615 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1616 
1617 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1618 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1619 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1620 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1621 	}
1622 
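	/*
	 * Enable E2E (end-to-end) credits towards HBM and PCI on all
	 * SIF/NIF routers and on every DMA_IF down channel.
	 */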
1623 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1624 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1625 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1626 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1627 
1628 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1629 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1630 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1631 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1632 
1633 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1634 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1635 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1636 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1637 
1638 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1639 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1640 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1641 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1642 
1643 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1644 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1645 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1646 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1647 
1648 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1649 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1650 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1651 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1652 
1653 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1654 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1655 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1656 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1657 
1658 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1659 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1660 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1661 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1662 
1663 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1664 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1665 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1666 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1667 
1668 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1669 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1670 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1671 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1672 
1673 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1674 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1675 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1676 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1677 
1678 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1679 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1680 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1681 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1682 
1683 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1684 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1685 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1686 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1687 
1688 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1689 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1690 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1691 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1692 
1693 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1694 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1695 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1696 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1697 
1698 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1699 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1700 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1701 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1702 
1703 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1704 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1705 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1706 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1707 
1708 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1709 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1710 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1711 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1712 
1713 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1714 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1715 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1716 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1717 
1718 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1719 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1720 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1721 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1722 
1723 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1724 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1725 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1726 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1727 
1728 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1729 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1730 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1731 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1732 
1733 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1734 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1735 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1736 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1737 
1738 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1739 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1740 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1741 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1742 }
1743 
1744 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1745 {
1746 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1747 
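	/* Read/write credit counts for the HBM0/HBM1 ports of each DMA_IF */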
1748 	hbm0_wr = 0x33333333;
1749 	hbm0_rd = 0x77777777;
1750 	hbm1_wr = 0x55555555;
1751 	hbm1_rd = 0xDDDDDDDD;
1752 
1753 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1754 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1755 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1756 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1757 
1758 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1759 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1760 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1761 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1762 
1763 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1764 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1765 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1766 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1767 
1768 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1769 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1770 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1771 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1772 
1773 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1774 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1775 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1776 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1777 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1778 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1779 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1780 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1783 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785 
1786 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1787 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1788 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1789 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1790 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1791 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1792 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1793 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1796 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798 }
1799 
1800 static void gaudi_init_golden_registers(struct hl_device *hdev)
1801 {
1802 	u32 tpc_offset;
1803 	int tpc_id, i;
1804 
1805 	gaudi_init_e2e(hdev);
1806 
1807 	gaudi_init_hbm_cred(hdev);
1808 
1809 	hdev->asic_funcs->disable_clock_gating(hdev);
1810 
1811 	for (tpc_id = 0, tpc_offset = 0;
1812 				tpc_id < TPC_NUMBER_OF_ENGINES;
1813 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1814 		/* Mask all arithmetic interrupts from TPC */
1815 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1816 		/* Set 16 cache lines */
1817 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1818 				ICACHE_FETCH_LINE_NUM, 2);
1819 	}
1820 
1821 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1822 	for (i = 0 ; i < 128 ; i += 8)
1823 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1824 
1825 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1826 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1827 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1828 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1829 }
1830 
1831 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1832 					int qman_id, dma_addr_t qman_pq_addr)
1833 {
1834 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1835 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1836 	u32 q_off, dma_qm_offset;
1837 	u32 dma_qm_err_cfg;
1838 
1839 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1840 
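	/*
	 * Compute the monitor payload and sync object base addresses of the
	 * east-north and west-south sync managers; they are programmed into
	 * the CP MSG_BASE registers below.
	 */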
1841 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
1842 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1843 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
1844 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1845 	so_base_en_lo = lower_32_bits(CFG_BASE +
1846 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1847 	so_base_en_hi = upper_32_bits(CFG_BASE +
1848 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1849 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1850 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1852 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1853 	so_base_ws_lo = lower_32_bits(CFG_BASE +
1854 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855 	so_base_ws_hi = upper_32_bits(CFG_BASE +
1856 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1857 
1858 	q_off = dma_qm_offset + qman_id * 4;
1859 
1860 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1861 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1862 
1863 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1864 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1865 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1866 
1867 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1868 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1869 							QMAN_LDMA_SRC_OFFSET);
1870 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1871 							QMAN_LDMA_DST_OFFSET);
1872 
1873 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1874 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1875 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1876 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1877 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1878 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1879 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1880 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1881 
1882 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1883 
1884 	/* The following configuration is needed only once per QMAN */
1885 	if (qman_id == 0) {
1886 		/* Configure RAZWI IRQ */
1887 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1888 		if (hdev->stop_on_err) {
1889 			dma_qm_err_cfg |=
1890 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1891 		}
1892 
1893 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1894 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1895 			lower_32_bits(CFG_BASE +
1896 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1897 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1898 			upper_32_bits(CFG_BASE +
1899 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1900 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1901 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1902 									dma_id);
1903 
1904 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1905 				QM_ARB_ERR_MSG_EN_MASK);
1906 
1907 		/* Increase ARB WDT to support streams architecture */
1908 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1909 				GAUDI_ARB_WDT_TIMEOUT);
1910 
1911 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1912 				QMAN_EXTERNAL_MAKE_TRUSTED);
1913 
1914 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1915 	}
1916 }
1917 
1918 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1919 {
1920 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1921 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1922 
1923 	/* Set to maximum possible according to physical size */
1924 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1925 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1926 
1927 	/* WA for H/W bug H3-2116 */
1928 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1929 
1930 	/* The STOP_ON bit means the operation gets no completion on RAZWI */
1931 	if (hdev->stop_on_err)
1932 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1933 
1934 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1935 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1936 		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1937 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1938 		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1939 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1940 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1941 	WREG32(mmDMA0_CORE_PROT + dma_offset,
1942 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1943 	/* If the channel is secured, it should be in MMU bypass mode */
1944 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1945 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1946 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1947 }
1948 
1949 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1950 				u32 enable_mask)
1951 {
1952 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1953 
1954 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1955 }
1956 
1957 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1958 {
1959 	struct gaudi_device *gaudi = hdev->asic_specific;
1960 	struct hl_hw_queue *q;
1961 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1962 
1963 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1964 		return;
1965 
1966 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1967 		dma_id = gaudi_dma_assignment[i];
1968 		/*
1969 		 * For queues after the CPU queue, we need to add 1 to get the
1970 		 * correct queue index. In addition, we need to account for the
1971 		 * CPU EQ and NIC IRQs in order to get the correct MSI vector.
1972 		 */
1973 		if (dma_id > 1) {
1974 			cpu_skip = 1;
1975 			nic_skip = NIC_NUMBER_OF_ENGINES;
1976 		} else {
1977 			cpu_skip = 0;
1978 			nic_skip = 0;
1979 		}
1980 
1981 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
1982 			q_idx = 4 * dma_id + j + cpu_skip;
1983 			q = &hdev->kernel_queues[q_idx];
1984 			q->cq_id = cq_id++;
1985 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1986 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
1987 						q->bus_address);
1988 		}
1989 
1990 		gaudi_init_dma_core(hdev, dma_id);
1991 
1992 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1993 	}
1994 
1995 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1996 }
1997 
1998 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1999 					int qman_id, u64 qman_base_addr)
2000 {
2001 	u32 mtr_base_lo, mtr_base_hi;
2002 	u32 so_base_lo, so_base_hi;
2003 	u32 q_off, dma_qm_offset;
2004 	u32 dma_qm_err_cfg;
2005 
2006 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2007 
2008 	mtr_base_lo = lower_32_bits(CFG_BASE +
2009 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2010 	mtr_base_hi = upper_32_bits(CFG_BASE +
2011 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2012 	so_base_lo = lower_32_bits(CFG_BASE +
2013 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2014 	so_base_hi = upper_32_bits(CFG_BASE +
2015 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2016 
2017 	q_off = dma_qm_offset + qman_id * 4;
2018 
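	/*
	 * QMAN IDs 0-3 are the stream (upper) CPs, each with its own PQ.
	 * QMAN ID 4 is the lower CP, where the per-QMAN error and
	 * arbitration configuration is done.
	 */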
2019 	if (qman_id < 4) {
2020 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2021 					lower_32_bits(qman_base_addr));
2022 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2023 					upper_32_bits(qman_base_addr));
2024 
2025 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2026 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2027 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2028 
2029 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2030 							QMAN_CPDMA_SIZE_OFFSET);
2031 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2032 							QMAN_CPDMA_SRC_OFFSET);
2033 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2034 							QMAN_CPDMA_DST_OFFSET);
2035 	} else {
2036 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2037 							QMAN_LDMA_SIZE_OFFSET);
2038 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2039 							QMAN_LDMA_SRC_OFFSET);
2040 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2041 							QMAN_LDMA_DST_OFFSET);
2042 
2043 		/* Configure RAZWI IRQ */
2044 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2045 		if (hdev->stop_on_err) {
2046 			dma_qm_err_cfg |=
2047 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2048 		}
2049 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2050 
2051 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2052 			lower_32_bits(CFG_BASE +
2053 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2054 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2055 			upper_32_bits(CFG_BASE +
2056 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2057 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2058 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2059 									dma_id);
2060 
2061 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2062 				QM_ARB_ERR_MSG_EN_MASK);
2063 
2064 		/* Increase ARB WDT to support streams architecture */
2065 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2066 				GAUDI_ARB_WDT_TIMEOUT);
2067 
2068 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2069 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2070 				QMAN_INTERNAL_MAKE_TRUSTED);
2071 	}
2072 
2073 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2074 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2075 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2076 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2077 }
2078 
2079 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2080 {
2081 	struct gaudi_device *gaudi = hdev->asic_specific;
2082 	struct gaudi_internal_qman_info *q;
2083 	u64 qman_base_addr;
2084 	int i, j, dma_id, internal_q_index;
2085 
2086 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2087 		return;
2088 
2089 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2090 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2091 
2092 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2093 			/*
2094 			 * Add the CPU queue in order to get the correct queue
2095 			 * number, as all internal queues are placed after it
2096 			 */
2097 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2098 
2099 			q = &gaudi->internal_qmans[internal_q_index];
2100 			qman_base_addr = (u64) q->pq_dma_addr;
2101 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2102 						qman_base_addr);
2103 		}
2104 
2105 		/* Initializing lower CP for HBM DMA QMAN */
2106 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2107 
2108 		gaudi_init_dma_core(hdev, dma_id);
2109 
2110 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2111 	}
2112 
2113 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2114 }
2115 
2116 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2117 					int qman_id, u64 qman_base_addr)
2118 {
2119 	u32 mtr_base_lo, mtr_base_hi;
2120 	u32 so_base_lo, so_base_hi;
2121 	u32 q_off, mme_id;
2122 	u32 mme_qm_err_cfg;
2123 
2124 	mtr_base_lo = lower_32_bits(CFG_BASE +
2125 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2126 	mtr_base_hi = upper_32_bits(CFG_BASE +
2127 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2128 	so_base_lo = lower_32_bits(CFG_BASE +
2129 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2130 	so_base_hi = upper_32_bits(CFG_BASE +
2131 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2132 
2133 	q_off = mme_offset + qman_id * 4;
2134 
2135 	if (qman_id < 4) {
2136 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2137 					lower_32_bits(qman_base_addr));
2138 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2139 					upper_32_bits(qman_base_addr));
2140 
2141 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2142 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2143 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2144 
2145 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2146 							QMAN_CPDMA_SIZE_OFFSET);
2147 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2148 							QMAN_CPDMA_SRC_OFFSET);
2149 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2150 							QMAN_CPDMA_DST_OFFSET);
2151 	} else {
2152 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2153 							QMAN_LDMA_SIZE_OFFSET);
2154 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2155 							QMAN_LDMA_SRC_OFFSET);
2156 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2157 							QMAN_LDMA_DST_OFFSET);
2158 
2159 		/* Configure RAZWI IRQ */
2160 		mme_id = mme_offset /
2161 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2162 
2163 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2164 		if (hdev->stop_on_err) {
2165 			mme_qm_err_cfg |=
2166 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2167 		}
2168 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2169 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2170 			lower_32_bits(CFG_BASE +
2171 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2172 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2173 			upper_32_bits(CFG_BASE +
2174 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2175 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2176 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2177 									mme_id);
2178 
2179 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2180 				QM_ARB_ERR_MSG_EN_MASK);
2181 
2182 		/* Increase ARB WDT to support streams architecture */
2183 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2184 				GAUDI_ARB_WDT_TIMEOUT);
2185 
2186 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2187 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2188 				QMAN_INTERNAL_MAKE_TRUSTED);
2189 	}
2190 
2191 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2192 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2193 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2194 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2195 }
2196 
2197 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2198 {
2199 	struct gaudi_device *gaudi = hdev->asic_specific;
2200 	struct gaudi_internal_qman_info *q;
2201 	u64 qman_base_addr;
2202 	u32 mme_offset;
2203 	int i, internal_q_index;
2204 
2205 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2206 		return;
2207 
2208 	/*
2209 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2210 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2211 	 */
2212 
2213 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2214 
2215 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2216 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2217 		q = &gaudi->internal_qmans[internal_q_index];
2218 		qman_base_addr = (u64) q->pq_dma_addr;
2219 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2220 					qman_base_addr);
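		/* After the first 4 streams, switch to the S_W MME (mmMME0_QM) */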
2221 		if (i == 3)
2222 			mme_offset = 0;
2223 	}
2224 
2225 	/* Initializing lower CP for MME QMANs */
2226 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2227 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2228 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2229 
2230 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2231 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2232 
2233 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2234 }
2235 
2236 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2237 				int qman_id, u64 qman_base_addr)
2238 {
2239 	u32 mtr_base_lo, mtr_base_hi;
2240 	u32 so_base_lo, so_base_hi;
2241 	u32 q_off, tpc_id;
2242 	u32 tpc_qm_err_cfg;
2243 
2244 	mtr_base_lo = lower_32_bits(CFG_BASE +
2245 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2246 	mtr_base_hi = upper_32_bits(CFG_BASE +
2247 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2248 	so_base_lo = lower_32_bits(CFG_BASE +
2249 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2250 	so_base_hi = upper_32_bits(CFG_BASE +
2251 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2252 
2253 	q_off = tpc_offset + qman_id * 4;
2254 
2255 	if (qman_id < 4) {
2256 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2257 					lower_32_bits(qman_base_addr));
2258 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2259 					upper_32_bits(qman_base_addr));
2260 
2261 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2262 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2263 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2264 
2265 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2266 							QMAN_CPDMA_SIZE_OFFSET);
2267 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2268 							QMAN_CPDMA_SRC_OFFSET);
2269 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2270 							QMAN_CPDMA_DST_OFFSET);
2271 	} else {
2272 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2273 							QMAN_LDMA_SIZE_OFFSET);
2274 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2275 							QMAN_LDMA_SRC_OFFSET);
2276 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2277 							QMAN_LDMA_DST_OFFSET);
2278 
2279 		/* Configure RAZWI IRQ */
2280 		tpc_id = tpc_offset /
2281 				(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2282 
2283 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2284 		if (hdev->stop_on_err) {
2285 			tpc_qm_err_cfg |=
2286 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2287 		}
2288 
2289 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2290 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2291 			lower_32_bits(CFG_BASE +
2292 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2293 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2294 			upper_32_bits(CFG_BASE +
2295 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2296 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2297 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2298 									tpc_id);
2299 
2300 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2301 				QM_ARB_ERR_MSG_EN_MASK);
2302 
2303 		/* Increase ARB WDT to support streams architecture */
2304 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2305 				GAUDI_ARB_WDT_TIMEOUT);
2306 
2307 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2308 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2309 				QMAN_INTERNAL_MAKE_TRUSTED);
2310 	}
2311 
2312 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2313 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2314 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2315 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2316 }
2317 
2318 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2319 {
2320 	struct gaudi_device *gaudi = hdev->asic_specific;
2321 	struct gaudi_internal_qman_info *q;
2322 	u64 qman_base_addr;
2323 	u32 so_base_hi, tpc_offset = 0;
2324 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2325 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2326 	int i, tpc_id, internal_q_index;
2327 
2328 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2329 		return;
2330 
2331 	so_base_hi = upper_32_bits(CFG_BASE +
2332 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2333 
2334 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2335 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
2336 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2337 						tpc_id * QMAN_STREAMS + i;
2338 			q = &gaudi->internal_qmans[internal_q_index];
2339 			qman_base_addr = (u64) q->pq_dma_addr;
2340 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
2341 						qman_base_addr);
2342 
2343 			if (i == 3) {
2344 				/* Initializing lower CP for TPC QMAN */
2345 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2346 
2347 				/* Enable the QMAN and TPC channel */
2348 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2349 						QMAN_TPC_ENABLE);
2350 			}
2351 		}
2352 
2353 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2354 				so_base_hi);
2355 
2356 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2357 
2358 		gaudi->hw_cap_initialized |=
2359 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2360 	}
2361 }
2362 
2363 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2364 {
2365 	struct gaudi_device *gaudi = hdev->asic_specific;
2366 
2367 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2368 		return;
2369 
2370 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2371 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2372 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2373 }
2374 
2375 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2376 {
2377 	struct gaudi_device *gaudi = hdev->asic_specific;
2378 
2379 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2380 		return;
2381 
2382 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2383 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2384 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2385 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2386 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2387 }
2388 
2389 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2390 {
2391 	struct gaudi_device *gaudi = hdev->asic_specific;
2392 
2393 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2394 		return;
2395 
2396 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
2397 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
2398 }
2399 
2400 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2401 {
2402 	struct gaudi_device *gaudi = hdev->asic_specific;
2403 	u32 tpc_offset = 0;
2404 	int tpc_id;
2405 
2406 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2407 		return;
2408 
2409 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2410 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2411 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2412 	}
2413 }
2414 
2415 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2416 {
2417 	struct gaudi_device *gaudi = hdev->asic_specific;
2418 
2419 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2420 		return;
2421 
2422 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2423 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2424 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2425 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426 }
2427 
2428 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2429 {
2430 	struct gaudi_device *gaudi = hdev->asic_specific;
2431 
2432 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2433 		return;
2434 
2435 	/* Stop CPs of HBM DMA QMANs */
2436 
2437 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442 }
2443 
2444 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2445 {
2446 	struct gaudi_device *gaudi = hdev->asic_specific;
2447 
2448 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2449 		return;
2450 
2451 	/* Stop CPs of MME QMANs */
2452 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2453 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2454 }
2455 
2456 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2457 {
2458 	struct gaudi_device *gaudi = hdev->asic_specific;
2459 
2460 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2461 		return;
2462 
2463 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2464 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2465 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2466 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2467 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2468 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2469 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471 }
2472 
2473 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2474 {
2475 	struct gaudi_device *gaudi = hdev->asic_specific;
2476 
2477 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2478 		return;
2479 
2480 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2481 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2482 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2483 }
2484 
2485 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2486 {
2487 	struct gaudi_device *gaudi = hdev->asic_specific;
2488 
2489 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2490 		return;
2491 
2492 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2493 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2494 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2495 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2496 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2497 }
2498 
2499 static void gaudi_mme_stall(struct hl_device *hdev)
2500 {
2501 	struct gaudi_device *gaudi = hdev->asic_specific;
2502 
2503 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2504 		return;
2505 
2506 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
2507 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2508 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2509 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2510 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2511 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2512 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2513 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2514 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2515 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2516 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2517 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2518 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2519 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2520 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2521 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2522 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2523 }
2524 
2525 static void gaudi_tpc_stall(struct hl_device *hdev)
2526 {
2527 	struct gaudi_device *gaudi = hdev->asic_specific;
2528 
2529 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2530 		return;
2531 
2532 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2533 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2534 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2535 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2536 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2537 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2538 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540 }
2541 
2542 static void gaudi_set_clock_gating(struct hl_device *hdev)
2543 {
2544 	struct gaudi_device *gaudi = hdev->asic_specific;
2545 	u32 qman_offset;
2546 	bool enable;
2547 	int i;
2548 
2549 	/* If we are in a debug session, don't enable clock gating as it
2550 	 * may interfere with the debugging
2551 	 */
2552 	if (hdev->in_debug)
2553 		return;
2554 
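	/*
	 * The PCI DMA QMANs use the upper-CP gating configuration; all other
	 * engines use the common-CP configuration.
	 */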
2555 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2556 		enable = !!(hdev->clock_gating_mask &
2557 				(BIT_ULL(gaudi_dma_assignment[i])));
2558 
2559 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2560 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2561 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2562 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2563 				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2564 	}
2565 
2566 	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2567 		enable = !!(hdev->clock_gating_mask &
2568 				(BIT_ULL(gaudi_dma_assignment[i])));
2569 
2570 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2571 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2572 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2573 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2574 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2575 	}
2576 
2577 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2578 	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579 	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2580 
2581 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2582 	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2583 	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2584 
2585 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2586 		enable = !!(hdev->clock_gating_mask &
2587 				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2588 
2589 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2590 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2591 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2592 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2593 
2594 		qman_offset += TPC_QMAN_OFFSET;
2595 	}
2596 
2597 	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2598 }
2599 
2600 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2601 {
2602 	struct gaudi_device *gaudi = hdev->asic_specific;
2603 	u32 qman_offset;
2604 	int i;
2605 
2606 	if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2607 		return;
2608 
2609 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2610 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2611 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2612 
2613 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2614 	}
2615 
2616 	WREG32(mmMME0_QM_CGM_CFG, 0);
2617 	WREG32(mmMME0_QM_CGM_CFG1, 0);
2618 	WREG32(mmMME2_QM_CGM_CFG, 0);
2619 	WREG32(mmMME2_QM_CGM_CFG1, 0);
2620 
2621 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2622 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2623 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2624 
2625 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2626 	}
2627 
2628 	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2629 }
2630 
2631 static void gaudi_enable_timestamp(struct hl_device *hdev)
2632 {
2633 	/* Disable the timestamp counter */
2634 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2635 
2636 	/* Zero the lower/upper parts of the 64-bit counter */
2637 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2638 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2639 
2640 	/* Enable the counter */
2641 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2642 }
2643 
2644 static void gaudi_disable_timestamp(struct hl_device *hdev)
2645 {
2646 	/* Disable the timestamp counter */
2647 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2648 }
2649 
2650 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2651 {
2652 	u32 wait_timeout_ms;
2653 
2654 	dev_info(hdev->dev,
2655 		"Halting compute engines and disabling interrupts\n");
2656 
2657 	if (hdev->pldm)
2658 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2659 	else
2660 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2661 
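	/* First stop the QMAN CPs, then stall the engine cores, and only
	 * then disable the QMANs
	 */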
2663 	gaudi_stop_mme_qmans(hdev);
2664 	gaudi_stop_tpc_qmans(hdev);
2665 	gaudi_stop_hbm_dma_qmans(hdev);
2666 	gaudi_stop_pci_dma_qmans(hdev);
2667 
2668 	hdev->asic_funcs->disable_clock_gating(hdev);
2669 
2670 	msleep(wait_timeout_ms);
2671 
2672 	gaudi_pci_dma_stall(hdev);
2673 	gaudi_hbm_dma_stall(hdev);
2674 	gaudi_tpc_stall(hdev);
2675 	gaudi_mme_stall(hdev);
2676 
2677 	msleep(wait_timeout_ms);
2678 
2679 	gaudi_disable_mme_qmans(hdev);
2680 	gaudi_disable_tpc_qmans(hdev);
2681 	gaudi_disable_hbm_dma_qmans(hdev);
2682 	gaudi_disable_pci_dma_qmans(hdev);
2683 
2684 	gaudi_disable_timestamp(hdev);
2685 
2686 	gaudi_disable_msi(hdev);
2687 }
2688 
2689 static int gaudi_mmu_init(struct hl_device *hdev)
2690 {
2691 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2692 	struct gaudi_device *gaudi = hdev->asic_specific;
2693 	u64 hop0_addr;
2694 	int rc, i;
2695 
2696 	if (!hdev->mmu_enable)
2697 		return 0;
2698 
2699 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2700 		return 0;
2701 
2702 	hdev->dram_supports_virtual_memory = false;
2703 
2704 	for (i = 0 ; i < prop->max_asid ; i++) {
2705 		hop0_addr = prop->mmu_pgt_addr +
2706 				(i * prop->mmu_hop_table_size);
2707 
2708 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2709 		if (rc) {
2710 			dev_err(hdev->dev,
2711 				"failed to set hop0 addr for asid %d\n", i);
2712 			goto err;
2713 		}
2714 	}
2715 
2716 	/* Init the MMU cache management page */
2717 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2718 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2719 
2720 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2721 
2722 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
2723 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
2724 
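	/* Select the hop configuration according to the huge-page optimization */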
2725 	WREG32(mmSTLB_HOP_CONFIGURATION,
2726 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2727 
2728 	/*
2729 	 * The H/W expects the first PI after init to be 1. After wraparound
2730 	 * we'll write 0.
2731 	 */
2732 	gaudi->mmu_cache_inv_pi = 1;
2733 
2734 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
2735 
2736 	return 0;
2737 
2738 err:
2739 	return rc;
2740 }
2741 
2742 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2743 {
2744 	void __iomem *dst;
2745 
2746 	/* HBM scrambler must be initialized before pushing F/W to HBM */
2747 	gaudi_init_scrambler_hbm(hdev);
2748 
2749 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2750 
2751 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2752 }
2753 
2754 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2755 {
2756 	void __iomem *dst;
2757 
2758 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2759 
2760 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2761 }
2762 
2763 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2764 					enum hl_fw_component fwc)
2765 {
2766 	const char *name;
2767 	u32 ver_off;
2768 	char *dest;
2769 
2770 	switch (fwc) {
2771 	case FW_COMP_UBOOT:
2772 		ver_off = RREG32(mmUBOOT_VER_OFFSET);
2773 		dest = hdev->asic_prop.uboot_ver;
2774 		name = "U-Boot";
2775 		break;
2776 	case FW_COMP_PREBOOT:
2777 		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2778 		dest = hdev->asic_prop.preboot_ver;
2779 		name = "Preboot";
2780 		break;
2781 	default:
2782 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2783 		return;
2784 	}
2785 
2786 	ver_off &= ~((u32)SRAM_BASE_ADDR);
2787 
2788 	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2789 		memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2790 							VERSION_MAX_LEN);
2791 	} else {
2792 		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2793 								name, ver_off);
2794 		strcpy(dest, "unavailable");
2795 	}
2796 }
2797 
2798 static int gaudi_init_cpu(struct hl_device *hdev)
2799 {
2800 	struct gaudi_device *gaudi = hdev->asic_specific;
2801 	int rc;
2802 
2803 	if (!hdev->cpu_enable)
2804 		return 0;
2805 
2806 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2807 		return 0;
2808 
2809 	/*
2810 	 * The device CPU works with 40 bits addresses.
2811 	 * This register sets the extension to 50 bits.
2812 	 */
2813 	WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2814 
2815 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2816 			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2817 			mmCPU_CMD_STATUS_TO_HOST,
2818 			mmCPU_BOOT_ERR0,
2819 			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2820 			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2821 
2822 	if (rc)
2823 		return rc;
2824 
2825 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
2826 
2827 	return 0;
2828 }
2829 
2830 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2831 {
2832 	struct gaudi_device *gaudi = hdev->asic_specific;
2833 	struct hl_eq *eq;
2834 	u32 status;
2835 	struct hl_hw_queue *cpu_pq =
2836 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2837 	int err;
2838 
2839 	if (!hdev->cpu_queues_enable)
2840 		return 0;
2841 
2842 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2843 		return 0;
2844 
2845 	eq = &hdev->event_queue;
2846 
2847 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2848 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2849 
2850 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2851 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2852 
2853 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2854 			lower_32_bits(hdev->cpu_accessible_dma_address));
2855 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2856 			upper_32_bits(hdev->cpu_accessible_dma_address));
2857 
2858 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2859 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2860 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2861 
2862 	/* Used for EQ CI */
2863 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2864 
2865 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
2866 
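	/* Signal readiness to the device CPU and indicate the MSI mode in use */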
2867 	if (gaudi->multi_msi_mode)
2868 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2869 	else
2870 		WREG32(mmCPU_IF_QUEUE_INIT,
2871 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2872 
2873 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2874 
2875 	err = hl_poll_timeout(
2876 		hdev,
2877 		mmCPU_IF_QUEUE_INIT,
2878 		status,
2879 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
2880 		1000,
2881 		cpu_timeout);
2882 
2883 	if (err) {
2884 		dev_err(hdev->dev,
2885 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
2886 		return -EIO;
2887 	}
2888 
2889 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2890 	return 0;
2891 }
2892 
2893 static void gaudi_pre_hw_init(struct hl_device *hdev)
2894 {
2895 	/* Perform read from the device to make sure device is up */
2896 	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2897 
2898 	/* Set the access through PCI bars (Linux driver only) as
2899 	 * secured
2900 	 */
2901 	WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2902 			(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2903 			PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2904 
2905 	/* Perform read to flush the waiting writes to ensure
2906 	 * configuration was set in the device
2907 	 */
2908 	RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2909 
2910 	/*
2911 	 * Let's mark in the H/W that we have reached this point. We check
2912 	 * this value in the reset_before_init function to understand whether
2913 	 * we need to reset the chip before doing H/W init. This register is
2914 	 * cleared by the H/W upon H/W reset
2915 	 */
2916 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2917 
2918 	/* Configure the reset registers. Must be done as early as possible
2919 	 * in case we fail during H/W initialization
2920 	 */
2921 	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2922 					(CFG_RST_H_DMA_MASK |
2923 					CFG_RST_H_MME_MASK |
2924 					CFG_RST_H_SM_MASK |
2925 					CFG_RST_H_TPC_7_MASK));
2926 
2927 	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2928 
2929 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2930 					(CFG_RST_H_HBM_MASK |
2931 					CFG_RST_H_TPC_7_MASK |
2932 					CFG_RST_H_NIC_MASK |
2933 					CFG_RST_H_SM_MASK |
2934 					CFG_RST_H_DMA_MASK |
2935 					CFG_RST_H_MME_MASK |
2936 					CFG_RST_H_CPU_MASK |
2937 					CFG_RST_H_MMU_MASK));
2938 
2939 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2940 					(CFG_RST_L_IF_MASK |
2941 					CFG_RST_L_PSOC_MASK |
2942 					CFG_RST_L_TPC_MASK));
2943 }
2944 
2945 static int gaudi_hw_init(struct hl_device *hdev)
2946 {
2947 	int rc;
2948 
2949 	dev_info(hdev->dev, "Starting initialization of H/W\n");
2950 
2951 	gaudi_pre_hw_init(hdev);
2952 
2953 	gaudi_init_pci_dma_qmans(hdev);
2954 
2955 	gaudi_init_hbm_dma_qmans(hdev);
2956 
2957 	rc = gaudi_init_cpu(hdev);
2958 	if (rc) {
2959 		dev_err(hdev->dev, "failed to initialize CPU\n");
2960 		return rc;
2961 	}
2962 
2963 	/* SRAM scrambler must be initialized after CPU is running from HBM */
2964 	gaudi_init_scrambler_sram(hdev);
2965 
2966 	/* This is here just in case we are working without CPU */
2967 	gaudi_init_scrambler_hbm(hdev);
2968 
2969 	gaudi_init_golden_registers(hdev);
2970 
2971 	rc = gaudi_mmu_init(hdev);
2972 	if (rc)
2973 		return rc;
2974 
2975 	gaudi_init_security(hdev);
2976 
2977 	gaudi_init_mme_qmans(hdev);
2978 
2979 	gaudi_init_tpc_qmans(hdev);
2980 
2981 	hdev->asic_funcs->set_clock_gating(hdev);
2982 
2983 	gaudi_enable_timestamp(hdev);
2984 
2985 	/* MSI must be enabled before CPU queues are initialized */
2986 	rc = gaudi_enable_msi(hdev);
2987 	if (rc)
2988 		goto disable_queues;
2989 
2990 	/* must be called after MSI was enabled */
2991 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2992 	if (rc) {
2993 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2994 			rc);
2995 		goto disable_msi;
2996 	}
2997 
2998 	/* Perform read from the device to flush all configuration */
2999 	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3000 
3001 	return 0;
3002 
3003 disable_msi:
3004 	gaudi_disable_msi(hdev);
3005 disable_queues:
3006 	gaudi_disable_mme_qmans(hdev);
3007 	gaudi_disable_pci_dma_qmans(hdev);
3008 
3009 	return rc;
3010 }
3011 
3012 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3013 {
3014 	struct gaudi_device *gaudi = hdev->asic_specific;
3015 	u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3016 
3017 	if (!hard_reset) {
3018 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3019 		return;
3020 	}
3021 
3022 	if (hdev->pldm) {
3023 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3024 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3025 	} else {
3026 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3027 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3028 	}
3029 
3030 	/* Set device to handle FLR by H/W as we will put the device CPU to
3031 	 * halt mode
3032 	 */
3033 	WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3034 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3035 
3036 	/* The state of the CPU is unknown at this point, so make sure it is
3037 	 * stopped by any means necessary
3038 	 */
3039 	WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3040 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3041 
3042 	msleep(cpu_timeout_ms);
3043 
3044 	/* Tell ASIC not to re-initialize PCIe */
3045 	WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3046 
3047 	boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3048 
3049 	/* H/W bug WA:
3050 	 * rdata[31:0] = strap_read_val;
3051 	 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3052 	 */
3053 	boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3054 			(boot_strap & 0x001FFFFF));
3055 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3056 
3057 	/* Restart BTL/BLR upon hard-reset */
3058 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3059 
3060 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3061 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3062 	dev_info(hdev->dev,
3063 		"Issued HARD reset command, going to wait %dms\n",
3064 		reset_timeout_ms);
3065 
3066 	/*
3067 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3068 	 * itself is in reset. Need to wait until the reset is deasserted
3069 	 */
3070 	msleep(reset_timeout_ms);
3071 
3072 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3073 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3074 		dev_err(hdev->dev,
3075 			"Timeout while waiting for device to reset 0x%x\n",
3076 			status);
3077 
3078 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3079 
3080 	gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3081 					HW_CAP_HBM | HW_CAP_PCI_DMA |
3082 					HW_CAP_MME | HW_CAP_TPC_MASK |
3083 					HW_CAP_HBM_DMA | HW_CAP_PLL |
3084 					HW_CAP_MMU |
3085 					HW_CAP_SRAM_SCRAMBLER |
3086 					HW_CAP_HBM_SCRAMBLER |
3087 					HW_CAP_CLK_GATE);
3088 
3089 	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3090 }
3091 
3092 static int gaudi_suspend(struct hl_device *hdev)
3093 {
3094 	int rc;
3095 
3096 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3097 	if (rc)
3098 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3099 
3100 	return rc;
3101 }
3102 
3103 static int gaudi_resume(struct hl_device *hdev)
3104 {
3105 	return gaudi_init_iatu(hdev);
3106 }
3107 
3108 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3109 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
3110 {
3111 	int rc;
3112 
3113 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3114 			VM_DONTCOPY | VM_NORESERVE;
3115 
3116 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
3117 	if (rc)
3118 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3119 
3120 	return rc;
3121 }
3122 
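/*
 * Translate the driver's H/W queue ID to the matching QMAN PQ_PI doorbell
 * register (DMA/MME/TPC) or to the CPU_IF PQ_PI, write the new producer
 * index and, for the CPU queue, raise a GIC event so the device CPU
 * notices the update.
 */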
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3124 {
3125 	struct gaudi_device *gaudi = hdev->asic_specific;
3126 	u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3127 	int dma_id;
3128 	bool invalid_queue = false;
3129 
3130 	switch (hw_queue_id) {
3131 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3132 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3133 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3135 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136 		break;
3137 
3138 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3139 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3140 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 		break;
3144 
3145 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3146 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3147 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 		break;
3151 
3152 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3153 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3154 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 		break;
3158 
3159 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3160 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3161 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 		break;
3165 
3166 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3167 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3168 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 		break;
3172 
3173 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3174 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3175 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178 		break;
3179 
3180 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3181 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3182 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185 		break;
3186 
3187 	case GAUDI_QUEUE_ID_CPU_PQ:
3188 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3189 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
3190 		else
3191 			invalid_queue = true;
3192 		break;
3193 
3194 	case GAUDI_QUEUE_ID_MME_0_0:
3195 		db_reg_offset = mmMME2_QM_PQ_PI_0;
3196 		break;
3197 
3198 	case GAUDI_QUEUE_ID_MME_0_1:
3199 		db_reg_offset = mmMME2_QM_PQ_PI_1;
3200 		break;
3201 
3202 	case GAUDI_QUEUE_ID_MME_0_2:
3203 		db_reg_offset = mmMME2_QM_PQ_PI_2;
3204 		break;
3205 
3206 	case GAUDI_QUEUE_ID_MME_0_3:
3207 		db_reg_offset = mmMME2_QM_PQ_PI_3;
3208 		break;
3209 
3210 	case GAUDI_QUEUE_ID_MME_1_0:
3211 		db_reg_offset = mmMME0_QM_PQ_PI_0;
3212 		break;
3213 
3214 	case GAUDI_QUEUE_ID_MME_1_1:
3215 		db_reg_offset = mmMME0_QM_PQ_PI_1;
3216 		break;
3217 
3218 	case GAUDI_QUEUE_ID_MME_1_2:
3219 		db_reg_offset = mmMME0_QM_PQ_PI_2;
3220 		break;
3221 
3222 	case GAUDI_QUEUE_ID_MME_1_3:
3223 		db_reg_offset = mmMME0_QM_PQ_PI_3;
3224 		break;
3225 
3226 	case GAUDI_QUEUE_ID_TPC_0_0:
3227 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
3228 		break;
3229 
3230 	case GAUDI_QUEUE_ID_TPC_0_1:
3231 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
3232 		break;
3233 
3234 	case GAUDI_QUEUE_ID_TPC_0_2:
3235 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
3236 		break;
3237 
3238 	case GAUDI_QUEUE_ID_TPC_0_3:
3239 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
3240 		break;
3241 
3242 	case GAUDI_QUEUE_ID_TPC_1_0:
3243 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
3244 		break;
3245 
3246 	case GAUDI_QUEUE_ID_TPC_1_1:
3247 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
3248 		break;
3249 
3250 	case GAUDI_QUEUE_ID_TPC_1_2:
3251 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
3252 		break;
3253 
3254 	case GAUDI_QUEUE_ID_TPC_1_3:
3255 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
3256 		break;
3257 
3258 	case GAUDI_QUEUE_ID_TPC_2_0:
3259 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
3260 		break;
3261 
3262 	case GAUDI_QUEUE_ID_TPC_2_1:
3263 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
3264 		break;
3265 
3266 	case GAUDI_QUEUE_ID_TPC_2_2:
3267 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
3268 		break;
3269 
3270 	case GAUDI_QUEUE_ID_TPC_2_3:
3271 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
3272 		break;
3273 
3274 	case GAUDI_QUEUE_ID_TPC_3_0:
3275 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
3276 		break;
3277 
3278 	case GAUDI_QUEUE_ID_TPC_3_1:
3279 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
3280 		break;
3281 
3282 	case GAUDI_QUEUE_ID_TPC_3_2:
3283 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
3284 		break;
3285 
3286 	case GAUDI_QUEUE_ID_TPC_3_3:
3287 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
3288 		break;
3289 
3290 	case GAUDI_QUEUE_ID_TPC_4_0:
3291 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
3292 		break;
3293 
3294 	case GAUDI_QUEUE_ID_TPC_4_1:
3295 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
3296 		break;
3297 
3298 	case GAUDI_QUEUE_ID_TPC_4_2:
3299 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
3300 		break;
3301 
3302 	case GAUDI_QUEUE_ID_TPC_4_3:
3303 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
3304 		break;
3305 
3306 	case GAUDI_QUEUE_ID_TPC_5_0:
3307 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
3308 		break;
3309 
3310 	case GAUDI_QUEUE_ID_TPC_5_1:
3311 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
3312 		break;
3313 
3314 	case GAUDI_QUEUE_ID_TPC_5_2:
3315 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
3316 		break;
3317 
3318 	case GAUDI_QUEUE_ID_TPC_5_3:
3319 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
3320 		break;
3321 
3322 	case GAUDI_QUEUE_ID_TPC_6_0:
3323 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
3324 		break;
3325 
3326 	case GAUDI_QUEUE_ID_TPC_6_1:
3327 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
3328 		break;
3329 
3330 	case GAUDI_QUEUE_ID_TPC_6_2:
3331 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
3332 		break;
3333 
3334 	case GAUDI_QUEUE_ID_TPC_6_3:
3335 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
3336 		break;
3337 
3338 	case GAUDI_QUEUE_ID_TPC_7_0:
3339 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
3340 		break;
3341 
3342 	case GAUDI_QUEUE_ID_TPC_7_1:
3343 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
3344 		break;
3345 
3346 	case GAUDI_QUEUE_ID_TPC_7_2:
3347 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
3348 		break;
3349 
3350 	case GAUDI_QUEUE_ID_TPC_7_3:
3351 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
3352 		break;
3353 
3354 	default:
3355 		invalid_queue = true;
3356 	}
3357 
3358 	if (invalid_queue) {
3359 		/* Should never get here */
3360 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3361 			hw_queue_id);
3362 		return;
3363 	}
3364 
3365 	db_value = pi;
3366 
3367 	/* ring the doorbell */
3368 	WREG32(db_reg_offset, db_value);
3369 
3370 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3371 		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3372 				GAUDI_EVENT_PI_UPDATE);
3373 }
3374 
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
3377 {
3378 	__le64 *pbd = (__le64 *) bd;
3379 
	/* The QMANs are in host memory so a simple copy suffices */
3381 	pqe[0] = pbd[0];
3382 	pqe[1] = pbd[1];
3383 }
3384 
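/*
 * Host coherent memory is seen by the device behind HOST_PHYS_BASE, so
 * every DMA handle returned to callers is shifted by that base (and
 * shifted back before freeing/unmapping).
 */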
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
3387 {
3388 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3389 						dma_handle, flags);
3390 
3391 	/* Shift to the device's base physical address of host memory */
3392 	if (kernel_addr)
3393 		*dma_handle += HOST_PHYS_BASE;
3394 
3395 	return kernel_addr;
3396 }
3397 
static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
		void *cpu_addr, dma_addr_t dma_handle)
3400 {
3401 	/* Cancel the device's base physical address of host memory */
3402 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3403 
3404 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3405 }
3406 
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
3410 {
3411 	struct gaudi_device *gaudi = hdev->asic_specific;
3412 	struct gaudi_internal_qman_info *q;
3413 
3414 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3415 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3416 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3417 		return NULL;
3418 	}
3419 
3420 	q = &gaudi->internal_qmans[queue_id];
3421 	*dma_handle = q->pq_dma_addr;
3422 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3423 
3424 	return q->pq_kernel_addr;
3425 }
3426 
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, long *result)
3429 {
3430 	struct gaudi_device *gaudi = hdev->asic_specific;
3431 
3432 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3433 		if (result)
3434 			*result = 0;
3435 		return 0;
3436 	}
3437 
3438 	if (!timeout)
3439 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3440 
3441 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3442 						timeout, result);
3443 }
3444 
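/*
 * Sanity-test an external queue: allocate a scratch word and a MSG_PROT
 * packet from the DMA pool, submit the packet without a completion and
 * poll the scratch word until the QMAN writes the fence value to it.
 */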
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3446 {
3447 	struct packet_msg_prot *fence_pkt;
3448 	dma_addr_t pkt_dma_addr;
3449 	u32 fence_val, tmp, timeout_usec;
3450 	dma_addr_t fence_dma_addr;
3451 	u32 *fence_ptr;
3452 	int rc;
3453 
3454 	if (hdev->pldm)
3455 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3456 	else
3457 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3458 
3459 	fence_val = GAUDI_QMAN0_FENCE_VAL;
3460 
3461 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3462 							&fence_dma_addr);
3463 	if (!fence_ptr) {
3464 		dev_err(hdev->dev,
3465 			"Failed to allocate memory for H/W queue %d testing\n",
3466 			hw_queue_id);
3467 		return -ENOMEM;
3468 	}
3469 
3470 	*fence_ptr = 0;
3471 
3472 	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3473 					sizeof(struct packet_msg_prot),
3474 					GFP_KERNEL, &pkt_dma_addr);
3475 	if (!fence_pkt) {
3476 		dev_err(hdev->dev,
3477 			"Failed to allocate packet for H/W queue %d testing\n",
3478 			hw_queue_id);
3479 		rc = -ENOMEM;
3480 		goto free_fence_ptr;
3481 	}
3482 
3483 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3484 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3485 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3486 
3487 	fence_pkt->ctl = cpu_to_le32(tmp);
3488 	fence_pkt->value = cpu_to_le32(fence_val);
3489 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3490 
3491 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3492 					sizeof(struct packet_msg_prot),
3493 					pkt_dma_addr);
3494 	if (rc) {
3495 		dev_err(hdev->dev,
3496 			"Failed to send fence packet to H/W queue %d\n",
3497 			hw_queue_id);
3498 		goto free_pkt;
3499 	}
3500 
3501 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3502 					1000, timeout_usec, true);
3503 
3504 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3505 
3506 	if (rc == -ETIMEDOUT) {
3507 		dev_err(hdev->dev,
3508 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3509 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3510 		rc = -EIO;
3511 	}
3512 
3513 free_pkt:
3514 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3515 					pkt_dma_addr);
3516 free_fence_ptr:
3517 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3518 					fence_dma_addr);
3519 	return rc;
3520 }
3521 
static int gaudi_test_cpu_queue(struct hl_device *hdev)
3523 {
3524 	struct gaudi_device *gaudi = hdev->asic_specific;
3525 
3526 	/*
3527 	 * check capability here as send_cpu_message() won't update the result
3528 	 * value if no capability
3529 	 */
3530 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3531 		return 0;
3532 
3533 	return hl_fw_test_cpu_queue(hdev);
3534 }
3535 
static int gaudi_test_queues(struct hl_device *hdev)
3537 {
3538 	int i, rc, ret_val = 0;
3539 
3540 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3541 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3542 			rc = gaudi_test_queue(hdev, i);
3543 			if (rc)
3544 				ret_val = -EINVAL;
3545 		}
3546 	}
3547 
3548 	rc = gaudi_test_cpu_queue(hdev);
3549 	if (rc)
3550 		ret_val = -EINVAL;
3551 
3552 	return ret_val;
3553 }
3554 
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
		gfp_t mem_flags, dma_addr_t *dma_handle)
3557 {
3558 	void *kernel_addr;
3559 
3560 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
3561 		return NULL;
3562 
3563 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3564 
3565 	/* Shift to the device's base physical address of host memory */
3566 	if (kernel_addr)
3567 		*dma_handle += HOST_PHYS_BASE;
3568 
3569 	return kernel_addr;
3570 }
3571 
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
3574 {
3575 	/* Cancel the device's base physical address of host memory */
3576 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3577 
3578 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3579 }
3580 
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
3583 {
3584 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3585 }
3586 
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
3589 {
3590 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3591 }
3592 
static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
3595 {
3596 	struct scatterlist *sg;
3597 	int i;
3598 
3599 	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3600 		return -ENOMEM;
3601 
3602 	/* Shift to the device's base physical address of host memory */
3603 	for_each_sg(sgl, sg, nents, i)
3604 		sg->dma_address += HOST_PHYS_BASE;
3605 
3606 	return 0;
3607 }
3608 
static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
3611 {
3612 	struct scatterlist *sg;
3613 	int i;
3614 
3615 	/* Cancel the device's base physical address of host memory */
3616 	for_each_sg(sgl, sg, nents, i)
3617 		sg->dma_address -= HOST_PHYS_BASE;
3618 
3619 	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3620 }
3621 
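/*
 * Compute how many LIN_DMA packets are needed to cover an SG table, merging
 * entries that are physically contiguous as long as the combined length does
 * not exceed DMA_MAX_TRANSFER_SIZE, and return the total size in bytes of
 * those packets.
 */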
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
					struct sg_table *sgt)
3624 {
3625 	struct scatterlist *sg, *sg_next_iter;
3626 	u32 count, dma_desc_cnt;
3627 	u64 len, len_next;
3628 	dma_addr_t addr, addr_next;
3629 
3630 	dma_desc_cnt = 0;
3631 
3632 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3633 
3634 		len = sg_dma_len(sg);
3635 		addr = sg_dma_address(sg);
3636 
3637 		if (len == 0)
3638 			break;
3639 
3640 		while ((count + 1) < sgt->nents) {
3641 			sg_next_iter = sg_next(sg);
3642 			len_next = sg_dma_len(sg_next_iter);
3643 			addr_next = sg_dma_address(sg_next_iter);
3644 
3645 			if (len_next == 0)
3646 				break;
3647 
3648 			if ((addr + len == addr_next) &&
3649 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3650 				len += len_next;
3651 				count++;
3652 				sg = sg_next_iter;
3653 			} else {
3654 				break;
3655 			}
3656 		}
3657 
3658 		dma_desc_cnt++;
3659 	}
3660 
3661 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
3662 }
3663 
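/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this job), DMA-map its SG table and account for the
 * descriptors it will add to the patched CB.
 */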
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
3668 {
3669 	struct hl_userptr *userptr;
3670 	int rc;
3671 
3672 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3673 			parser->job_userptr_list, &userptr))
3674 		goto already_pinned;
3675 
3676 	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3677 	if (!userptr)
3678 		return -ENOMEM;
3679 
3680 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3681 				userptr);
3682 	if (rc)
3683 		goto free_userptr;
3684 
3685 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
3686 
3687 	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3688 					userptr->sgt->nents, dir);
3689 	if (rc) {
3690 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3691 		goto unpin_memory;
3692 	}
3693 
3694 	userptr->dma_mapped = true;
3695 	userptr->dir = dir;
3696 
3697 already_pinned:
3698 	parser->patched_cb_size +=
3699 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3700 
3701 	return 0;
3702 
3703 unpin_memory:
3704 	hl_unpin_host_memory(hdev, userptr);
3705 free_userptr:
3706 	kfree(userptr);
3707 	return rc;
3708 }
3709 
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
3714 {
3715 	enum dma_data_direction dir;
3716 	bool skip_host_mem_pin = false, user_memset;
3717 	u64 addr;
3718 	int rc = 0;
3719 
3720 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3721 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3722 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3723 
3724 	if (src_in_host) {
3725 		if (user_memset)
3726 			skip_host_mem_pin = true;
3727 
3728 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3729 		dir = DMA_TO_DEVICE;
3730 		addr = le64_to_cpu(user_dma_pkt->src_addr);
3731 	} else {
3732 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3733 		dir = DMA_FROM_DEVICE;
3734 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3735 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3736 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3737 	}
3738 
3739 	if (skip_host_mem_pin)
3740 		parser->patched_cb_size += sizeof(*user_dma_pkt);
3741 	else
3742 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3743 						addr, dir);
3744 
3745 	return rc;
3746 }
3747 
static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
3751 {
3752 	bool src_in_host = false;
3753 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3754 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3755 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3756 
3757 	dev_dbg(hdev->dev, "DMA packet details:\n");
3758 	dev_dbg(hdev->dev, "source == 0x%llx\n",
3759 				le64_to_cpu(user_dma_pkt->src_addr));
3760 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3761 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3762 
3763 	/*
3764 	 * Special handling for DMA with size 0. Bypass all validations
3765 	 * because no transactions will be done except for WR_COMP, which
3766 	 * is not a security issue
3767 	 */
3768 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
3769 		parser->patched_cb_size += sizeof(*user_dma_pkt);
3770 		return 0;
3771 	}
3772 
3773 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3774 		src_in_host = true;
3775 
3776 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3777 						src_in_host);
3778 }
3779 
static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
					struct hl_cs_parser *parser,
					struct packet_load_and_exe *user_pkt)
3783 {
3784 	u32 cfg;
3785 
3786 	cfg = le32_to_cpu(user_pkt->cfg);
3787 
3788 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3789 		dev_err(hdev->dev,
3790 			"User not allowed to use Load and Execute\n");
3791 		return -EPERM;
3792 	}
3793 
3794 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3795 
3796 	return 0;
3797 }
3798 
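/*
 * Walk the packets of the user CB, reject privileged packets (MSG_PROT,
 * CP_DMA, STOP, WREG_BULK), validate LOAD_AND_EXE and LIN_DMA packets and
 * accumulate the size of the patched CB, including room for the two
 * MSG_PROT packets appended at the end.
 */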
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
3801 {
3802 	u32 cb_parsed_length = 0;
3803 	int rc = 0;
3804 
3805 	parser->patched_cb_size = 0;
3806 
	/* user_cb_size is more than 0 so the loop will always be executed */
3808 	while (cb_parsed_length < parser->user_cb_size) {
3809 		enum packet_id pkt_id;
3810 		u16 pkt_size;
3811 		struct gaudi_packet *user_pkt;
3812 
3813 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3814 
3815 		pkt_id = (enum packet_id) (
3816 				(le64_to_cpu(user_pkt->header) &
3817 				PACKET_HEADER_PACKET_ID_MASK) >>
3818 					PACKET_HEADER_PACKET_ID_SHIFT);
3819 
3820 		if (!validate_packet_id(pkt_id)) {
3821 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3822 			rc = -EINVAL;
3823 			break;
3824 		}
3825 
3826 		pkt_size = gaudi_packet_sizes[pkt_id];
3827 		cb_parsed_length += pkt_size;
3828 		if (cb_parsed_length > parser->user_cb_size) {
3829 			dev_err(hdev->dev,
3830 				"packet 0x%x is out of CB boundary\n", pkt_id);
3831 			rc = -EINVAL;
3832 			break;
3833 		}
3834 
3835 		switch (pkt_id) {
3836 		case PACKET_MSG_PROT:
3837 			dev_err(hdev->dev,
3838 				"User not allowed to use MSG_PROT\n");
3839 			rc = -EPERM;
3840 			break;
3841 
3842 		case PACKET_CP_DMA:
3843 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3844 			rc = -EPERM;
3845 			break;
3846 
3847 		case PACKET_STOP:
3848 			dev_err(hdev->dev, "User not allowed to use STOP\n");
3849 			rc = -EPERM;
3850 			break;
3851 
3852 		case PACKET_WREG_BULK:
3853 			dev_err(hdev->dev,
3854 				"User not allowed to use WREG_BULK\n");
3855 			rc = -EPERM;
3856 			break;
3857 
3858 		case PACKET_LOAD_AND_EXE:
3859 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3860 				(struct packet_load_and_exe *) user_pkt);
3861 			break;
3862 
3863 		case PACKET_LIN_DMA:
3864 			parser->contains_dma_pkt = true;
3865 			if (is_mmu)
3866 				parser->patched_cb_size += pkt_size;
3867 			else
3868 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3869 					(struct packet_lin_dma *) user_pkt);
3870 			break;
3871 
3872 		case PACKET_WREG_32:
3873 		case PACKET_MSG_LONG:
3874 		case PACKET_MSG_SHORT:
3875 		case PACKET_REPEAT:
3876 		case PACKET_FENCE:
3877 		case PACKET_NOP:
3878 		case PACKET_ARB_POINT:
3879 			parser->patched_cb_size += pkt_size;
3880 			break;
3881 
3882 		default:
3883 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3884 				pkt_id);
3885 			rc = -EINVAL;
3886 			break;
3887 		}
3888 
3889 		if (rc)
3890 			break;
3891 	}
3892 
3893 	/*
3894 	 * The new CB should have space at the end for two MSG_PROT packets:
3895 	 * 1. A packet that will act as a completion packet
3896 	 * 2. A packet that will generate MSI-X interrupt
3897 	 */
3898 	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3899 
3900 	return rc;
3901 }
3902 
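/*
 * Expand a single user LIN_DMA packet into one packet per merged SG chunk
 * of the pinned host buffer. EB is kept only on the first descriptor and
 * the user's WR_COMP setting is restored only on the last one.
 */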
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
3908 {
3909 	struct hl_userptr *userptr;
3910 	struct scatterlist *sg, *sg_next_iter;
3911 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3912 	u64 len, len_next;
3913 	dma_addr_t dma_addr, dma_addr_next;
3914 	u64 device_memory_addr, addr;
3915 	enum dma_data_direction dir;
3916 	struct sg_table *sgt;
3917 	bool src_in_host = false;
3918 	bool skip_host_mem_pin = false;
3919 	bool user_memset;
3920 
3921 	ctl = le32_to_cpu(user_dma_pkt->ctl);
3922 
3923 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3924 		src_in_host = true;
3925 
3926 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3927 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3928 
3929 	if (src_in_host) {
3930 		addr = le64_to_cpu(user_dma_pkt->src_addr);
3931 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3932 		dir = DMA_TO_DEVICE;
3933 		if (user_memset)
3934 			skip_host_mem_pin = true;
3935 	} else {
3936 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3937 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3938 		dir = DMA_FROM_DEVICE;
3939 	}
3940 
3941 	if ((!skip_host_mem_pin) &&
3942 		(!hl_userptr_is_pinned(hdev, addr,
3943 					le32_to_cpu(user_dma_pkt->tsize),
3944 					parser->job_userptr_list, &userptr))) {
3945 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3946 				addr, user_dma_pkt->tsize);
3947 		return -EFAULT;
3948 	}
3949 
3950 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3951 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3952 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
3953 		return 0;
3954 	}
3955 
3956 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3957 
3958 	sgt = userptr->sgt;
3959 	dma_desc_cnt = 0;
3960 
3961 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3962 		len = sg_dma_len(sg);
3963 		dma_addr = sg_dma_address(sg);
3964 
3965 		if (len == 0)
3966 			break;
3967 
3968 		while ((count + 1) < sgt->nents) {
3969 			sg_next_iter = sg_next(sg);
3970 			len_next = sg_dma_len(sg_next_iter);
3971 			dma_addr_next = sg_dma_address(sg_next_iter);
3972 
3973 			if (len_next == 0)
3974 				break;
3975 
3976 			if ((dma_addr + len == dma_addr_next) &&
3977 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3978 				len += len_next;
3979 				count++;
3980 				sg = sg_next_iter;
3981 			} else {
3982 				break;
3983 			}
3984 		}
3985 
3986 		ctl = le32_to_cpu(user_dma_pkt->ctl);
3987 		if (likely(dma_desc_cnt))
3988 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3989 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3990 		new_dma_pkt->ctl = cpu_to_le32(ctl);
3991 		new_dma_pkt->tsize = cpu_to_le32(len);
3992 
3993 		if (dir == DMA_TO_DEVICE) {
3994 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3995 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3996 		} else {
3997 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3998 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3999 		}
4000 
4001 		if (!user_memset)
4002 			device_memory_addr += len;
4003 		dma_desc_cnt++;
4004 		new_dma_pkt++;
4005 	}
4006 
4007 	if (!dma_desc_cnt) {
4008 		dev_err(hdev->dev,
4009 			"Error of 0 SG entries when patching DMA packet\n");
4010 		return -EFAULT;
4011 	}
4012 
	/* Fix the last DMA packet - WR_COMP must be left as the user set it */
4014 	new_dma_pkt--;
4015 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4016 
4017 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4018 
4019 	return 0;
4020 }
4021 
static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
4024 {
4025 	u32 cb_parsed_length = 0;
4026 	u32 cb_patched_cur_length = 0;
4027 	int rc = 0;
4028 
	/* user_cb_size is more than 0 so the loop will always be executed */
4030 	while (cb_parsed_length < parser->user_cb_size) {
4031 		enum packet_id pkt_id;
4032 		u16 pkt_size;
4033 		u32 new_pkt_size = 0;
4034 		struct gaudi_packet *user_pkt, *kernel_pkt;
4035 
4036 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4037 		kernel_pkt = parser->patched_cb->kernel_address +
4038 					cb_patched_cur_length;
4039 
4040 		pkt_id = (enum packet_id) (
4041 				(le64_to_cpu(user_pkt->header) &
4042 				PACKET_HEADER_PACKET_ID_MASK) >>
4043 					PACKET_HEADER_PACKET_ID_SHIFT);
4044 
4045 		if (!validate_packet_id(pkt_id)) {
4046 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4047 			rc = -EINVAL;
4048 			break;
4049 		}
4050 
4051 		pkt_size = gaudi_packet_sizes[pkt_id];
4052 		cb_parsed_length += pkt_size;
4053 		if (cb_parsed_length > parser->user_cb_size) {
4054 			dev_err(hdev->dev,
4055 				"packet 0x%x is out of CB boundary\n", pkt_id);
4056 			rc = -EINVAL;
4057 			break;
4058 		}
4059 
4060 		switch (pkt_id) {
4061 		case PACKET_LIN_DMA:
4062 			rc = gaudi_patch_dma_packet(hdev, parser,
4063 					(struct packet_lin_dma *) user_pkt,
4064 					(struct packet_lin_dma *) kernel_pkt,
4065 					&new_pkt_size);
4066 			cb_patched_cur_length += new_pkt_size;
4067 			break;
4068 
4069 		case PACKET_MSG_PROT:
4070 			dev_err(hdev->dev,
4071 				"User not allowed to use MSG_PROT\n");
4072 			rc = -EPERM;
4073 			break;
4074 
4075 		case PACKET_CP_DMA:
4076 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4077 			rc = -EPERM;
4078 			break;
4079 
4080 		case PACKET_STOP:
4081 			dev_err(hdev->dev, "User not allowed to use STOP\n");
4082 			rc = -EPERM;
4083 			break;
4084 
4085 		case PACKET_WREG_32:
4086 		case PACKET_WREG_BULK:
4087 		case PACKET_MSG_LONG:
4088 		case PACKET_MSG_SHORT:
4089 		case PACKET_REPEAT:
4090 		case PACKET_FENCE:
4091 		case PACKET_NOP:
4092 		case PACKET_ARB_POINT:
4093 		case PACKET_LOAD_AND_EXE:
4094 			memcpy(kernel_pkt, user_pkt, pkt_size);
4095 			cb_patched_cur_length += pkt_size;
4096 			break;
4097 
4098 		default:
4099 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4100 				pkt_id);
4101 			rc = -EINVAL;
4102 			break;
4103 		}
4104 
4105 		if (rc)
4106 			break;
4107 	}
4108 
4109 	return rc;
4110 }
4111 
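/*
 * With the MMU enabled, the user CB is copied as-is into a kernel-owned
 * patched CB (with room for the two trailing MSG_PROT packets) and then
 * validated; LIN_DMA packets are not patched in this mode.
 */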
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
4114 {
4115 	u64 patched_cb_handle;
4116 	u32 patched_cb_size;
4117 	struct hl_cb *user_cb;
4118 	int rc;
4119 
4120 	/*
4121 	 * The new CB should have space at the end for two MSG_PROT pkt:
4122 	 * 1. A packet that will act as a completion packet
4123 	 * 2. A packet that will generate MSI interrupt
4124 	 */
4125 	parser->patched_cb_size = parser->user_cb_size +
4126 			sizeof(struct packet_msg_prot) * 2;
4127 
4128 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4129 				parser->patched_cb_size, false, false,
4130 				&patched_cb_handle);
4131 
4132 	if (rc) {
4133 		dev_err(hdev->dev,
4134 			"Failed to allocate patched CB for DMA CS %d\n",
4135 			rc);
4136 		return rc;
4137 	}
4138 
4139 	patched_cb_handle >>= PAGE_SHIFT;
4140 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4141 				(u32) patched_cb_handle);
4142 	/* hl_cb_get should never fail here so use kernel WARN */
4143 	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4144 			(u32) patched_cb_handle);
4145 	if (!parser->patched_cb) {
4146 		rc = -EFAULT;
4147 		goto out;
4148 	}
4149 
4150 	/*
4151 	 * The check that parser->user_cb_size <= parser->user_cb->size was done
4152 	 * in validate_queue_index().
4153 	 */
4154 	memcpy(parser->patched_cb->kernel_address,
4155 		parser->user_cb->kernel_address,
4156 		parser->user_cb_size);
4157 
4158 	patched_cb_size = parser->patched_cb_size;
4159 
4160 	/* Validate patched CB instead of user CB */
4161 	user_cb = parser->user_cb;
4162 	parser->user_cb = parser->patched_cb;
4163 	rc = gaudi_validate_cb(hdev, parser, true);
4164 	parser->user_cb = user_cb;
4165 
4166 	if (rc) {
4167 		hl_cb_put(parser->patched_cb);
4168 		goto out;
4169 	}
4170 
4171 	if (patched_cb_size != parser->patched_cb_size) {
4172 		dev_err(hdev->dev, "user CB size mismatch\n");
4173 		hl_cb_put(parser->patched_cb);
4174 		rc = -EINVAL;
4175 		goto out;
4176 	}
4177 
4178 out:
	/*
	 * Always call cb destroy here because we still hold one reference
	 * to it from the earlier cb_get call. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
4185 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4186 					patched_cb_handle << PAGE_SHIFT);
4187 
4188 	return rc;
4189 }
4190 
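/*
 * Without the MMU, first validate the user CB to compute the patched CB
 * size, then allocate the patched CB and rewrite it packet by packet,
 * expanding LIN_DMA packets against the pinned host memory.
 */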
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
4193 {
4194 	u64 patched_cb_handle;
4195 	int rc;
4196 
4197 	rc = gaudi_validate_cb(hdev, parser, false);
4198 
4199 	if (rc)
4200 		goto free_userptr;
4201 
4202 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4203 				parser->patched_cb_size, false, false,
4204 				&patched_cb_handle);
4205 	if (rc) {
4206 		dev_err(hdev->dev,
4207 			"Failed to allocate patched CB for DMA CS %d\n", rc);
4208 		goto free_userptr;
4209 	}
4210 
4211 	patched_cb_handle >>= PAGE_SHIFT;
4212 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4213 				(u32) patched_cb_handle);
4214 	/* hl_cb_get should never fail here so use kernel WARN */
4215 	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4216 			(u32) patched_cb_handle);
4217 	if (!parser->patched_cb) {
4218 		rc = -EFAULT;
4219 		goto out;
4220 	}
4221 
4222 	rc = gaudi_patch_cb(hdev, parser);
4223 
4224 	if (rc)
4225 		hl_cb_put(parser->patched_cb);
4226 
4227 out:
	/*
	 * Always call cb destroy here because we still hold one reference
	 * to it from the earlier cb_get call. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
4234 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4235 				patched_cb_handle << PAGE_SHIFT);
4236 
4237 free_userptr:
4238 	if (rc)
4239 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
4240 	return rc;
4241 }
4242 
static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
4245 {
4246 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4247 
4248 	/* For internal queue jobs just check if CB address is valid */
4249 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4250 					parser->user_cb_size,
4251 					asic_prop->sram_user_base_address,
4252 					asic_prop->sram_end_address))
4253 		return 0;
4254 
4255 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4256 					parser->user_cb_size,
4257 					asic_prop->dram_user_base_address,
4258 					asic_prop->dram_end_address))
4259 		return 0;
4260 
4261 	/* PMMU and HPMMU addresses are equal, check only one of them */
4262 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4263 					parser->user_cb_size,
4264 					asic_prop->pmmu.start_addr,
4265 					asic_prop->pmmu.end_addr))
4266 		return 0;
4267 
4268 	dev_err(hdev->dev,
4269 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4270 		parser->user_cb, parser->user_cb_size);
4271 
4272 	return -EFAULT;
4273 }
4274 
static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4276 {
4277 	struct gaudi_device *gaudi = hdev->asic_specific;
4278 
4279 	if (parser->queue_type == QUEUE_TYPE_INT)
4280 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
4281 
4282 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4283 		return gaudi_parse_cb_mmu(hdev, parser);
4284 	else
4285 		return gaudi_parse_cb_no_mmu(hdev, parser);
4286 }
4287 
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
					void *kernel_address, u32 len,
					u64 cq_addr, u32 cq_val, u32 msi_vec,
					bool eb)
4292 {
4293 	struct gaudi_device *gaudi = hdev->asic_specific;
4294 	struct packet_msg_prot *cq_pkt;
4295 	u32 tmp;
4296 
4297 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4298 
4299 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4300 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4301 
4302 	if (eb)
4303 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4304 
4305 	cq_pkt->ctl = cpu_to_le32(tmp);
4306 	cq_pkt->value = cpu_to_le32(cq_val);
4307 	cq_pkt->addr = cpu_to_le64(cq_addr);
4308 
4309 	cq_pkt++;
4310 
4311 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4312 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4313 	cq_pkt->ctl = cpu_to_le32(tmp);
4314 	cq_pkt->value = cpu_to_le32(1);
4315 
4316 	if (!gaudi->multi_msi_mode)
4317 		msi_vec = 0;
4318 
4319 	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4320 }
4321 
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4323 {
4324 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4325 }
4326 
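/*
 * Fill a device memory range with a 64-bit pattern by building a single
 * memset LIN_DMA packet in a kernel CB and sending it as a job through
 * QMAN0 on DMA channel 0, checking the DMA0 error cause register before
 * and after the transfer.
 */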
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
4329 {
4330 	struct packet_lin_dma *lin_dma_pkt;
4331 	struct hl_cs_job *job;
4332 	u32 cb_size, ctl, err_cause;
4333 	struct hl_cb *cb;
4334 	int rc;
4335 
4336 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4337 	if (!cb)
4338 		return -EFAULT;
4339 
4340 	lin_dma_pkt = cb->kernel_address;
4341 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4342 	cb_size = sizeof(*lin_dma_pkt);
4343 
4344 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4345 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4346 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4347 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4348 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4349 
4350 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
4351 	lin_dma_pkt->src_addr = cpu_to_le64(val);
4352 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4353 	lin_dma_pkt->tsize = cpu_to_le32(size);
4354 
4355 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4356 	if (!job) {
4357 		dev_err(hdev->dev, "Failed to allocate a new job\n");
4358 		rc = -ENOMEM;
4359 		goto release_cb;
4360 	}
4361 
4362 	/* Verify DMA is OK */
4363 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4364 	if (err_cause && !hdev->init_done) {
4365 		dev_dbg(hdev->dev,
4366 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
4367 			err_cause);
4368 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4369 	}
4370 
4371 	job->id = 0;
4372 	job->user_cb = cb;
4373 	job->user_cb->cs_cnt++;
4374 	job->user_cb_size = cb_size;
4375 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4376 	job->patched_cb = job->user_cb;
4377 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4378 
4379 	hl_debugfs_add_job(hdev, job);
4380 
4381 	rc = gaudi_send_job_on_qman0(hdev, job);
4382 	hl_debugfs_remove_job(hdev, job);
4383 	kfree(job);
4384 	cb->cs_cnt--;
4385 
4386 	/* Verify DMA is OK */
4387 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4388 	if (err_cause) {
4389 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4390 		rc = -EIO;
4391 		if (!hdev->init_done) {
4392 			dev_dbg(hdev->dev,
4393 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
4394 				err_cause);
4395 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4396 		}
4397 	}
4398 
4399 release_cb:
4400 	hl_cb_put(cb);
4401 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4402 
4403 	return rc;
4404 }
4405 
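/*
 * Clear the sync objects and monitor status registers of the sync
 * managers. For the west-south sync manager, only the entries from
 * GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT / _MONITOR onward are cleared.
 */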
static void gaudi_restore_sm_registers(struct hl_device *hdev)
4407 {
4408 	int i;
4409 
4410 	for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4411 		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4412 		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4413 		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4414 	}
4415 
4416 	for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4417 		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4418 		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4419 		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4420 	}
4421 
4422 	i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4423 
4424 	for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4425 		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4426 
4427 	i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4428 
4429 	for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4430 		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4431 }
4432 
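/*
 * Restore each DMA core's write-completion address and data to point at
 * the channel's sync object, and reset WR_AWUSER_31_11 on channels 2-7,
 * which the user may have modified.
 */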
static void gaudi_restore_dma_registers(struct hl_device *hdev)
4434 {
4435 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4436 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4437 	int i;
4438 
4439 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4440 		u64 sob_addr = CFG_BASE +
4441 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4442 				(i * sob_delta);
4443 		u32 dma_offset = i * DMA_CORE_OFFSET;
4444 
4445 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4446 				lower_32_bits(sob_addr));
4447 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4448 				upper_32_bits(sob_addr));
4449 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4450 
4451 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4452 		 * modified by the user for SRAM reduction
4453 		 */
4454 		if (i > 1)
4455 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4456 								0x00000001);
4457 	}
4458 }
4459 
static void gaudi_restore_qm_registers(struct hl_device *hdev)
4461 {
4462 	u32 qman_offset;
4463 	int i;
4464 
4465 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4466 		qman_offset = i * DMA_QMAN_OFFSET;
4467 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4468 	}
4469 
4470 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4471 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4472 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4473 	}
4474 
4475 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4476 		qman_offset = i * TPC_QMAN_OFFSET;
4477 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4478 	}
4479 }
4480 
static void gaudi_restore_user_registers(struct hl_device *hdev)
4482 {
4483 	gaudi_restore_sm_registers(hdev);
4484 	gaudi_restore_dma_registers(hdev);
4485 	gaudi_restore_qm_registers(hdev);
4486 }
4487 
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4489 {
4490 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4491 	u64 addr = prop->sram_user_base_address;
4492 	u32 size = hdev->pldm ? 0x10000 :
4493 			(prop->sram_size - SRAM_USER_BASE_OFFSET);
4494 	u64 val = 0x7777777777777777ull;
4495 	int rc;
4496 
4497 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4498 	if (rc) {
4499 		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4500 		return rc;
4501 	}
4502 
4503 	gaudi_mmu_prepare(hdev, asid);
4504 
4505 	gaudi_restore_user_registers(hdev);
4506 
4507 	return 0;
4508 }
4509 
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4511 {
4512 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4513 	struct gaudi_device *gaudi = hdev->asic_specific;
4514 	u64 addr = prop->mmu_pgt_addr;
4515 	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4516 
4517 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4518 		return 0;
4519 
4520 	return gaudi_memset_device_memory(hdev, addr, size, 0);
4521 }
4522 
static void gaudi_restore_phase_topology(struct hl_device *hdev)
4524 {
4525 
4526 }
4527 
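/*
 * debugfs accessors: route the address either to the CFG space (blocked
 * while clock gating is enabled on the relevant engines), to the SRAM BAR,
 * to HBM by temporarily moving the HBM BAR, or to host physical memory
 * when no IOMMU is present.
 */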
static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4529 {
4530 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4531 	struct gaudi_device *gaudi = hdev->asic_specific;
4532 	u64 hbm_bar_addr;
4533 	int rc = 0;
4534 
4535 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4536 
4537 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4538 				(hdev->clock_gating_mask &
4539 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4540 
4541 			dev_err_ratelimited(hdev->dev,
4542 				"Can't read register - clock gating is enabled!\n");
4543 			rc = -EFAULT;
4544 		} else {
4545 			*val = RREG32(addr - CFG_BASE);
4546 		}
4547 
4548 	} else if ((addr >= SRAM_BASE_ADDR) &&
4549 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4550 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4551 				(addr - SRAM_BASE_ADDR));
4552 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4553 		u64 bar_base_addr = DRAM_PHYS_BASE +
4554 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4555 
4556 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4557 		if (hbm_bar_addr != U64_MAX) {
4558 			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4559 						(addr - bar_base_addr));
4560 
4561 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4562 						hbm_bar_addr);
4563 		}
4564 		if (hbm_bar_addr == U64_MAX)
4565 			rc = -EIO;
4566 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4567 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4568 	} else {
4569 		rc = -EFAULT;
4570 	}
4571 
4572 	return rc;
4573 }
4574 
static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4576 {
4577 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4578 	struct gaudi_device *gaudi = hdev->asic_specific;
4579 	u64 hbm_bar_addr;
4580 	int rc = 0;
4581 
4582 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4583 
4584 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4585 				(hdev->clock_gating_mask &
4586 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4587 
4588 			dev_err_ratelimited(hdev->dev,
4589 				"Can't write register - clock gating is enabled!\n");
4590 			rc = -EFAULT;
4591 		} else {
4592 			WREG32(addr - CFG_BASE, val);
4593 		}
4594 
4595 	} else if ((addr >= SRAM_BASE_ADDR) &&
4596 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4597 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4598 					(addr - SRAM_BASE_ADDR));
4599 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4600 		u64 bar_base_addr = DRAM_PHYS_BASE +
4601 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4602 
4603 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4604 		if (hbm_bar_addr != U64_MAX) {
4605 			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4606 						(addr - bar_base_addr));
4607 
4608 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4609 						hbm_bar_addr);
4610 		}
4611 		if (hbm_bar_addr == U64_MAX)
4612 			rc = -EIO;
4613 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4614 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4615 	} else {
4616 		rc = -EFAULT;
4617 	}
4618 
4619 	return rc;
4620 }
4621 
static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4623 {
4624 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4625 	struct gaudi_device *gaudi = hdev->asic_specific;
4626 	u64 hbm_bar_addr;
4627 	int rc = 0;
4628 
4629 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4630 
4631 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4632 				(hdev->clock_gating_mask &
4633 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4634 
4635 			dev_err_ratelimited(hdev->dev,
4636 				"Can't read register - clock gating is enabled!\n");
4637 			rc = -EFAULT;
4638 		} else {
4639 			u32 val_l = RREG32(addr - CFG_BASE);
4640 			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4641 
4642 			*val = (((u64) val_h) << 32) | val_l;
4643 		}
4644 
4645 	} else if ((addr >= SRAM_BASE_ADDR) &&
4646 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4647 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4648 				(addr - SRAM_BASE_ADDR));
4649 	} else if (addr <=
4650 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4651 		u64 bar_base_addr = DRAM_PHYS_BASE +
4652 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4653 
4654 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4655 		if (hbm_bar_addr != U64_MAX) {
4656 			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4657 						(addr - bar_base_addr));
4658 
4659 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4660 						hbm_bar_addr);
4661 		}
4662 		if (hbm_bar_addr == U64_MAX)
4663 			rc = -EIO;
4664 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4665 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4666 	} else {
4667 		rc = -EFAULT;
4668 	}
4669 
4670 	return rc;
4671 }
4672 
static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4674 {
4675 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4676 	struct gaudi_device *gaudi = hdev->asic_specific;
4677 	u64 hbm_bar_addr;
4678 	int rc = 0;
4679 
4680 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4681 
4682 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4683 				(hdev->clock_gating_mask &
4684 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4685 
4686 			dev_err_ratelimited(hdev->dev,
4687 				"Can't write register - clock gating is enabled!\n");
4688 			rc = -EFAULT;
4689 		} else {
4690 			WREG32(addr - CFG_BASE, lower_32_bits(val));
4691 			WREG32(addr + sizeof(u32) - CFG_BASE,
4692 				upper_32_bits(val));
4693 		}
4694 
4695 	} else if ((addr >= SRAM_BASE_ADDR) &&
4696 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4697 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4698 					(addr - SRAM_BASE_ADDR));
4699 	} else if (addr <=
4700 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4701 		u64 bar_base_addr = DRAM_PHYS_BASE +
4702 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4703 
4704 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4705 		if (hbm_bar_addr != U64_MAX) {
4706 			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4707 						(addr - bar_base_addr));
4708 
4709 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4710 						hbm_bar_addr);
4711 		}
4712 		if (hbm_bar_addr == U64_MAX)
4713 			rc = -EIO;
4714 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4715 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4716 	} else {
4717 		rc = -EFAULT;
4718 	}
4719 
4720 	return rc;
4721 }
4722 
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4724 {
4725 	struct gaudi_device *gaudi = hdev->asic_specific;
4726 
4727 	if (hdev->hard_reset_pending)
4728 		return U64_MAX;
4729 
4730 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
4731 			(addr - gaudi->hbm_bar_cur_addr));
4732 }
4733 
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4735 {
4736 	struct gaudi_device *gaudi = hdev->asic_specific;
4737 
4738 	if (hdev->hard_reset_pending)
4739 		return;
4740 
4741 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4742 			(addr - gaudi->hbm_bar_cur_addr));
4743 }
4744 
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4746 {
4747 	/* mask to zero the MMBP and ASID bits */
4748 	WREG32_AND(reg, ~0x7FF);
4749 	WREG32_OR(reg, asid);
4750 }
4751 
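/*
 * Program the given ASID into the non-secure ARUSER/AWUSER properties of
 * the device's QMANs, DMA cores and engine config blocks, with clock
 * gating disabled for the duration of the update.
 */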
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4753 {
4754 	struct gaudi_device *gaudi = hdev->asic_specific;
4755 
4756 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4757 		return;
4758 
4759 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4760 		WARN(1, "asid %u is too big\n", asid);
4761 		return;
4762 	}
4763 
4764 	mutex_lock(&gaudi->clk_gate_mutex);
4765 
4766 	hdev->asic_funcs->disable_clock_gating(hdev);
4767 
4768 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4769 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4770 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4771 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4772 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4773 
4774 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4775 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4776 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4777 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4778 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4779 
4780 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4781 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4782 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4783 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4784 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4785 
4786 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4787 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4788 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4789 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4790 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4791 
4792 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4793 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4794 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4795 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4796 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4797 
4798 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4799 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4800 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4801 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4802 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4803 
4804 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4805 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4806 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4807 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4808 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4809 
4810 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4811 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4812 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4813 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4814 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4815 
4816 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4817 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4818 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4819 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4820 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4821 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4822 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4823 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4824 
4825 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4826 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4827 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4828 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4829 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4830 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4831 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4832 
4833 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4839 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4840 
4841 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4847 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4848 
4849 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4855 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4856 
4857 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4863 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4864 
4865 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4871 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4872 
4873 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4879 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4880 
4881 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4887 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4888 
4889 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4895 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4896 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4897 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4898 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4899 
4900 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4901 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4902 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4903 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4904 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4905 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4906 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4907 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4908 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4909 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4910 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4911 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4912 
4913 	hdev->asic_funcs->set_clock_gating(hdev);
4914 
4915 	mutex_unlock(&gaudi->clk_gate_mutex);
4916 }
4917 
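/*
 * Execute a driver-internal job on the PCI DMA QMAN (queue DMA_0_0): a
 * MSG_PROT fence packet is appended at the end of the patched CB, the DMA
 * core protection bit is set for the duration of the job, the CB is sent
 * without a completion entry and the function polls host memory until the
 * fence value is written back or the timeout expires. The device must be
 * idle when this is called.
 */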
4918 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4919 		struct hl_cs_job *job)
4920 {
4921 	struct packet_msg_prot *fence_pkt;
4922 	u32 *fence_ptr;
4923 	dma_addr_t fence_dma_addr;
4924 	struct hl_cb *cb;
4925 	u32 tmp, timeout, dma_offset;
4926 	int rc;
4927 
4928 	if (hdev->pldm)
4929 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4930 	else
4931 		timeout = HL_DEVICE_TIMEOUT_USEC;
4932 
4933 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4934 		dev_err_ratelimited(hdev->dev,
4935 			"Can't send driver job on QMAN0 because the device is not idle\n");
4936 		return -EBUSY;
4937 	}
4938 
4939 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4940 							&fence_dma_addr);
4941 	if (!fence_ptr) {
4942 		dev_err(hdev->dev,
4943 			"Failed to allocate fence memory for QMAN0\n");
4944 		return -ENOMEM;
4945 	}
4946 
4947 	cb = job->patched_cb;
4948 
4949 	fence_pkt = cb->kernel_address +
4950 			job->job_cb_size - sizeof(struct packet_msg_prot);
4951 
4952 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4953 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4954 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4955 
4956 	fence_pkt->ctl = cpu_to_le32(tmp);
4957 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4958 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4959 
4960 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4961 
4962 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4963 
4964 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4965 					job->job_cb_size, cb->bus_address);
4966 	if (rc) {
4967 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4968 		goto free_fence_ptr;
4969 	}
4970 
4971 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4972 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4973 				timeout, true);
4974 
4975 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4976 
4977 	if (rc == -ETIMEDOUT) {
4978 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4979 		goto free_fence_ptr;
4980 	}
4981 
4982 free_fence_ptr:
4983 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4984 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4985 
4986 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4987 					fence_dma_addr);
4988 	return rc;
4989 }
4990 
4991 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4992 {
4993 	if (event_type >= GAUDI_EVENT_SIZE)
4994 		goto event_not_supported;
4995 
4996 	if (!gaudi_irq_map_table[event_type].valid)
4997 		goto event_not_supported;
4998 
4999 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5000 
5001 	return;
5002 
5003 event_not_supported:
5004 	snprintf(desc, size, "N/A");
5005 }
5006 
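/*
 * Each DMA_IF coordinate in the RAZWI initiator ID is shared by two DMA
 * cores. Read both cores' ERR_CAUSE registers and use the HBW read/write
 * error bits to narrow the report down to a single DMA engine when possible.
 */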
5007 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5008 							u32 x_y, bool is_write)
5009 {
5010 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5011 
5012 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5013 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5014 
5015 	switch (x_y) {
5016 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5017 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5018 		dma_id[0] = 0;
5019 		dma_id[1] = 2;
5020 		break;
5021 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5022 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5023 		dma_id[0] = 1;
5024 		dma_id[1] = 3;
5025 		break;
5026 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5027 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5028 		dma_id[0] = 4;
5029 		dma_id[1] = 6;
5030 		break;
5031 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5032 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5033 		dma_id[0] = 5;
5034 		dma_id[1] = 7;
5035 		break;
5036 	default:
5037 		goto unknown_initiator;
5038 	}
5039 
5040 	for (i = 0 ; i < 2 ; i++) {
5041 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5042 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5043 	}
5044 
5045 	switch (x_y) {
5046 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5047 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5048 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5049 			return "DMA0";
5050 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5051 			return "DMA2";
5052 		else
5053 			return "DMA0 or DMA2";
5054 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5055 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5056 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057 			return "DMA1";
5058 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059 			return "DMA3";
5060 		else
5061 			return "DMA1 or DMA3";
5062 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5063 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5064 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065 			return "DMA4";
5066 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067 			return "DMA6";
5068 		else
5069 			return "DMA4 or DMA6";
5070 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5071 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5072 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073 			return "DMA5";
5074 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075 			return "DMA7";
5076 		else
5077 			return "DMA5 or DMA7";
5078 	}
5079 
5080 unknown_initiator:
5081 	return "unknown initiator";
5082 }
5083 
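/*
 * Translate the RAZWI initiator ID captured by the MMU (router X/Y
 * coordinates plus AXI ID) into a human-readable engine name. Some
 * coordinates are shared by several initiators (TPC/NIC, TPC/PCI/CPU/PSOC),
 * in which case the AXI ID is used to disambiguate.
 */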
5084 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5085 							bool is_write)
5086 {
5087 	u32 val, x_y, axi_id;
5088 
5089 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5090 				RREG32(mmMMU_UP_RAZWI_READ_ID);
5091 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5092 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5093 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5094 			RAZWI_INITIATOR_AXI_ID_SHIFT);
5095 
5096 	switch (x_y) {
5097 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5098 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5099 			return "TPC0";
5100 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5101 			return "NIC0";
5102 		break;
5103 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
5104 		return "TPC1";
5105 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5106 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5107 		return "MME0";
5108 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5109 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5110 		return "MME1";
5111 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
5112 		return "TPC2";
5113 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5114 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5115 			return "TPC3";
5116 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5117 			return "PCI";
5118 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5119 			return "CPU";
5120 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5121 			return "PSOC";
5122 		break;
5123 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5124 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5125 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5126 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5127 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5128 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5129 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5130 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5131 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5132 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5133 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5134 			return "TPC4";
5135 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5136 			return "NIC1";
5137 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5138 			return "NIC2";
5139 		break;
5140 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
5141 		return "TPC5";
5142 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5143 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5144 		return "MME2";
5145 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5146 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5147 		return "MME3";
5148 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
5149 		return "TPC6";
5150 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5151 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5152 			return "TPC7";
5153 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5154 			return "NIC4";
5155 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5156 			return "NIC5";
5157 		break;
5158 	default:
5159 		break;
5160 	}
5161 
5162 	dev_err(hdev->dev,
5163 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5164 		val,
5165 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5166 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5167 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5168 			RAZWI_INITIATOR_AXI_ID_MASK);
5169 
5170 	return "unknown initiator";
5171 }
5172 
5173 static void gaudi_print_razwi_info(struct hl_device *hdev)
5174 {
5175 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5176 		dev_err_ratelimited(hdev->dev,
5177 			"RAZWI event caused by illegal write of %s\n",
5178 			gaudi_get_razwi_initiator_name(hdev, true));
5179 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5180 	}
5181 
5182 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5183 		dev_err_ratelimited(hdev->dev,
5184 			"RAZWI event caused by illegal read of %s\n",
5185 			gaudi_get_razwi_initiator_name(hdev, false));
5186 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5187 	}
5188 }
5189 
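/*
 * Dump and clear the MMU page-fault and access-error capture registers.
 * The faulting virtual address is reassembled from the VA[49:32] bits of the
 * capture register and the separate VA[31:0] capture register.
 */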
5190 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5191 {
5192 	struct gaudi_device *gaudi = hdev->asic_specific;
5193 	u64 addr;
5194 	u32 val;
5195 
5196 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5197 		return;
5198 
5199 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5200 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5201 		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5202 		addr <<= 32;
5203 		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5204 
5205 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5206 					addr);
5207 
5208 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5209 	}
5210 
5211 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5212 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5213 		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5214 		addr <<= 32;
5215 		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5216 
5217 		dev_err_ratelimited(hdev->dev,
5218 				"MMU access error on va 0x%llx\n", addr);
5219 
5220 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5221 	}
5222 }
5223 
5224 /*
5225  *  +-------------------+------------------------------------------------------+
5226  *  | Configuration Reg |                     Description                      |
5227  *  |      Address      |                                                      |
5228  *  +-------------------+------------------------------------------------------+
5229  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5230  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5231  *  |                   |0xF34 memory wrappers 63:32                           |
5232  *  |                   |0xF38 memory wrappers 95:64                           |
5233  *  |                   |0xF3C memory wrappers 127:96                          |
5234  *  +-------------------+------------------------------------------------------+
5235  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5236  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5237  *  |                   |0xF44 memory wrappers 63:32                           |
5238  *  |                   |0xF48 memory wrappers 95:64                           |
5239  *  |                   |0xF4C memory wrappers 127:96                          |
5240  *  +-------------------+------------------------------------------------------+
5241  */
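/*
 * Scan the SERR/DERR indication registers described above to find the memory
 * wrapper that reported the ECC error, select that wrapper and read back the
 * captured error address and syndrome. The error indication is cleared
 * before returning.
 */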
5242 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5243 		struct ecc_info_extract_params *params, u64 *ecc_address,
5244 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5245 {
5246 	struct gaudi_device *gaudi = hdev->asic_specific;
5247 	u32 i, num_mem_regs, reg, err_bit;
5248 	u64 err_addr, err_word = 0;
5249 	int rc = 0;
5250 
5251 	num_mem_regs = params->num_memories / 32 +
5252 			((params->num_memories % 32) ? 1 : 0);
5253 
5254 	if (params->block_address >= CFG_BASE)
5255 		params->block_address -= CFG_BASE;
5256 
5257 	if (params->derr)
5258 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5259 	else
5260 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5261 
5262 	if (params->disable_clock_gating) {
5263 		mutex_lock(&gaudi->clk_gate_mutex);
5264 		hdev->asic_funcs->disable_clock_gating(hdev);
5265 	}
5266 
5267 	/* Set invalid wrapper index */
5268 	*memory_wrapper_idx = 0xFF;
5269 
5270 	/* Iterate through memory wrappers, a single bit must be set */
5271 	for (i = 0 ; i < num_mem_regs ; i++) {
5272 		/* Each indication register covers 32 memory wrappers */
5273 		err_word = RREG32(err_addr + i * 4);
5274 		if (err_word) {
5275 			err_bit = __ffs(err_word);
5276 			*memory_wrapper_idx = err_bit + (32 * i);
5277 			break;
5278 		}
5279 	}
5280 
5281 	if (*memory_wrapper_idx == 0xFF) {
5282 		dev_err(hdev->dev, "ECC error information cannot be found\n");
5283 		rc = -EINVAL;
5284 		goto enable_clk_gate;
5285 	}
5286 
5287 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5288 			*memory_wrapper_idx);
5289 
5290 	*ecc_address =
5291 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5292 	*ecc_syndrom =
5293 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5294 
5295 	/* Clear error indication */
5296 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5297 	if (params->derr)
5298 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5299 	else
5300 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5301 
5302 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5303 
5304 enable_clk_gate:
5305 	if (params->disable_clock_gating) {
5306 		hdev->asic_funcs->set_clock_gating(hdev);
5307 
5308 		mutex_unlock(&gaudi->clk_gate_mutex);
5309 	}
5310 
5311 	return rc;
5312 }
5313 
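/*
 * Common QMAN error handler: walk the GLBL_STS1 register of every stream
 * plus the lower CP, print each asserted error cause, write-1-clear the
 * handled bits and finally report any arbiter (ARB_ERR) causes.
 */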
5314 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5315 					  const char *qm_name,
5316 					  u64 glbl_sts_addr,
5317 					  u64 arb_err_addr)
5318 {
5319 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5320 	char reg_desc[32];
5321 
5322 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
5323 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5324 		glbl_sts_clr_val = 0;
5325 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5326 
5327 		if (!glbl_sts_val)
5328 			continue;
5329 
5330 		if (i == QMAN_STREAMS)
5331 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5332 		else
5333 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5334 
5335 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5336 			if (glbl_sts_val & BIT(j)) {
5337 				dev_err_ratelimited(hdev->dev,
5338 						"%s %s. err cause: %s\n",
5339 						qm_name, reg_desc,
5340 						gaudi_qman_error_cause[j]);
5341 				glbl_sts_clr_val |= BIT(j);
5342 			}
5343 		}
5344 
5345 		/* Write 1 to clear errors */
5346 		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5347 	}
5348 
5349 	arb_err_val = RREG32(arb_err_addr);
5350 
5351 	if (!arb_err_val)
5352 		return;
5353 
5354 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5355 		if (arb_err_val & BIT(j)) {
5356 			dev_err_ratelimited(hdev->dev,
5357 					"%s ARB_ERR. err cause: %s\n",
5358 					qm_name,
5359 					gaudi_qman_arb_error_cause[j]);
5360 		}
5361 	}
5362 }
5363 
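/*
 * For ECC events whose details are delivered by the firmware, the data from
 * the event queue entry is used as-is. For TPC and MME events the driver
 * extracts the error address, syndrome and memory wrapper index directly
 * from the relevant block's registers.
 */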
5364 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5365 		struct hl_eq_ecc_data *ecc_data)
5366 {
5367 	struct ecc_info_extract_params params;
5368 	u64 ecc_address = 0, ecc_syndrom = 0;
5369 	u8 index, memory_wrapper_idx = 0;
5370 	bool extract_info_from_fw;
5371 	int rc;
5372 
5373 	switch (event_type) {
5374 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5375 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5376 		extract_info_from_fw = true;
5377 		break;
5378 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5379 		index = event_type - GAUDI_EVENT_TPC0_SERR;
5380 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5381 		params.num_memories = 90;
5382 		params.derr = false;
5383 		params.disable_clock_gating = true;
5384 		extract_info_from_fw = false;
5385 		break;
5386 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5387 		index = event_type - GAUDI_EVENT_TPC0_DERR;
5388 		params.block_address =
5389 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5390 		params.num_memories = 90;
5391 		params.derr = true;
5392 		params.disable_clock_gating = true;
5393 		extract_info_from_fw = false;
5394 		break;
5395 	case GAUDI_EVENT_MME0_ACC_SERR:
5396 	case GAUDI_EVENT_MME1_ACC_SERR:
5397 	case GAUDI_EVENT_MME2_ACC_SERR:
5398 	case GAUDI_EVENT_MME3_ACC_SERR:
5399 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5400 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5401 		params.num_memories = 128;
5402 		params.derr = false;
5403 		params.disable_clock_gating = true;
5404 		extract_info_from_fw = false;
5405 		break;
5406 	case GAUDI_EVENT_MME0_ACC_DERR:
5407 	case GAUDI_EVENT_MME1_ACC_DERR:
5408 	case GAUDI_EVENT_MME2_ACC_DERR:
5409 	case GAUDI_EVENT_MME3_ACC_DERR:
5410 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5411 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5412 		params.num_memories = 128;
5413 		params.derr = true;
5414 		params.disable_clock_gating = true;
5415 		extract_info_from_fw = false;
5416 		break;
5417 	case GAUDI_EVENT_MME0_SBAB_SERR:
5418 	case GAUDI_EVENT_MME1_SBAB_SERR:
5419 	case GAUDI_EVENT_MME2_SBAB_SERR:
5420 	case GAUDI_EVENT_MME3_SBAB_SERR:
5421 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5422 		params.block_address =
5423 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5424 		params.num_memories = 33;
5425 		params.derr = false;
5426 		params.disable_clock_gating = true;
5427 		extract_info_from_fw = false;
5428 		break;
5429 	case GAUDI_EVENT_MME0_SBAB_DERR:
5430 	case GAUDI_EVENT_MME1_SBAB_DERR:
5431 	case GAUDI_EVENT_MME2_SBAB_DERR:
5432 	case GAUDI_EVENT_MME3_SBAB_DERR:
5433 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5434 		params.block_address =
5435 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5436 		params.num_memories = 33;
5437 		params.derr = true;
5438 		params.disable_clock_gating = true;
5439 		extract_info_from_fw = false;
5440 		break;
5441 	default:
5442 		return;
5443 	}
5444 
5445 	if (extract_info_from_fw) {
5446 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
5447 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5448 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5449 	} else {
5450 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5451 				&ecc_syndrom, &memory_wrapper_idx);
5452 		if (rc)
5453 			return;
5454 	}
5455 
5456 	dev_err(hdev->dev,
5457 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
5458 		ecc_address, ecc_syndrom, memory_wrapper_idx);
5459 }
5460 
5461 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5462 {
5463 	u64 glbl_sts_addr, arb_err_addr;
5464 	u8 index;
5465 	char desc[32];
5466 
5467 	switch (event_type) {
5468 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5469 		index = event_type - GAUDI_EVENT_TPC0_QM;
5470 		glbl_sts_addr =
5471 			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5472 		arb_err_addr =
5473 			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5474 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5475 		break;
5476 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5477 		index = event_type - GAUDI_EVENT_MME0_QM;
5478 		glbl_sts_addr =
5479 			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5480 		arb_err_addr =
5481 			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5482 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5483 		break;
5484 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5485 		index = event_type - GAUDI_EVENT_DMA0_QM;
5486 		glbl_sts_addr =
5487 			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5488 		arb_err_addr =
5489 			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5490 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5491 		break;
5492 	default:
5493 		return;
5494 	}
5495 
5496 	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5497 }
5498 
5499 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5500 					bool razwi)
5501 {
5502 	char desc[64] = "";
5503 
5504 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
5505 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5506 		event_type, desc);
5507 
5508 	if (razwi) {
5509 		gaudi_print_razwi_info(hdev);
5510 		gaudi_print_mmu_error_info(hdev);
5511 	}
5512 }
5513 
5514 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5515 {
5516 	struct gaudi_device *gaudi = hdev->asic_specific;
5517 
5518 	/* Unmask all IRQs since some could have been received
5519 	 * during the soft reset
5520 	 */
5521 	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5522 }
5523 
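/*
 * Read, print and clear the interrupt and ECC information of both
 * pseudo-channels of every channel in a single HBM device, then check the
 * MC SRAM SERR/DERR summary registers. Returns non-zero if any error
 * indication was found.
 */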
5524 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5525 {
5526 	int ch, err = 0;
5527 	u32 base, val, val2;
5528 
5529 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5530 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5531 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5532 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
5533 		if (val) {
5534 			err = 1;
5535 			dev_err(hdev->dev,
5536 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5537 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5538 				(val >> 2) & 0x1, (val >> 3) & 0x1,
5539 				(val >> 4) & 0x1);
5540 
5541 			val2 = RREG32(base + ch * 0x1000 + 0x060);
5542 			dev_err(hdev->dev,
5543 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5544 				device, ch * 2,
5545 				RREG32(base + ch * 0x1000 + 0x064),
5546 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5547 				(val2 & 0xFF0000) >> 16,
5548 				(val2 & 0xFF000000) >> 24);
5549 		}
5550 
5551 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5552 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
5553 		if (val) {
5554 			err = 1;
5555 			dev_err(hdev->dev,
5556 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5557 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5558 				(val >> 2) & 0x1, (val >> 3) & 0x1,
5559 				(val >> 4) & 0x1);
5560 
5561 			val2 = RREG32(base + ch * 0x1000 + 0x070);
5562 			dev_err(hdev->dev,
5563 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5564 				device, ch * 2 + 1,
5565 				RREG32(base + ch * 0x1000 + 0x074),
5566 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5567 				(val2 & 0xFF0000) >> 16,
5568 				(val2 & 0xFF000000) >> 24);
5569 		}
5570 
5571 		/* Clear interrupts */
5572 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5573 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5574 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5575 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5576 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5577 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5578 	}
5579 
5580 	val  = RREG32(base + 0x8F30);
5581 	val2 = RREG32(base + 0x8F34);
5582 	if (val | val2) {
5583 		err = 1;
5584 		dev_err(hdev->dev,
5585 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5586 			device, val, val2);
5587 	}
5588 	val  = RREG32(base + 0x8F40);
5589 	val2 = RREG32(base + 0x8F44);
5590 	if (val | val2) {
5591 		err = 1;
5592 		dev_err(hdev->dev,
5593 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5594 			device, val, val2);
5595 	}
5596 
5597 	return err;
5598 }
5599 
5600 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5601 {
5602 	switch (hbm_event_type) {
5603 	case GAUDI_EVENT_HBM0_SPI_0:
5604 	case GAUDI_EVENT_HBM0_SPI_1:
5605 		return 0;
5606 	case GAUDI_EVENT_HBM1_SPI_0:
5607 	case GAUDI_EVENT_HBM1_SPI_1:
5608 		return 1;
5609 	case GAUDI_EVENT_HBM2_SPI_0:
5610 	case GAUDI_EVENT_HBM2_SPI_1:
5611 		return 2;
5612 	case GAUDI_EVENT_HBM3_SPI_0:
5613 	case GAUDI_EVENT_HBM3_SPI_1:
5614 		return 3;
5615 	default:
5616 		break;
5617 	}
5618 
5619 	/* Should never happen */
5620 	return 0;
5621 }
5622 
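/*
 * Read, print and clear a TPC engine's TPC_INTR_CAUSE register. Returns true
 * if one of the causes is a QM error (bit 15), which requires a soft reset
 * of the device.
 */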
5623 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5624 					char *interrupt_name)
5625 {
5626 	struct gaudi_device *gaudi = hdev->asic_specific;
5627 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5628 	bool soft_reset_required = false;
5629 
5630 	/* Accessing the TPC_INTR_CAUSE registers requires disabling clock
5631 	 * gating, and thus cannot be done by CPU-CP. It is therefore done
5632 	 * by the driver instead.
5633 	 */
5634 
5635 	mutex_lock(&gaudi->clk_gate_mutex);
5636 
5637 	hdev->asic_funcs->disable_clock_gating(hdev);
5638 
5639 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5640 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5641 
5642 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5643 		if (tpc_interrupts_cause & BIT(i)) {
5644 			dev_err_ratelimited(hdev->dev,
5645 					"TPC%d_%s interrupt cause: %s\n",
5646 					tpc_id, interrupt_name,
5647 					gaudi_tpc_interrupts_cause[i]);
5648 			/* If this is a QM error, we need to soft-reset */
5649 			if (i == 15)
5650 				soft_reset_required = true;
5651 		}
5652 
5653 	/* Clear interrupts */
5654 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5655 
5656 	hdev->asic_funcs->set_clock_gating(hdev);
5657 
5658 	mutex_unlock(&gaudi->clk_gate_mutex);
5659 
5660 	return soft_reset_required;
5661 }
5662 
5663 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5664 {
5665 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5666 }
5667 
5668 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5669 {
5670 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5671 }
5672 
5673 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5674 					u16 event_type)
5675 {
5676 	switch (event_type) {
5677 	case GAUDI_EVENT_FIX_POWER_ENV_S:
5678 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5679 		dev_info_ratelimited(hdev->dev,
5680 			"Clock throttling due to power consumption\n");
5681 		break;
5682 
5683 	case GAUDI_EVENT_FIX_POWER_ENV_E:
5684 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5685 		dev_info_ratelimited(hdev->dev,
5686 			"Power envelope is safe, back to optimal clock\n");
5687 		break;
5688 
5689 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5690 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5691 		dev_info_ratelimited(hdev->dev,
5692 			"Clock throttling due to overheating\n");
5693 		break;
5694 
5695 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5696 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5697 		dev_info_ratelimited(hdev->dev,
5698 			"Thermal envelope is safe, back to optimal clock\n");
5699 		break;
5700 
5701 	default:
5702 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
5703 			event_type);
5704 		break;
5705 	}
5706 }
5707 
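/*
 * Main event-queue handler: update the event statistics and dispatch each
 * firmware event to the matching handler. Double-bit ECC and other fatal
 * errors trigger a hard reset (when enabled by hard_reset_on_fw_events),
 * while single-bit ECC, QMAN, TPC and benign events are logged and their
 * IRQ is unmasked again in the firmware.
 */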
5708 static void gaudi_handle_eqe(struct hl_device *hdev,
5709 				struct hl_eq_entry *eq_entry)
5710 {
5711 	struct gaudi_device *gaudi = hdev->asic_specific;
5712 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5713 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5714 			>> EQ_CTL_EVENT_TYPE_SHIFT);
5715 	u8 cause;
5716 	bool reset_required;
5717 
5718 	gaudi->events_stat[event_type]++;
5719 	gaudi->events_stat_aggregate[event_type]++;
5720 
5721 	switch (event_type) {
5722 	case GAUDI_EVENT_PCIE_CORE_DERR:
5723 	case GAUDI_EVENT_PCIE_IF_DERR:
5724 	case GAUDI_EVENT_PCIE_PHY_DERR:
5725 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5726 	case GAUDI_EVENT_MME0_ACC_DERR:
5727 	case GAUDI_EVENT_MME0_SBAB_DERR:
5728 	case GAUDI_EVENT_MME1_ACC_DERR:
5729 	case GAUDI_EVENT_MME1_SBAB_DERR:
5730 	case GAUDI_EVENT_MME2_ACC_DERR:
5731 	case GAUDI_EVENT_MME2_SBAB_DERR:
5732 	case GAUDI_EVENT_MME3_ACC_DERR:
5733 	case GAUDI_EVENT_MME3_SBAB_DERR:
5734 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5735 		fallthrough;
5736 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
5737 	case GAUDI_EVENT_PSOC_MEM_DERR:
5738 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5739 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5740 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5741 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5742 	case GAUDI_EVENT_MMU_DERR:
5743 		gaudi_print_irq_info(hdev, event_type, true);
5744 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5745 		if (hdev->hard_reset_on_fw_events)
5746 			hl_device_reset(hdev, true, false);
5747 		break;
5748 
5749 	case GAUDI_EVENT_GIC500:
5750 	case GAUDI_EVENT_AXI_ECC:
5751 	case GAUDI_EVENT_L2_RAM_ECC:
5752 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5753 		gaudi_print_irq_info(hdev, event_type, false);
5754 		if (hdev->hard_reset_on_fw_events)
5755 			hl_device_reset(hdev, true, false);
5756 		break;
5757 
5758 	case GAUDI_EVENT_HBM0_SPI_0:
5759 	case GAUDI_EVENT_HBM1_SPI_0:
5760 	case GAUDI_EVENT_HBM2_SPI_0:
5761 	case GAUDI_EVENT_HBM3_SPI_0:
5762 		gaudi_print_irq_info(hdev, event_type, false);
5763 		gaudi_hbm_read_interrupts(hdev,
5764 					  gaudi_hbm_event_to_dev(event_type));
5765 		if (hdev->hard_reset_on_fw_events)
5766 			hl_device_reset(hdev, true, false);
5767 		break;
5768 
5769 	case GAUDI_EVENT_HBM0_SPI_1:
5770 	case GAUDI_EVENT_HBM1_SPI_1:
5771 	case GAUDI_EVENT_HBM2_SPI_1:
5772 	case GAUDI_EVENT_HBM3_SPI_1:
5773 		gaudi_print_irq_info(hdev, event_type, false);
5774 		gaudi_hbm_read_interrupts(hdev,
5775 					  gaudi_hbm_event_to_dev(event_type));
5776 		break;
5777 
5778 	case GAUDI_EVENT_TPC0_DEC:
5779 	case GAUDI_EVENT_TPC1_DEC:
5780 	case GAUDI_EVENT_TPC2_DEC:
5781 	case GAUDI_EVENT_TPC3_DEC:
5782 	case GAUDI_EVENT_TPC4_DEC:
5783 	case GAUDI_EVENT_TPC5_DEC:
5784 	case GAUDI_EVENT_TPC6_DEC:
5785 	case GAUDI_EVENT_TPC7_DEC:
5786 		gaudi_print_irq_info(hdev, event_type, true);
5787 		reset_required = gaudi_tpc_read_interrupts(hdev,
5788 					tpc_dec_event_to_tpc_id(event_type),
5789 					"AXI_SLV_DEC_Error");
5790 		if (reset_required) {
5791 			dev_err(hdev->dev, "hard reset required due to %s\n",
5792 				gaudi_irq_map_table[event_type].name);
5793 
5794 			if (hdev->hard_reset_on_fw_events)
5795 				hl_device_reset(hdev, true, false);
5796 		} else {
5797 			hl_fw_unmask_irq(hdev, event_type);
5798 		}
5799 		break;
5800 
5801 	case GAUDI_EVENT_TPC0_KRN_ERR:
5802 	case GAUDI_EVENT_TPC1_KRN_ERR:
5803 	case GAUDI_EVENT_TPC2_KRN_ERR:
5804 	case GAUDI_EVENT_TPC3_KRN_ERR:
5805 	case GAUDI_EVENT_TPC4_KRN_ERR:
5806 	case GAUDI_EVENT_TPC5_KRN_ERR:
5807 	case GAUDI_EVENT_TPC6_KRN_ERR:
5808 	case GAUDI_EVENT_TPC7_KRN_ERR:
5809 		gaudi_print_irq_info(hdev, event_type, true);
5810 		reset_required = gaudi_tpc_read_interrupts(hdev,
5811 					tpc_krn_event_to_tpc_id(event_type),
5812 					"KRN_ERR");
5813 		if (reset_required) {
5814 			dev_err(hdev->dev, "hard reset required due to %s\n",
5815 				gaudi_irq_map_table[event_type].name);
5816 
5817 			if (hdev->hard_reset_on_fw_events)
5818 				hl_device_reset(hdev, true, false);
5819 		} else {
5820 			hl_fw_unmask_irq(hdev, event_type);
5821 		}
5822 		break;
5823 
5824 	case GAUDI_EVENT_PCIE_CORE_SERR:
5825 	case GAUDI_EVENT_PCIE_IF_SERR:
5826 	case GAUDI_EVENT_PCIE_PHY_SERR:
5827 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5828 	case GAUDI_EVENT_MME0_ACC_SERR:
5829 	case GAUDI_EVENT_MME0_SBAB_SERR:
5830 	case GAUDI_EVENT_MME1_ACC_SERR:
5831 	case GAUDI_EVENT_MME1_SBAB_SERR:
5832 	case GAUDI_EVENT_MME2_ACC_SERR:
5833 	case GAUDI_EVENT_MME2_SBAB_SERR:
5834 	case GAUDI_EVENT_MME3_ACC_SERR:
5835 	case GAUDI_EVENT_MME3_SBAB_SERR:
5836 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5837 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
5838 	case GAUDI_EVENT_PSOC_MEM_SERR:
5839 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5840 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5841 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5842 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5843 		fallthrough;
5844 	case GAUDI_EVENT_MMU_SERR:
5845 		gaudi_print_irq_info(hdev, event_type, true);
5846 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5847 		hl_fw_unmask_irq(hdev, event_type);
5848 		break;
5849 
5850 	case GAUDI_EVENT_PCIE_DEC:
5851 	case GAUDI_EVENT_MME0_WBC_RSP:
5852 	case GAUDI_EVENT_MME0_SBAB0_RSP:
5853 	case GAUDI_EVENT_MME1_WBC_RSP:
5854 	case GAUDI_EVENT_MME1_SBAB0_RSP:
5855 	case GAUDI_EVENT_MME2_WBC_RSP:
5856 	case GAUDI_EVENT_MME2_SBAB0_RSP:
5857 	case GAUDI_EVENT_MME3_WBC_RSP:
5858 	case GAUDI_EVENT_MME3_SBAB0_RSP:
5859 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
5860 	case GAUDI_EVENT_PSOC_AXI_DEC:
5861 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
5862 	case GAUDI_EVENT_MMU_PAGE_FAULT:
5863 	case GAUDI_EVENT_MMU_WR_PERM:
5864 	case GAUDI_EVENT_RAZWI_OR_ADC:
5865 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5866 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5867 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5868 		fallthrough;
5869 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5870 		gaudi_print_irq_info(hdev, event_type, true);
5871 		gaudi_handle_qman_err(hdev, event_type);
5872 		hl_fw_unmask_irq(hdev, event_type);
5873 		break;
5874 
5875 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5876 		gaudi_print_irq_info(hdev, event_type, true);
5877 		if (hdev->hard_reset_on_fw_events)
5878 			hl_device_reset(hdev, true, false);
5879 		break;
5880 
5881 	case GAUDI_EVENT_TPC0_BMON_SPMU:
5882 	case GAUDI_EVENT_TPC1_BMON_SPMU:
5883 	case GAUDI_EVENT_TPC2_BMON_SPMU:
5884 	case GAUDI_EVENT_TPC3_BMON_SPMU:
5885 	case GAUDI_EVENT_TPC4_BMON_SPMU:
5886 	case GAUDI_EVENT_TPC5_BMON_SPMU:
5887 	case GAUDI_EVENT_TPC6_BMON_SPMU:
5888 	case GAUDI_EVENT_TPC7_BMON_SPMU:
5889 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5890 		gaudi_print_irq_info(hdev, event_type, false);
5891 		hl_fw_unmask_irq(hdev, event_type);
5892 		break;
5893 
5894 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5895 		gaudi_print_clk_change_info(hdev, event_type);
5896 		hl_fw_unmask_irq(hdev, event_type);
5897 		break;
5898 
5899 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
5900 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5901 		dev_err(hdev->dev,
5902 			"Received high temp H/W interrupt %d (cause %d)\n",
5903 			event_type, cause);
5904 		break;
5905 
5906 	default:
5907 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5908 				event_type);
5909 		break;
5910 	}
5911 }
5912 
5913 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5914 					u32 *size)
5915 {
5916 	struct gaudi_device *gaudi = hdev->asic_specific;
5917 
5918 	if (aggregate) {
5919 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
5920 		return gaudi->events_stat_aggregate;
5921 	}
5922 
5923 	*size = (u32) sizeof(gaudi->events_stat);
5924 	return gaudi->events_stat;
5925 }
5926 
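/*
 * Invalidate the MMU STLB cache (L0 and L1) by bumping the producer index of
 * the invalidation interface and polling mmSTLB_INV_PS until the hardware
 * reports completion. A timeout is treated as fatal and triggers a hard
 * reset.
 */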
5927 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5928 					u32 flags)
5929 {
5930 	struct gaudi_device *gaudi = hdev->asic_specific;
5931 	u32 status, timeout_usec;
5932 	int rc;
5933 
5934 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5935 		hdev->hard_reset_pending)
5936 		return 0;
5937 
5938 	if (hdev->pldm)
5939 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5940 	else
5941 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5942 
5943 	mutex_lock(&hdev->mmu_cache_lock);
5944 
5945 	/* L0 & L1 invalidation */
5946 	WREG32(mmSTLB_INV_PS, 3);
5947 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5948 	WREG32(mmSTLB_INV_PS, 2);
5949 
5950 	rc = hl_poll_timeout(
5951 		hdev,
5952 		mmSTLB_INV_PS,
5953 		status,
5954 		!status,
5955 		1000,
5956 		timeout_usec);
5957 
5958 	WREG32(mmSTLB_INV_SET, 0);
5959 
5960 	mutex_unlock(&hdev->mmu_cache_lock);
5961 
5962 	if (rc) {
5963 		dev_err_ratelimited(hdev->dev,
5964 					"MMU cache invalidation timeout\n");
5965 		hl_device_reset(hdev, true, false);
5966 	}
5967 
5968 	return rc;
5969 }
5970 
5971 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5972 				bool is_hard, u32 asid, u64 va, u64 size)
5973 {
5974 	struct gaudi_device *gaudi = hdev->asic_specific;
5975 	u32 status, timeout_usec;
5976 	u32 inv_data;
5977 	u32 pi;
5978 	int rc;
5979 
5980 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5981 		hdev->hard_reset_pending)
5982 		return 0;
5983 
5984 	mutex_lock(&hdev->mmu_cache_lock);
5985 
5986 	if (hdev->pldm)
5987 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5988 	else
5989 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5990 
5991 	/*
5992 	 * TODO: currently invalidate entire L0 & L1 as in regular hard
5993 	 * invalidation. Need to apply invalidation of specific cache
5994 	 * lines with mask of ASID & VA & size.
5995 	 * Note that L1 will be flushed entirely in any case.
5996 	 */
5997 
5998 	/* L0 & L1 invalidation */
5999 	inv_data = RREG32(mmSTLB_CACHE_INV);
6000 	/* PI is 8 bit */
6001 	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6002 	WREG32(mmSTLB_CACHE_INV,
6003 		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6004 
6005 	rc = hl_poll_timeout(
6006 		hdev,
6007 		mmSTLB_INV_CONSUMER_INDEX,
6008 		status,
6009 		status == pi,
6010 		1000,
6011 		timeout_usec);
6012 
6013 	mutex_unlock(&hdev->mmu_cache_lock);
6014 
6015 	if (rc) {
6016 		dev_err_ratelimited(hdev->dev,
6017 					"MMU cache invalidation timeout\n");
6018 		hl_device_reset(hdev, true, false);
6019 	}
6020 
6021 	return rc;
6022 }
6023 
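/*
 * Bind an ASID to the physical address of its hop-0 page table by
 * programming the MMU_ASID and MMU_HOP0_PA registers, then kick MMU_BUSY and
 * wait for the MMU to acknowledge the update.
 */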
6024 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6025 					u32 asid, u64 phys_addr)
6026 {
6027 	u32 status, timeout_usec;
6028 	int rc;
6029 
6030 	if (hdev->pldm)
6031 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6032 	else
6033 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6034 
6035 	WREG32(MMU_ASID, asid);
6036 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6037 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6038 	WREG32(MMU_BUSY, 0x80000000);
6039 
6040 	rc = hl_poll_timeout(
6041 		hdev,
6042 		MMU_BUSY,
6043 		status,
6044 		!(status & 0x80000000),
6045 		1000,
6046 		timeout_usec);
6047 
6048 	if (rc) {
6049 		dev_err(hdev->dev,
6050 			"Timeout during MMU hop0 config of asid %d\n", asid);
6051 		return rc;
6052 	}
6053 
6054 	return 0;
6055 }
6056 
6057 static int gaudi_send_heartbeat(struct hl_device *hdev)
6058 {
6059 	struct gaudi_device *gaudi = hdev->asic_specific;
6060 
6061 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6062 		return 0;
6063 
6064 	return hl_fw_send_heartbeat(hdev);
6065 }
6066 
6067 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6068 {
6069 	struct gaudi_device *gaudi = hdev->asic_specific;
6070 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6071 	int rc;
6072 
6073 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6074 		return 0;
6075 
6076 	rc = hl_fw_cpucp_info_get(hdev);
6077 	if (rc)
6078 		return rc;
6079 
6080 	if (!strlen(prop->cpucp_info.card_name))
6081 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6082 				CARD_NAME_MAX_LEN);
6083 
6084 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6085 
6086 	if (hdev->card_type == cpucp_card_type_pci)
6087 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6088 	else if (hdev->card_type == cpucp_card_type_pmc)
6089 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6090 
6091 	hdev->max_power = prop->max_power_default;
6092 
6093 	return 0;
6094 }
6095 
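/*
 * Check whether all DMA, TPC and MME engines are idle. When @mask is given,
 * a bit is set for every busy engine; when @s is given, a per-engine status
 * table is printed (used by debugfs). MME slaves (1 and 3) have no QMAN of
 * their own, so only their ARCH_STATUS is checked.
 */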
6096 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6097 					struct seq_file *s)
6098 {
6099 	struct gaudi_device *gaudi = hdev->asic_specific;
6100 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6101 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6102 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6103 	bool is_idle = true, is_eng_idle, is_slave;
6104 	u64 offset;
6105 	int i, dma_id;
6106 
6107 	mutex_lock(&gaudi->clk_gate_mutex);
6108 
6109 	hdev->asic_funcs->disable_clock_gating(hdev);
6110 
6111 	if (s)
6112 		seq_puts(s,
6113 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6114 			"---  -------  ------------  ----------  -------------\n");
6115 
6116 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6117 		dma_id = gaudi_dma_assignment[i];
6118 		offset = dma_id * DMA_QMAN_OFFSET;
6119 
6120 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6121 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6122 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6123 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6124 				IS_DMA_IDLE(dma_core_sts0);
6125 		is_idle &= is_eng_idle;
6126 
6127 		if (mask)
6128 			*mask |= ((u64) !is_eng_idle) <<
6129 					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
6130 		if (s)
6131 			seq_printf(s, fmt, dma_id,
6132 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6133 				qm_cgm_sts, dma_core_sts0);
6134 	}
6135 
6136 	if (s)
6137 		seq_puts(s,
6138 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6139 			"---  -------  ------------  ----------  ----------\n");
6140 
6141 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6142 		offset = i * TPC_QMAN_OFFSET;
6143 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6144 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6145 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6146 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6147 				IS_TPC_IDLE(tpc_cfg_sts);
6148 		is_idle &= is_eng_idle;
6149 
6150 		if (mask)
6151 			*mask |= ((u64) !is_eng_idle) <<
6152 						(GAUDI_ENGINE_ID_TPC_0 + i);
6153 		if (s)
6154 			seq_printf(s, fmt, i,
6155 				is_eng_idle ? "Y" : "N",
6156 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6157 	}
6158 
6159 	if (s)
6160 		seq_puts(s,
6161 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
6162 			"---  -------  ------------  ----------  -----------\n");
6163 
6164 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6165 		offset = i * MME_QMAN_OFFSET;
6166 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6167 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6168 
6169 		/* MME 1 & 3 are slaves, no need to check their QMANs */
6170 		is_slave = i % 2;
6171 		if (!is_slave) {
6172 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6173 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6174 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6175 		}
6176 
6177 		is_idle &= is_eng_idle;
6178 
6179 		if (mask)
6180 			*mask |= ((u64) !is_eng_idle) <<
6181 						(GAUDI_ENGINE_ID_MME_0 + i);
6182 		if (s) {
6183 			if (!is_slave)
6184 				seq_printf(s, fmt, i,
6185 					is_eng_idle ? "Y" : "N",
6186 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6187 			else
6188 				seq_printf(s, mme_slave_fmt, i,
6189 					is_eng_idle ? "Y" : "N", "-",
6190 					"-", mme_arch_sts);
6191 		}
6192 	}
6193 
6194 	if (s)
6195 		seq_puts(s, "\n");
6196 
6197 	hdev->asic_funcs->set_clock_gating(hdev);
6198 
6199 	mutex_unlock(&gaudi->clk_gate_mutex);
6200 
6201 	return is_idle;
6202 }
6203 
6204 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6205 	__acquires(&gaudi->hw_queues_lock)
6206 {
6207 	struct gaudi_device *gaudi = hdev->asic_specific;
6208 
6209 	spin_lock(&gaudi->hw_queues_lock);
6210 }
6211 
6212 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6213 	__releases(&gaudi->hw_queues_lock)
6214 {
6215 	struct gaudi_device *gaudi = hdev->asic_specific;
6216 
6217 	spin_unlock(&gaudi->hw_queues_lock);
6218 }
6219 
6220 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6221 {
6222 	return hdev->pdev->device;
6223 }
6224 
6225 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6226 				size_t max_size)
6227 {
6228 	struct gaudi_device *gaudi = hdev->asic_specific;
6229 
6230 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6231 		return 0;
6232 
6233 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6234 }
6235 
6236 /*
6237  * this function should be used only during initialization and/or after reset,
6238  * when there are no active users.
6239  */
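/*
 * The image at @tpc_kernel is used both as the TPC kernel address and as the
 * icache base: the icache is invalidated and prefetched, the kernel is then
 * executed once, and the function waits for the vector pipe to drain and for
 * the work-queue inflight counter to reach zero.
 */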
6240 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6241 				u32 tpc_id)
6242 {
6243 	struct gaudi_device *gaudi = hdev->asic_specific;
6244 	u64 kernel_timeout;
6245 	u32 status, offset;
6246 	int rc;
6247 
6248 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6249 
6250 	if (hdev->pldm)
6251 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6252 	else
6253 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6254 
6255 	mutex_lock(&gaudi->clk_gate_mutex);
6256 
6257 	hdev->asic_funcs->disable_clock_gating(hdev);
6258 
6259 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6260 			lower_32_bits(tpc_kernel));
6261 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6262 			upper_32_bits(tpc_kernel));
6263 
6264 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6265 			lower_32_bits(tpc_kernel));
6266 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6267 			upper_32_bits(tpc_kernel));
6268 	/* set a valid LUT pointer, content is of no significance */
6269 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6270 			lower_32_bits(tpc_kernel));
6271 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6272 			upper_32_bits(tpc_kernel));
6273 
6274 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6275 			lower_32_bits(CFG_BASE +
6276 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6277 
6278 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
6279 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6280 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6281 	/* wait a bit for the engine to start executing */
6282 	usleep_range(1000, 1500);
6283 
6284 	/* wait until engine has finished executing */
6285 	rc = hl_poll_timeout(
6286 		hdev,
6287 		mmTPC0_CFG_STATUS + offset,
6288 		status,
6289 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6290 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6291 		1000,
6292 		kernel_timeout);
6293 
6294 	if (rc) {
6295 		dev_err(hdev->dev,
6296 			"Timeout while waiting for TPC%d icache prefetch\n",
6297 			tpc_id);
6298 		hdev->asic_funcs->set_clock_gating(hdev);
6299 		mutex_unlock(&gaudi->clk_gate_mutex);
6300 		return -EIO;
6301 	}
6302 
6303 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6304 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6305 
6306 	/* wait a bit for the engine to start executing */
6307 	usleep_range(1000, 1500);
6308 
6309 	/* wait until engine has finished executing */
6310 	rc = hl_poll_timeout(
6311 		hdev,
6312 		mmTPC0_CFG_STATUS + offset,
6313 		status,
6314 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6315 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6316 		1000,
6317 		kernel_timeout);
6318 
6319 	if (rc) {
6320 		dev_err(hdev->dev,
6321 			"Timeout while waiting for TPC%d vector pipe\n",
6322 			tpc_id);
6323 		hdev->asic_funcs->set_clock_gating(hdev);
6324 		mutex_unlock(&gaudi->clk_gate_mutex);
6325 		return -EIO;
6326 	}
6327 
6328 	rc = hl_poll_timeout(
6329 		hdev,
6330 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6331 		status,
6332 		(status == 0),
6333 		1000,
6334 		kernel_timeout);
6335 
6336 	hdev->asic_funcs->set_clock_gating(hdev);
6337 	mutex_unlock(&gaudi->clk_gate_mutex);
6338 
6339 	if (rc) {
6340 		dev_err(hdev->dev,
6341 			"Timeout while waiting for TPC%d kernel to execute\n",
6342 			tpc_id);
6343 		return -EIO;
6344 	}
6345 
6346 	return 0;
6347 }
6348 
6349 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6350 {
6351 	return RREG32(mmHW_STATE);
6352 }
6353 
6354 static int gaudi_ctx_init(struct hl_ctx *ctx)
6355 {
6356 	return 0;
6357 }
6358 
6359 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6360 {
6361 	return gaudi_cq_assignment[cq_idx];
6362 }
6363 
6364 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6365 {
6366 	return sizeof(struct packet_msg_short) +
6367 			sizeof(struct packet_msg_prot) * 2;
6368 }
6369 
6370 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6371 {
6372 	return sizeof(struct packet_msg_short) * 4 +
6373 			sizeof(struct packet_fence) +
6374 			sizeof(struct packet_msg_prot) * 2;
6375 }
6376 
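/*
 * Fill the signal CB with a single MSG_SHORT packet that adds 1 to sync
 * object 'sob_id' in the W_S sync manager.
 */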
static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl;

	pkt = cb->kernel_address;
	memset(pkt, 0, sizeof(*pkt));

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);
}

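/*
 * Emit a MSG_SHORT packet that writes 'value' to 'addr', where 'addr' is an
 * offset from the W_S monitor block base. Returns the packet size so the
 * caller can accumulate the total CB size.
 */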
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

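/*
 * Emit the MSG_SHORT packet that arms a monitor: the monitor fires once
 * sync object 'sob_id' (selected via its group id and per-group mask)
 * reaches at least 'sob_val' (mode 0 - GREATER OR EQUAL).
 */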
static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
					u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask = ~(1 << (sob_id & 0x7));

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

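/*
 * Emit a FENCE packet: wait on fence id 2 until its counter reaches the
 * target value of 1, then decrement it by 1.
 */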
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

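/*
 * Fill the wait CB: four MSG_SHORT packets program monitor 'mon_id'
 * (payload address low/high, payload data and the ARM register) to write 1
 * to the queue's CP_FENCE2_RDATA register once SOB 'sob_id' reaches
 * 'sob_val', and a final FENCE packet stalls the queue until that write
 * lands.
 */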
static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
			u16 sob_val, u16 mon_id, u32 q_idx)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	void *buf = cb->kernel_address;
	u64 monitor_base, fence_addr = 0;
	u32 size = 0;
	u16 msg_addr_offset;

	switch (q_idx) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	default:
		/* queue index should be valid here */
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				q_idx);
		return;
	}

	fence_addr += CFG_BASE;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
					msg_addr_offset);

	/* Fence packet */
	size += gaudi_add_fence_pkt(buf + size);
}

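/*
 * Clear a HW sync object back to 0 and re-initialize its refcount so it
 * can be reused for new signal/wait submissions.
 */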
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
		0);

	kref_init(&hw_sob->kref);
}

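/*
 * Select the DMA mask based on what the firmware left in the preserved
 * (non-reset) flops register: the POWER9 magic value enables full 64-bit
 * DMA, otherwise a 48-bit mask is used.
 */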
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

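/* Read the free-running PSOC timestamp counter as a 64-bit device time */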
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

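/*
 * ASIC-specific callbacks, registered with the common habanalabs code by
 * gaudi_set_asic_funcs()
 */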
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.get_hw_state = gaudi_get_hw_state,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}