1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17 
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22 
23 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
24 
25 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
26 
27 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
28 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
29 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
30 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
31 #define GAUDI2_RESET_POLL_CNT			3
32 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
33 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
34 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
35 #define GAUDI2_CB_POOL_CB_CNT			512
36 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
37 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
38 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
39 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
40 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
41 
42 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
43 
44 /*
45  * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
46  * and the code relies on that value (for array size etc..) we define another value
47  * for MAX faulty TPCs which reflects the cluster binning requirements
48  */
49 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
50 #define MAX_FAULTY_XBARS			1
51 #define MAX_FAULTY_EDMAS			1
52 #define MAX_FAULTY_DECODERS			1
53 
54 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
55 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
56 #define GAUDI2_DECODER_FULL_MASK		0x3FF
57 
58 #define GAUDI2_NA_EVENT_CAUSE			0xFF
59 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
60 #define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE	25
61 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
62 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
63 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
64 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
65 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
66 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
67 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
68 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
69 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
70 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
71 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
72 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
73 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
74 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
75 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
76 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
78 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
79 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
80 
81 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
82 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
83 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
84 
85 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
86 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
87 
88 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
89 
90 #define IS_DMA_IDLE(dma_core_sts0)	\
91 	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
92 
93 #define IS_DMA_HALTED(dma_core_sts1)	\
94 	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
95 
96 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
97 
98 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
99 
100 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
101 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
102 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
103 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
104 
105 #define PCIE_DEC_EN_MASK			0x300
106 #define DEC_WORK_STATE_IDLE			0
107 #define DEC_WORK_STATE_PEND			3
108 #define IS_DEC_IDLE(dec_swreg15) \
109 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
110 	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)
111 
112 /* HBM MMU address scrambling parameters */
113 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
114 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
115 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
116 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
117 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
118 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
119 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
120 #define MMU_RANGE_INV_EN_SHIFT			0
121 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
122 #define MMU_RANGE_INV_ASID_SHIFT		2
123 
124 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
125  * a 2 entries FIFO, and hence it is not enabled for it.
126  */
127 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
128 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
129 
130 #define GAUDI2_MAX_STRING_LEN			64
131 
132 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
133 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
134 
135 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
136 
137 /* RAZWI initiator coordinates */
138 #define RAZWI_GET_AXUSER_XY(x) \
139 	((x & 0xF8001FF0) >> 4)
140 
141 #define RAZWI_GET_AXUSER_LOW_XY(x) \
142 	((x & 0x00001FF0) >> 4)
143 
144 #define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
145 #define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
146 #define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
147 #define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF
148 
149 #define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
150 #define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F
151 
152 #define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
153 	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
154 		(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
155 
156 #define RAZWI_INITIATOR_ID_X_HIGH(x) \
157 		(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
158 
159 #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
160 	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
161 
162 #define PSOC_RAZWI_ENG_STR_SIZE 128
163 #define PSOC_RAZWI_MAX_ENG_PER_RTR 5
164 
165 /* HW scrambles only bits 0-25 */
166 #define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
167 
168 struct gaudi2_razwi_info {
169 	u32 axuser_xy;
170 	u32 rtr_ctrl;
171 	u16 eng_id;
172 	char *eng_name;
173 };
174 
175 static struct gaudi2_razwi_info common_razwi_info[] = {
176 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
177 				GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
178 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
179 				GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
180 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
181 				GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
182 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
183 				GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
184 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
185 				GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
186 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
187 				GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
188 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
189 				GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
190 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
191 				GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
192 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
193 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
194 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
195 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
196 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
197 				GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
198 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
199 				GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
200 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
201 				GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
202 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
203 				GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
204 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
205 				GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
206 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
207 				GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
208 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
209 				GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
210 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
211 				GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
212 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
213 				GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
214 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
215 				GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
216 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
217 				GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
218 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
219 				GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
220 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
221 				GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
222 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
223 				GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
224 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
225 				GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
226 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
227 				GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
228 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
229 				GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
230 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
231 				GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
232 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
233 				GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
234 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
235 				GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
236 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
237 				GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
238 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
239 				GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
240 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
241 				GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
242 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
243 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
244 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
245 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
246 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
247 				GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
248 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
249 				GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
250 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
251 				GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
252 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
253 				GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
254 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
255 				GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
256 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
257 				GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
258 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
259 				GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
260 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
261 				GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
262 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
263 				GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
264 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
265 				GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
266 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
267 				GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
268 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
269 				GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
270 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
271 				GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
272 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
273 				GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
274 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
275 				GAUDI2_ENGINE_ID_SIZE, "PMMU"},
276 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
277 				GAUDI2_ENGINE_ID_SIZE, "PCIE"},
278 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
279 				GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
280 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
281 				GAUDI2_ENGINE_ID_KDMA, "KDMA"},
282 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
283 				GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
284 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
285 				GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
286 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
287 				GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
288 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
289 				GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
290 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
291 				GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
292 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
293 				GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
294 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
295 				GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
296 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
297 				GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
298 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
299 				GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
300 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
301 				GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
302 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
303 				GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
304 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
305 				GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
306 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
307 				GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
308 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
309 				GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
310 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
311 				GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
312 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
313 				GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
314 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
315 				GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
316 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
317 				GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
318 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
319 				GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
320 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
321 				GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
322 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
323 				GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
324 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
325 				GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
326 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
327 				GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
328 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
329 				GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
330 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
331 				GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
332 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
333 				GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
334 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
335 				GAUDI2_ENGINE_ID_PSOC, "CPU"},
336 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
337 				GAUDI2_ENGINE_ID_PSOC, "PSOC"}
338 };
339 
340 static struct gaudi2_razwi_info mme_razwi_info[] = {
341 		/* MME X high coordinate is N/A, hence using only low coordinates */
342 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
343 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
344 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
345 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
346 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
347 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
348 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
349 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
350 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
351 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
352 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
353 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
354 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
355 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
356 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
357 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
358 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
359 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
360 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
361 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
362 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
363 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
364 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
365 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
366 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
367 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
368 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
369 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
370 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
371 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
372 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
373 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
374 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
375 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
376 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
377 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
378 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
379 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
380 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
381 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
382 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
383 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
384 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
385 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
386 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
387 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
388 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
389 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
390 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
391 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
392 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
393 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
394 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
395 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
396 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
397 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
398 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
399 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
400 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
401 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
402 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
403 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
404 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
405 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
406 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
407 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
408 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
409 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
410 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
411 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
412 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
413 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
414 };
415 
416 enum hl_pmmu_fatal_cause {
417 	LATENCY_RD_OUT_FIFO_OVERRUN,
418 	LATENCY_WR_OUT_FIFO_OVERRUN,
419 };
420 
421 enum hl_pcie_drain_ind_cause {
422 	LBW_AXI_DRAIN_IND,
423 	HBW_AXI_DRAIN_IND
424 };
425 
426 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
427 	[HBM_ID0] = 0xFFFC,
428 	[HBM_ID1] = 0xFFCF,
429 	[HBM_ID2] = 0xF7F7,
430 	[HBM_ID3] = 0x7F7F,
431 	[HBM_ID4] = 0xFCFF,
432 	[HBM_ID5] = 0xCFFF,
433 };
434 
435 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
436 	[0] = HBM_ID0,
437 	[1] = HBM_ID1,
438 	[2] = HBM_ID4,
439 	[3] = HBM_ID5,
440 };
441 
442 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
443 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
444 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
445 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
446 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
447 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
448 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
449 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
450 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
451 };
452 
453 static const int gaudi2_qman_async_event_id[] = {
454 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
455 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
456 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
457 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
458 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
459 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
460 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
461 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
462 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
463 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
464 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
465 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
466 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
467 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
468 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
469 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
470 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
471 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
472 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
473 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
474 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
475 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
476 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
477 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
478 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
479 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
480 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
481 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
482 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
483 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
484 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
485 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
486 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
487 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
488 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
489 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
490 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
491 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
492 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
493 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
494 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
495 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
496 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
497 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
498 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
499 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
500 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
501 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
502 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
503 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
504 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
505 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
506 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
507 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
508 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
509 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
510 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
511 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
512 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
513 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
514 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
515 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
516 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
517 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
518 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
519 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
520 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
521 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
522 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
523 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
524 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
525 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
526 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
527 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
528 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
529 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
530 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
531 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
532 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
533 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
534 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
535 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
536 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
537 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
538 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
539 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
540 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
541 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
542 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
543 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
544 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
545 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
546 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
547 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
548 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
549 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
550 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
551 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
552 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
553 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
554 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
555 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
556 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
557 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
558 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
559 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
560 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
561 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
562 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
563 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
564 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
565 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
566 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
567 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
568 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
569 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
570 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
571 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
572 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
573 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
574 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
575 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
576 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
577 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
578 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
579 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
580 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
581 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
582 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
583 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
584 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
585 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
586 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
587 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
588 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
589 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
590 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
591 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
592 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
593 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
594 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
595 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
596 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
597 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
598 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
599 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
600 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
601 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
602 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
603 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
604 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
605 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
606 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
607 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
608 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
609 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
610 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
611 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
612 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
613 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
614 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
615 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
616 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
617 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
618 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
619 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
620 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
621 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
622 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
623 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
624 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
625 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
626 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
627 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
628 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
629 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
630 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
631 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
632 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
633 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
634 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
635 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
636 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
637 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
638 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
639 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
640 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
641 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
642 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
643 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
644 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
645 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
646 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
647 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
648 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
649 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
650 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
651 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
652 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
653 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
654 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
655 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
656 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
657 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
658 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
659 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
660 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
661 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
662 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
663 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
664 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
665 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
666 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
667 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
668 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
669 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
670 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
671 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
672 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
673 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
674 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
675 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
676 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
677 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
678 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
679 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
680 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
681 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
682 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
683 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
684 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
685 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
686 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
687 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
688 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
689 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
690 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
691 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
692 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
693 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
694 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
695 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
696 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
697 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
698 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
699 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
700 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
701 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
702 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
703 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
704 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
705 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
706 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
707 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
708 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
709 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
710 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
711 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
712 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
713 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
714 };
715 
716 static const int gaudi2_dma_core_async_event_id[] = {
717 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
718 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
719 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
720 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
721 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
722 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
723 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
724 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
725 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
726 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
727 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
728 };
729 
730 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
731 	"qman sei intr",
732 	"arc sei intr"
733 };
734 
735 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
736 	"AXI_TERMINATOR WR",
737 	"AXI_TERMINATOR RD",
738 	"AXI SPLIT SEI Status"
739 };
740 
741 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
742 	"cbu_bresp_sei_intr_cause",
743 	"cbu_rresp_sei_intr_cause",
744 	"lbu_bresp_sei_intr_cause",
745 	"lbu_rresp_sei_intr_cause",
746 	"cbu_axi_split_intr_cause",
747 	"lbu_axi_split_intr_cause",
748 	"arc_ip_excptn_sei_intr_cause",
749 	"dmi_bresp_sei_intr_cause",
750 	"aux2apb_err_sei_intr_cause",
751 	"cfg_lbw_wr_terminated_intr_cause",
752 	"cfg_lbw_rd_terminated_intr_cause",
753 	"cfg_dccm_wr_terminated_intr_cause",
754 	"cfg_dccm_rd_terminated_intr_cause",
755 	"cfg_hbw_rd_terminated_intr_cause"
756 };
757 
758 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
759 	"msix_vcd_hbw_sei",
760 	"msix_l2c_hbw_sei",
761 	"msix_nrm_hbw_sei",
762 	"msix_abnrm_hbw_sei",
763 	"msix_vcd_lbw_sei",
764 	"msix_l2c_lbw_sei",
765 	"msix_nrm_lbw_sei",
766 	"msix_abnrm_lbw_sei",
767 	"apb_vcd_lbw_sei",
768 	"apb_l2c_lbw_sei",
769 	"apb_nrm_lbw_sei",
770 	"apb_abnrm_lbw_sei",
771 	"dec_sei",
772 	"dec_apb_sei",
773 	"trc_apb_sei",
774 	"lbw_mstr_if_sei",
775 	"axi_split_bresp_err_sei",
776 	"hbw_axi_wr_viol_sei",
777 	"hbw_axi_rd_viol_sei",
778 	"lbw_axi_wr_viol_sei",
779 	"lbw_axi_rd_viol_sei",
780 	"vcd_spi",
781 	"l2c_spi",
782 	"nrm_spi",
783 	"abnrm_spi",
784 };
785 
786 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
787 	"PQ AXI HBW error",
788 	"CQ AXI HBW error",
789 	"CP AXI HBW error",
790 	"CP error due to undefined OPCODE",
791 	"CP encountered STOP OPCODE",
792 	"CP AXI LBW error",
793 	"CP WRREG32 or WRBULK returned error",
794 	"N/A",
795 	"FENCE 0 inc over max value and clipped",
796 	"FENCE 1 inc over max value and clipped",
797 	"FENCE 2 inc over max value and clipped",
798 	"FENCE 3 inc over max value and clipped",
799 	"FENCE 0 dec under min value and clipped",
800 	"FENCE 1 dec under min value and clipped",
801 	"FENCE 2 dec under min value and clipped",
802 	"FENCE 3 dec under min value and clipped",
803 	"CPDMA Up overflow",
804 	"PQC L2H error"
805 };
806 
807 static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
808 	"RSVD0",
809 	"CQ AXI HBW error",
810 	"CP AXI HBW error",
811 	"CP error due to undefined OPCODE",
812 	"CP encountered STOP OPCODE",
813 	"CP AXI LBW error",
814 	"CP WRREG32 or WRBULK returned error",
815 	"N/A",
816 	"FENCE 0 inc over max value and clipped",
817 	"FENCE 1 inc over max value and clipped",
818 	"FENCE 2 inc over max value and clipped",
819 	"FENCE 3 inc over max value and clipped",
820 	"FENCE 0 dec under min value and clipped",
821 	"FENCE 1 dec under min value and clipped",
822 	"FENCE 2 dec under min value and clipped",
823 	"FENCE 3 dec under min value and clipped",
824 	"CPDMA Up overflow",
825 	"RSVD17",
826 	"CQ_WR_IFIFO_CI_ERR",
827 	"CQ_WR_CTL_CI_ERR",
828 	"ARC_CQF_RD_ERR",
829 	"ARC_CQ_WR_IFIFO_CI_ERR",
830 	"ARC_CQ_WR_CTL_CI_ERR",
831 	"ARC_AXI_ERR",
832 	"CP_SWITCH_WDT_ERR"
833 };
834 
835 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
836 	"Choice push while full error",
837 	"Choice Q watchdog error",
838 	"MSG AXI LBW returned with error"
839 };
840 
841 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
842 	"qm_axi_err",
843 	"qm_trace_fence_events",
844 	"qm_sw_err",
845 	"qm_cp_sw_stop",
846 	"lbw_mstr_rresp_err",
847 	"lbw_mstr_bresp_err",
848 	"lbw_msg_slverr",
849 	"hbw_msg_slverr",
850 	"wbc_slverr",
851 	"hbw_mstr_rresp_err",
852 	"hbw_mstr_bresp_err",
853 	"sb_resp_intr",
854 	"mrsb_resp_intr",
855 	"core_dw_status_0",
856 	"core_dw_status_1",
857 	"core_dw_status_2",
858 	"core_dw_status_3",
859 	"core_dw_status_4",
860 	"core_dw_status_5",
861 	"core_dw_status_6",
862 	"core_dw_status_7",
863 	"async_arc2cpu_sei_intr",
864 };
865 
866 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
867 	"tpc_address_exceed_slm",
868 	"tpc_div_by_0",
869 	"tpc_spu_mac_overflow",
870 	"tpc_spu_addsub_overflow",
871 	"tpc_spu_abs_overflow",
872 	"tpc_spu_fma_fp_dst_nan",
873 	"tpc_spu_fma_fp_dst_inf",
874 	"tpc_spu_convert_fp_dst_nan",
875 	"tpc_spu_convert_fp_dst_inf",
876 	"tpc_spu_fp_dst_denorm",
877 	"tpc_vpu_mac_overflow",
878 	"tpc_vpu_addsub_overflow",
879 	"tpc_vpu_abs_overflow",
880 	"tpc_vpu_convert_fp_dst_nan",
881 	"tpc_vpu_convert_fp_dst_inf",
882 	"tpc_vpu_fma_fp_dst_nan",
883 	"tpc_vpu_fma_fp_dst_inf",
884 	"tpc_vpu_fp_dst_denorm",
885 	"tpc_assertions",
886 	"tpc_illegal_instruction",
887 	"tpc_pc_wrap_around",
888 	"tpc_qm_sw_err",
889 	"tpc_hbw_rresp_err",
890 	"tpc_hbw_bresp_err",
891 	"tpc_lbw_rresp_err",
892 	"tpc_lbw_bresp_err",
893 	"st_unlock_already_locked",
894 	"invalid_lock_access",
895 	"LD_L protection violation",
896 	"ST_L protection violation",
897 	"D$ L0CS mismatch",
898 };
899 
900 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
901 	"agu_resp_intr",
902 	"qman_axi_err",
903 	"wap sei (wbc axi err)",
904 	"arc sei",
905 	"cfg access error",
906 	"qm_sw_err",
907 	"sbte_dbg_intr_0",
908 	"sbte_dbg_intr_1",
909 	"sbte_dbg_intr_2",
910 	"sbte_dbg_intr_3",
911 	"sbte_dbg_intr_4",
912 	"sbte_prtn_intr_0",
913 	"sbte_prtn_intr_1",
914 	"sbte_prtn_intr_2",
915 	"sbte_prtn_intr_3",
916 	"sbte_prtn_intr_4",
917 };
918 
919 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
920 	"i0",
921 	"i1",
922 	"i2",
923 	"i3",
924 	"i4",
925 };
926 
927 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
928 	"WBC ERR RESP_0",
929 	"WBC ERR RESP_1",
930 	"AP SOURCE POS INF",
931 	"AP SOURCE NEG INF",
932 	"AP SOURCE NAN",
933 	"AP RESULT POS INF",
934 	"AP RESULT NEG INF",
935 };
936 
937 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
938 	"HBW Read returned with error RRESP",
939 	"HBW write returned with error BRESP",
940 	"LBW write returned with error BRESP",
941 	"descriptor_fifo_overflow",
942 	"KDMA SB LBW Read returned with error",
943 	"KDMA WBC LBW Write returned with error",
944 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
945 	"WRONG CFG FOR COMMIT IN LIN DMA"
946 };
947 
948 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
949 	"HBW/LBW Read returned with error RRESP",
950 	"HBW/LBW write returned with error BRESP",
951 	"LBW write returned with error BRESP",
952 	"descriptor_fifo_overflow",
953 	"KDMA SB LBW Read returned with error",
954 	"KDMA WBC LBW Write returned with error",
955 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
956 	"WRONG CFG FOR COMMIT IN LIN DMA"
957 };
958 
959 struct gaudi2_sm_sei_cause_data {
960 	const char *cause_name;
961 	const char *log_name;
962 };
963 
964 static const struct gaudi2_sm_sei_cause_data
965 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
966 	{"calculated SO value overflow/underflow", "SOB ID"},
967 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
968 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
969 };
970 
971 static const char * const
972 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
973 	"LATENCY_RD_OUT_FIFO_OVERRUN",
974 	"LATENCY_WR_OUT_FIFO_OVERRUN",
975 };
976 
977 static const char * const
978 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
979 	"LATENCY_RD_OUT_FIFO_OVERRUN",
980 	"LATENCY_WR_OUT_FIFO_OVERRUN",
981 };
982 
983 static const char * const
984 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
985 	"AXI drain HBW",
986 	"AXI drain LBW",
987 };
988 
989 static const char * const
990 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
991 	"HBW error response",
992 	"LBW error response",
993 	"TLP is blocked by RR"
994 };
995 
996 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
997 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
998 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
999 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
1000 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
1001 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
1002 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1003 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1004 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1005 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1006 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1007 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1008 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1009 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1010 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1011 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1012 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1013 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1014 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1015 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1016 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1017 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1018 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1019 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1020 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1021 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1022 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1023 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1024 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1025 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1026 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1027 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1028 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1029 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1030 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1031 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1032 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1033 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1034 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1035 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1036 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1037 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1038 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1039 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1040 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1041 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1042 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1043 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1044 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1045 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1046 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1047 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1048 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1049 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1050 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1051 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1052 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1053 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1054 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1055 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1056 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1057 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1058 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1059 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1060 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1061 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1062 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1063 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1064 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1065 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1066 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1067 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1068 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1069 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1070 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1071 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1072 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1073 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1074 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1075 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1076 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1077 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1078 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1079 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1080 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1081 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1082 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1083 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1084 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1085 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1086 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1087 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1088 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1089 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1090 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1091 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1092 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1093 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1094 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1095 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1096 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1097 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1098 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1099 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1100 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1101 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1102 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1103 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1104 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1105 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1106 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1107 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1108 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1109 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1110 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1111 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1112 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1113 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1114 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1115 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1116 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1117 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1118 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1119 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1120 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1121 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1122 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1123 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1124 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1125 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1126 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1127 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1128 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1129 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1130 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1131 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1132 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1133 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1134 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1135 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1136 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1137 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1138 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1139 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1140 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1141 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1142 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1143 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1144 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1145 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1146 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1147 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1148 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1149 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1150 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1151 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1152 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1153 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1154 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1155 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1156 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1157 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1158 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1159 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1160 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1161 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1162 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1163 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1164 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1165 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1166 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1167 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1168 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1169 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1170 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1171 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1172 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1173 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1174 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1175 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1176 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1177 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1178 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1179 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1180 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1181 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1182 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1183 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1184 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1185 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1186 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1187 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1188 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1189 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1190 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1191 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1192 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1193 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1194 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1195 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1196 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1197 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1198 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1199 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1200 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1201 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1202 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1203 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1204 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1205 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1206 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1207 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1208 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1209 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1210 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1211 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1212 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1213 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1214 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1215 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1216 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1217 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1218 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1219 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1220 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1221 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1222 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1223 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1224 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1225 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1226 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1227 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1228 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1229 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1230 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1231 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1232 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1233 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1234 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1235 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1236 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1237 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1238 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1239 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1240 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1241 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1242 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1243 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1244 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1245 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1246 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1247 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1248 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1249 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1250 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1251 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1252 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1253 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1254 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1255 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1256 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1257 };
1258 
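/* AUX register block base address of each ARC CPU, indexed by CPU ID */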
1259 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1260 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1261 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1262 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1263 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1264 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1265 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1266 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1267 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1268 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1269 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1270 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1271 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1272 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1273 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1274 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1275 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1276 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1277 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1278 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1279 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1280 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1281 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1282 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1283 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1284 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1285 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1286 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1287 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1288 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1289 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1290 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1291 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1292 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1293 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1294 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1295 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1296 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1297 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1298 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1299 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1300 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1301 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1302 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1303 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1304 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1305 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1306 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1307 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1308 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1309 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1310 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1311 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1312 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1313 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1314 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1315 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1316 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1317 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1318 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1319 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1320 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1321 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1322 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1323 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1324 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1325 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1326 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1327 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1328 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1329 };
1330 
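/* DCCM block base address of each ARC CPU, indexed by CPU ID */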
1331 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1332 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1333 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1334 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1335 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1336 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1337 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1338 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1339 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1340 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1341 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1342 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1343 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1344 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1345 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1346 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1347 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1348 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1349 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1350 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1351 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1352 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1353 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1354 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1355 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1356 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1357 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1358 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1359 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1360 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1361 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1362 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1363 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1364 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1365 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1366 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1367 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1368 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1369 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1370 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1371 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1372 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1373 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1374 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1375 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1376 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1377 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1378 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1379 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1380 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1381 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1382 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1383 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1384 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1385 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1386 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1387 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1388 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1389 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1390 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1391 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1392 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1393 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1394 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1395 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1396 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1397 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1398 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1399 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1400 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1401 };
1402 
1403 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1404 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1405 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1406 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1407 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1408 };
1409 
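/* ARC CPU that serves each hardware queue, indexed by queue ID */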
1410 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1411 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1412 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1413 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1414 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1415 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1416 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1417 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1418 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1419 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1420 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1421 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1422 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1423 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1424 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1425 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1426 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1427 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1428 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1429 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1430 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1431 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1432 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1433 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1434 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1435 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1436 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1437 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1438 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1439 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1440 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1441 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1442 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1443 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1444 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1445 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1446 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1447 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1448 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1449 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1450 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1451 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1452 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1453 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1454 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1455 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1456 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1457 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1458 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1459 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1460 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1461 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1462 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1463 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1464 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1465 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1466 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1467 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1468 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1469 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1470 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1471 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1472 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1473 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1474 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1475 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1476 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1477 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1478 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1479 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1480 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1481 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1482 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1483 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1484 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1485 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1486 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1487 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1488 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1489 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1490 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1491 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1492 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1493 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1494 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1495 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1496 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1497 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1498 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1499 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1500 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1501 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1502 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1503 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1504 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1505 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1506 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1507 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1508 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1509 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1510 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1511 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1512 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1513 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1514 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1515 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1516 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1517 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1518 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1519 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1520 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1521 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1522 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1523 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1524 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1525 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1526 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1527 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1528 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1529 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1530 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1531 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1532 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1533 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1534 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1535 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1536 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1537 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1538 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1539 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1540 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1541 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1542 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1543 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1544 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1545 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1546 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1547 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1548 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1549 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1550 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1551 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1552 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1553 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1554 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1555 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1556 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1557 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1558 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1559 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1560 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1561 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1562 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1563 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1564 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1565 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1566 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1567 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1568 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1569 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1570 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1571 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1572 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1573 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1574 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1575 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1576 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1577 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1578 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1579 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1580 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1581 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1582 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1583 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1584 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1585 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1586 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1587 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1588 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1589 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1590 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1591 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1592 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1593 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1594 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1595 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1596 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1597 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1598 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1599 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1600 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1601 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1602 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1603 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1604 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1605 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1606 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1607 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1608 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1609 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1610 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1611 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1612 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1613 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1614 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1615 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1616 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1617 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1618 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1619 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1620 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1621 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1622 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1623 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1624 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1625 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1626 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1627 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1628 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1629 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1630 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1631 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1632 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1633 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1634 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1635 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1636 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1637 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1638 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1639 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1640 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1641 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1642 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1643 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1644 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1645 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1646 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1647 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1648 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1649 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1650 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1651 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1652 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1653 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1654 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1655 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1656 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1657 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1658 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1659 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1660 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1661 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1662 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1663 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1664 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1665 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1666 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1667 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1668 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1669 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1670 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1671 };
1672 
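/* DMA core block base address of each PDMA/EDMA/KDMA engine */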
1673 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1674 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1675 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1676 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1677 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1678 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1679 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1680 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1681 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1682 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1683 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1684 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1685 };
1686 
1687 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1688 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1689 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1690 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1691 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1692 };
1693 
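/* CFG block base address of each TPC, indexed by TPC ID */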
1694 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1695 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1696 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1697 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1698 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1699 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1700 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1701 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1702 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1703 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1704 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1705 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1706 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1707 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1708 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1709 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1710 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1711 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1712 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1713 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1714 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1715 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1716 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1717 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1718 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1719 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1720 };
1721 
1722 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1723 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1724 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1725 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1726 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1727 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1728 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1729 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1730 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1731 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1732 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1733 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1734 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1735 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1736 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1737 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1738 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1739 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1740 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1741 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1742 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1743 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1744 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1745 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1746 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1747 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1748 };
1749 
1750 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1751 	[ROTATOR_ID_0] = mmROT0_BASE,
1752 	[ROTATOR_ID_1] = mmROT1_BASE
1753 };
1754 
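/* Queue ID of stream 0 of each TPC QMAN, indexed by TPC ID */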
1755 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1756 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1757 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1758 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1759 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1760 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1761 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1762 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1763 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1764 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1765 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1766 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1767 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1768 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1769 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1770 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1771 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1772 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1773 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1774 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1775 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1776 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1777 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1778 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1779 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1780 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1781 };
1782 
1783 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
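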
1784 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1785 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1786 };
1787 
1788 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1789 	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1790 	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1791 	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1792 	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1793 	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1794 	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1795 	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1796 	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1797 	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1798 	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1799 	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1800 	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1801 	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1802 	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1803 	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1804 	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1805 	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1806 	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1807 	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1808 	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1809 	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1810 	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1811 	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1812 	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like HW) */
1814 	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1815 };
1816 
1817 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1818 	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1819 	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1820 	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1821 	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1822 };
1823 
1824 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1825 	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1826 	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1827 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1828 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1829 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1830 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1831 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1832 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1833 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1834 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1835 	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1836 };
1837 
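/* Queue ID of stream 0 of each EDMA QMAN, ordered by DCORE and EDMA instance */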
1838 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1839 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1840 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1841 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1842 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1843 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1844 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1845 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1846 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1847 };
1848 
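/* MSI-X interrupt names of the video decoders (a normal and an abnormal interrupt per decoder) */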
1849 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1850 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1851 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1852 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1853 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1854 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1855 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1856 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1857 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1858 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1859 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1860 };
1861 
1862 enum rtr_id {
1863 	DCORE0_RTR0,
1864 	DCORE0_RTR1,
1865 	DCORE0_RTR2,
1866 	DCORE0_RTR3,
1867 	DCORE0_RTR4,
1868 	DCORE0_RTR5,
1869 	DCORE0_RTR6,
1870 	DCORE0_RTR7,
1871 	DCORE1_RTR0,
1872 	DCORE1_RTR1,
1873 	DCORE1_RTR2,
1874 	DCORE1_RTR3,
1875 	DCORE1_RTR4,
1876 	DCORE1_RTR5,
1877 	DCORE1_RTR6,
1878 	DCORE1_RTR7,
1879 	DCORE2_RTR0,
1880 	DCORE2_RTR1,
1881 	DCORE2_RTR2,
1882 	DCORE2_RTR3,
1883 	DCORE2_RTR4,
1884 	DCORE2_RTR5,
1885 	DCORE2_RTR6,
1886 	DCORE2_RTR7,
1887 	DCORE3_RTR0,
1888 	DCORE3_RTR1,
1889 	DCORE3_RTR2,
1890 	DCORE3_RTR3,
1891 	DCORE3_RTR4,
1892 	DCORE3_RTR5,
1893 	DCORE3_RTR6,
1894 	DCORE3_RTR7,
1895 };
1896 
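/* HBW router attached to each TPC initiator. The extra last entry is the PCI TPC (DCORE0_TPC6) */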
1897 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1898 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1899 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1900 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1901 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1902 	DCORE0_RTR0
1903 };
1904 
1905 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1906 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
1907 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
1908 	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
1909 	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
1910 	DCORE0_RTR0
1911 };
1912 
1913 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
1914 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1915 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1916 };
1917 
1918 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
1919 	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
1920 	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
1921 };
1922 
1923 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1924 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1925 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1926 };
1927 
1928 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1929 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1930 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1931 };
1932 
1933 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1934 	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1935 	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1936 	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1937 	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1938 	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1939 	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1940 	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1941 	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
1942 };
1943 
1944 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
1945 	DCORE0_RTR0, DCORE0_RTR0
1946 };
1947 
1948 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
1949 	DCORE0_RTR2, DCORE0_RTR2
1950 };
1951 
1952 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
1953 	DCORE2_RTR0, DCORE3_RTR7
1954 };
1955 
1956 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
1957 	DCORE2_RTR2, DCORE3_RTR5
1958 };
1959 
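/* Router IDs used by each MME initiator (WAP0/1, write, read and SBTE0-4) */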
1960 struct mme_initiators_rtr_id {
1961 	u32 wap0;
1962 	u32 wap1;
1963 	u32 write;
1964 	u32 read;
1965 	u32 sbte0;
1966 	u32 sbte1;
1967 	u32 sbte2;
1968 	u32 sbte3;
1969 	u32 sbte4;
1970 };
1971 
1972 enum mme_initiators {
1973 	MME_WAP0 = 0,
1974 	MME_WAP1,
1975 	MME_WRITE,
1976 	MME_READ,
1977 	MME_SBTE0,
1978 	MME_SBTE1,
1979 	MME_SBTE2,
1980 	MME_SBTE3,
1981 	MME_SBTE4,
1982 	MME_INITIATORS_MAX
1983 };
1984 
1985 static const struct mme_initiators_rtr_id
1986 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1987 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1988 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1989 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1990 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1991 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1992 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1993 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1994 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1995 };
1996 
1997 enum razwi_event_sources {
1998 	RAZWI_TPC,
1999 	RAZWI_MME,
2000 	RAZWI_EDMA,
2001 	RAZWI_PDMA,
2002 	RAZWI_NIC,
2003 	RAZWI_DEC,
2004 	RAZWI_ROT
2005 };
2006 
2007 struct hbm_mc_error_causes {
2008 	u32 mask;
2009 	char cause[50];
2010 };
2011 
2012 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2013 
/* Special blocks iterator is currently used to configure security protection bits,
 * and to read global errors. Most HW blocks are addressable, and those that aren't
 * (N/A) must be skipped. The following configurations are used for both PB config
 * and global error reading, since they currently share the same settings.
 * Once that changes, we must remember to use separate configurations for each.
 */
2020 static int gaudi2_iterator_skip_block_types[] = {
2021 		GAUDI2_BLOCK_TYPE_PLL,
2022 		GAUDI2_BLOCK_TYPE_EU_BIST,
2023 		GAUDI2_BLOCK_TYPE_HBM,
2024 		GAUDI2_BLOCK_TYPE_XFT
2025 };
2026 
2027 static struct range gaudi2_iterator_skip_block_ranges[] = {
2028 		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2029 		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2030 		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2031 		/* Skip all CPU blocks except for CPU_IF */
2032 		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2033 		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2034 };
2035 
2036 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2037 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2038 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2039 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2040 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2041 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2042 };
2043 
2044 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2045 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2046 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2047 	[HBM_SEI_READ_ERR] = "SEI read data error",
2048 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2049 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2050 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2051 	[HBM_SEI_DFI] = "SEI DFI error",
2052 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2053 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2054 };
2055 
2056 struct mmu_spi_sei_cause {
2057 	char cause[50];
2058 	int clear_bit;
2059 };
2060 
2061 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2062 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
2063 	{"page access", 1},		/* INTERRUPT_CLR[1] */
2064 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
2065 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
2066 	{"mmu rei0", -1},		/* no clear register bit */
2067 	{"mmu rei1", -1},		/* no clear register bit */
2068 	{"stlb rei0", -1},		/* no clear register bit */
2069 	{"stlb rei1", -1},		/* no clear register bit */
2070 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
2071 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
2072 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
2073 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
2074 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2075 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2076 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2077 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2078 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
2079 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
2080 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
2081 };
2082 
2083 struct gaudi2_cache_invld_params {
2084 	u64 start_va;
2085 	u64 end_va;
2086 	u32 inv_start_val;
2087 	u32 flags;
2088 	bool range_invalidation;
2089 };
2090 
2091 struct gaudi2_tpc_idle_data {
2092 	struct engines_data *e;
2093 	unsigned long *mask;
2094 	bool *is_idle;
2095 	const char *tpc_fmt;
2096 };
2097 
2098 struct gaudi2_tpc_mmu_data {
2099 	u32 rw_asid;
2100 };
2101 
2102 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2103 
2104 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2105 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2106 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2107 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2108 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2109 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2110 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2111 										bool is_memset);
2112 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2113 		struct engines_data *e);
2114 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2115 		struct engines_data *e);
2116 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2117 		struct engines_data *e);
2118 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2119 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2120 
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{

}
2125 
static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short);
}
2130 
static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
}
2135 
void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2137 {
2138 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2139 	int dcore, inst, tpc_seq;
2140 	u32 offset;
2141 
2142 	/* init the return code */
2143 	ctx->rc = 0;
2144 
2145 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2146 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2147 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2148 
2149 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2150 				continue;
2151 
2152 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2153 
2154 			ctx->fn(hdev, dcore, inst, offset, ctx);
2155 			if (ctx->rc) {
2156 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2157 							dcore, inst);
2158 				return;
2159 			}
2160 		}
2161 	}
2162 
2163 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2164 		return;
2165 
2166 	/* special check for PCI TPC (DCORE0_TPC6) */
2167 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2168 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2169 	if (ctx->rc)
2170 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2171 }
2172 
static bool gaudi2_host_phys_addr_valid(u64 addr)
2174 {
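	/*
	 * The address is valid if it falls below the end of the first host physical
	 * range or at/above the base of the second range, i.e. it is not inside the
	 * hole between the two ranges.
	 */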
2175 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2176 		return true;
2177 
2178 	return false;
2179 }
2180 
static int set_number_of_functional_hbms(struct hl_device *hdev)
2182 {
2183 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2184 	u8 faulty_hbms = hweight64(hdev->dram_binning);
2185 
2186 	/* check if all HBMs should be used */
2187 	if (!faulty_hbms) {
		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
2189 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
2190 		return 0;
2191 	}
2192 
	/*
	 * check for an error condition in which the number of binning
	 * candidates is higher than the maximum supported by the
	 * driver (in which case the binning mask shall be ignored and
	 * the driver will set the default)
	 */
2199 	if (faulty_hbms > MAX_FAULTY_HBMS) {
2200 		dev_err(hdev->dev,
2201 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2202 			MAX_FAULTY_HBMS, hdev->dram_binning);
2203 		return -EINVAL;
2204 	}
2205 
	/*
	 * binning is in effect - reduce the number of functional HBMs
	 * by the number of faulty ones (at most MAX_FAULTY_HBMS)
	 */
2210 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2211 	return 0;
2212 }
2213 
static int gaudi2_set_dram_properties(struct hl_device *hdev)
2215 {
2216 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2217 	u32 basic_hbm_page_size;
2218 	int rc;
2219 
2220 	rc = set_number_of_functional_hbms(hdev);
2221 	if (rc)
2222 		return -EINVAL;
2223 
	/*
	 * Due to a HW bug in which the TLB is x16 smaller than expected, we use a workaround
	 * of a x16 bigger page size, so that the entire HBM can still be mapped by the TLB
	 */
2229 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2230 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
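	/*
	 * For example, with 6 functional HBMs and a x16 compensation factor (both
	 * assumed here), the basic page is 48MB and the DRAM page size is 768MB.
	 */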
2231 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2232 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
2233 	prop->dram_base_address = DRAM_PHYS_BASE;
2234 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2235 	prop->dram_supports_virtual_memory = true;
2236 
2237 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2238 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2239 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2240 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2241 
	/* since the DRAM page size differs from the DMMU page size we need to allocate
	 * DRAM memory in units of dram_page_size and map this memory in
	 * units of the DMMU page size. we overcome this size mismatch using a
	 * scrambling routine which takes a DRAM page and converts it to a DMMU
	 * page.
	 * We therefore:
	 * 1. partition the virtual address space to DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3. determine the end address accordingly:
	 *    end_addr = start_addr + m * 48M
	 *
	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
2259 	prop->dmmu.start_addr = prop->dram_base_address +
2260 			(prop->dram_page_size *
2261 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2262 
2263 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2264 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2265 
2266 	return 0;
2267 }
2268 
static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2270 {
2271 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2272 	struct hw_queue_properties *q_props;
2273 	u32 num_sync_stream_queues = 0;
2274 	int i;
2275 
2276 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2277 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2278 					GFP_KERNEL);
2279 
2280 	if (!prop->hw_queues_props)
2281 		return -ENOMEM;
2282 
2283 	q_props = prop->hw_queues_props;
2284 
2285 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2286 		q_props[i].type = QUEUE_TYPE_HW;
2287 		q_props[i].driver_only = 0;
2288 
2289 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2290 			q_props[i].supports_sync_stream = 0;
2291 		} else {
2292 			q_props[i].supports_sync_stream = 1;
2293 			num_sync_stream_queues++;
2294 		}
2295 
2296 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2297 	}
2298 
2299 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2300 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2301 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2302 
2303 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2304 	prop->cfg_base_address = CFG_BASE;
2305 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2306 	prop->host_base_address = HOST_PHYS_BASE_0;
2307 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2308 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2309 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2310 	prop->user_dec_intr_count = NUMBER_OF_DEC;
2311 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2312 	prop->completion_mode = HL_COMPLETION_MODE_CS;
2313 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2314 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2315 
2316 	prop->sram_base_address = SRAM_BASE_ADDR;
2317 	prop->sram_size = SRAM_SIZE;
2318 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2319 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2320 
2321 	prop->hints_range_reservation = true;
2322 
2323 	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2324 
2325 	if (hdev->pldm)
2326 		prop->mmu_pgt_size = 0x800000; /* 8MB */
2327 	else
2328 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2329 
2330 	prop->mmu_pte_size = HL_PTE_SIZE;
2331 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2332 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2333 
2334 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2335 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2336 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2337 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2338 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2339 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2340 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2341 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2342 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2343 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2344 	prop->dmmu.page_size = PAGE_SIZE_1GB;
2345 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2346 	prop->dmmu.last_mask = LAST_MASK;
2347 	prop->dmmu.host_resident = 1;
2348 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2349 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2350 
2351 	/*
2352 	 * This is done in order to be able to validate the FW descriptor (i.e. validating that
2353 	 * the addresses and allocated space for the FW image do not cross memory bounds).
2354 	 * For this reason we set the DRAM size to the minimum possible, and later it will be
2355 	 * modified according to what is reported in the cpucp info packet
2356 	 */
2357 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2358 
2359 	hdev->pmmu_huge_range = true;
2360 	prop->pmmu.host_resident = 1;
2361 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2362 	prop->pmmu.last_mask = LAST_MASK;
2363 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2364 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2365 
2366 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2367 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2368 	prop->hints_host_hpage_reserved_va_range.start_addr =
2369 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2370 	prop->hints_host_hpage_reserved_va_range.end_addr =
2371 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2372 
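	/* PMMU hop shifts/masks depend on the host kernel page size (64KB vs. 4KB) */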
2373 	if (PAGE_SIZE == SZ_64K) {
2374 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2375 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2376 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2377 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2378 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2379 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2380 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2381 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2382 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2383 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2384 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2385 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2386 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2387 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2388 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2389 
2390 		/* shifts and masks are the same in PMMU and HPMMU */
2391 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2392 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2393 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2394 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2395 	} else {
2396 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2397 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2398 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2399 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2400 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2401 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2402 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2403 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2404 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2405 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2406 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2407 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2408 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2409 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2410 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2411 
2412 		/* shifts and masks are the same in PMMU and HPMMU */
2413 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2414 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2415 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2416 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2417 	}
2418 
2419 	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2420 	prop->num_engine_cores = CPU_ID_MAX;
2421 	prop->cfg_size = CFG_SIZE;
2422 	prop->max_asid = MAX_ASID;
2423 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2424 
2425 	prop->supports_engine_modes = true;
2426 
2427 	prop->dc_power_default = DC_POWER_DEFAULT;
2428 
2429 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2430 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2431 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2432 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2433 
2434 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2435 
2436 	prop->mme_master_slave_mode = 1;
2437 
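	/* The first user-available SOB/MON comes right after the reserved range plus the
	 * SOBs/MONs consumed by the sync-stream capable queues
	 */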
2438 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2439 					(num_sync_stream_queues * HL_RSVD_SOBS);
2440 
2441 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2442 					(num_sync_stream_queues * HL_RSVD_MONS);
2443 
2444 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2445 	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2446 	prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2447 
2448 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2449 
2450 	prop->fw_cpu_boot_dev_sts0_valid = false;
2451 	prop->fw_cpu_boot_dev_sts1_valid = false;
2452 	prop->hard_reset_done_by_fw = false;
2453 	prop->gic_interrupts_enable = true;
2454 
2455 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2456 
2457 	prop->max_dec = NUMBER_OF_DEC;
2458 
2459 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2460 
2461 	prop->dma_mask = 64;
2462 
2463 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2464 
2465 	return 0;
2466 }
2467 
2468 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2469 {
2470 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2471 	bool is_wc[3] = {false, false, true};
2472 	int rc;
2473 
2474 	rc = hl_pci_bars_map(hdev, name, is_wc);
2475 	if (rc)
2476 		return rc;
2477 
2478 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2479 
2480 	return 0;
2481 }
2482 
2483 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2484 {
2485 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2486 	struct hl_inbound_pci_region pci_region;
2487 	u64 old_addr = addr;
2488 	int rc;
2489 
2490 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2491 		return old_addr;
2492 
2493 	if (hdev->asic_prop.iatu_done_by_fw)
2494 		return U64_MAX;
2495 
2496 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2497 	pci_region.mode = PCI_BAR_MATCH_MODE;
2498 	pci_region.bar = DRAM_BAR_ID;
2499 	pci_region.addr = addr;
2500 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2501 	if (rc)
2502 		return U64_MAX;
2503 
2504 	if (gaudi2) {
2505 		old_addr = gaudi2->dram_bar_cur_addr;
2506 		gaudi2->dram_bar_cur_addr = addr;
2507 	}
2508 
2509 	return old_addr;
2510 }
2511 
2512 static int gaudi2_init_iatu(struct hl_device *hdev)
2513 {
2514 	struct hl_inbound_pci_region inbound_region;
2515 	struct hl_outbound_pci_region outbound_region;
2516 	u32 bar_addr_low, bar_addr_high;
2517 	int rc;
2518 
2519 	if (hdev->asic_prop.iatu_done_by_fw)
2520 		return 0;
2521 
2522 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2523 	 * We must map this region in BAR match mode in order to
2524 	 * fetch BAR physical base address
2525 	 */
2526 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2527 	inbound_region.bar = SRAM_CFG_BAR_ID;
2528 	/* Base address must be aligned to Bar size which is 256 MB */
2529 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2530 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2531 	if (rc)
2532 		return rc;
2533 
2534 	/* Fetch physical BAR address */
2535 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2536 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2537 
2538 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2539 
2540 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2541 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2542 	inbound_region.bar = SRAM_CFG_BAR_ID;
2543 	inbound_region.offset_in_bar = 0;
2544 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2545 	inbound_region.size = CFG_REGION_SIZE;
2546 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2547 	if (rc)
2548 		return rc;
2549 
2550 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2551 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2552 	inbound_region.bar = SRAM_CFG_BAR_ID;
2553 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2554 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2555 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2556 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2557 	if (rc)
2558 		return rc;
2559 
2560 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2561 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2562 	inbound_region.bar = DRAM_BAR_ID;
2563 	inbound_region.addr = DRAM_PHYS_BASE;
2564 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2565 	if (rc)
2566 		return rc;
2567 
2568 	/* Outbound Region 0 - Point to Host */
2569 	outbound_region.addr = HOST_PHYS_BASE_0;
2570 	outbound_region.size = HOST_PHYS_SIZE_0;
2571 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2572 
2573 	return rc;
2574 }
2575 
2576 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2577 {
2578 	return RREG32(mmHW_STATE);
2579 }
2580 
2581 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2582 {
2583 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2584 
2585 	/*
2586 	 * check for the error condition in which the number of binning candidates
2587 	 * is higher than the maximum supported by the driver
2588 	 */
2589 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2590 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2591 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2592 					hdev->tpc_binning);
2593 		return -EINVAL;
2594 	}
2595 
2596 	prop->tpc_binning_mask = hdev->tpc_binning;
2597 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2598 
2599 	return 0;
2600 }
2601 
2602 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2603 {
2604 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2605 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2606 	u64 tpc_binning_mask;
2607 	u8 subst_idx = 0;
2608 	int i, rc;
2609 
2610 	rc = gaudi2_tpc_binning_init_prop(hdev);
2611 	if (rc)
2612 		return rc;
2613 
2614 	tpc_binning_mask = prop->tpc_binning_mask;
2615 
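	/* For each binned TPC, a predefined substitute TPC (DCORE0_TPC6 first, then
	 * DCORE3_TPC5) takes its place, and the substitute's queues are binned instead
	 */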
2616 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2617 		u8 subst_seq, binned, qid_base;
2618 
2619 		if (tpc_binning_mask == 0)
2620 			break;
2621 
2622 		if (subst_idx == 0) {
2623 			subst_seq = TPC_ID_DCORE0_TPC6;
2624 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2625 		} else {
2626 			subst_seq = TPC_ID_DCORE3_TPC5;
2627 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2628 		}
2629 
2630 
2631 		/* clear bit from mask */
2632 		binned = __ffs(tpc_binning_mask);
2633 		/*
2634 		 * Coverity complains about possible out-of-bound access in
2635 		 * clear_bit
2636 		 */
2637 		if (binned >= TPC_ID_SIZE) {
2638 			dev_err(hdev->dev,
2639 				"Invalid binned TPC (binning mask: %llx)\n",
2640 				tpc_binning_mask);
2641 			return -EINVAL;
2642 		}
2643 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2644 
2645 		/* also clear replacing TPC bit from enabled mask */
2646 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2647 
2648 		/* bin the substitute TPC's queues */
2649 		q_props[qid_base].binned = 1;
2650 		q_props[qid_base + 1].binned = 1;
2651 		q_props[qid_base + 2].binned = 1;
2652 		q_props[qid_base + 3].binned = 1;
2653 
2654 		subst_idx++;
2655 	}
2656 
2657 	return 0;
2658 }
2659 
2660 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2661 {
2662 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2663 	u8 num_faulty;
2664 
2665 	num_faulty = hweight32(hdev->decoder_binning);
2666 
2667 	/*
2668 	 * check for the error condition in which the number of binning candidates
2669 	 * is higher than the maximum supported by the driver
2670 	 */
2671 	if (num_faulty > MAX_FAULTY_DECODERS) {
2672 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2673 						hdev->decoder_binning);
2674 		return -EINVAL;
2675 	}
2676 
2677 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2678 
2679 	if (prop->decoder_binning_mask)
2680 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2681 	else
2682 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2683 
2684 	return 0;
2685 }
2686 
2687 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2688 {
2689 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2690 
2691 	/* check if we should override default binning */
2692 	if (!hdev->dram_binning) {
2693 		prop->dram_binning_mask = 0;
2694 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2695 		return;
2696 	}
2697 
2698 	/* set DRAM binning constraints */
2699 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2700 	prop->dram_binning_mask = hdev->dram_binning;
2701 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2702 }
2703 
2704 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2705 {
2706 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2707 	struct hw_queue_properties *q_props;
2708 	u8 seq, num_faulty;
2709 
2710 	num_faulty = hweight32(hdev->edma_binning);
2711 
2712 	/*
2713 	 * check for the error condition in which the number of binning candidates
2714 	 * is higher than the maximum supported by the driver
2715 	 */
2716 	if (num_faulty > MAX_FAULTY_EDMAS) {
2717 		dev_err(hdev->dev,
2718 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2719 			hdev->edma_binning);
2720 		return -EINVAL;
2721 	}
2722 
2723 	if (!hdev->edma_binning) {
2724 		prop->edma_binning_mask = 0;
2725 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2726 		return 0;
2727 	}
2728 
2729 	seq = __ffs((unsigned long)hdev->edma_binning);
2730 
2731 	/* set binning constraints */
2732 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2733 	prop->edma_binning_mask = hdev->edma_binning;
2734 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2735 
2736 	/* bin substitute EDMA's queue */
2737 	q_props = prop->hw_queues_props;
2738 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2739 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2740 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2741 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2742 
2743 	return 0;
2744 }
2745 
2746 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2747 {
2748 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2749 	u8 num_faulty, seq;
2750 
2751 	/* check if we should override default binning */
2752 	if (!xbar_edge_iso_mask) {
2753 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2754 		return 0;
2755 	}
2756 
2757 	/*
2758 	 * Note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2759 	 * only the FW can set a redundancy value). For the user it will always be 0.
2760 	 */
2761 	num_faulty = hweight32(xbar_edge_iso_mask);
2762 
2763 	/*
2764 	 * check for the error condition in which the number of binning candidates
2765 	 * is higher than the maximum supported by the driver
2766 	 */
2767 	if (num_faulty > MAX_FAULTY_XBARS) {
2768 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2769 									MAX_FAULTY_XBARS);
2770 		return -EINVAL;
2771 	}
2772 
2773 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2774 
2775 	/* set binning constraints */
2776 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2777 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2778 
2779 	return 0;
2780 }
2781 
2782 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2783 {
2784 	int rc;
2785 
2786 	/*
2787 	 * Mark all clusters as good; each component will "fail" a cluster
2788 	 * based on eFuse/user values.
2789 	 * If more than a single cluster is faulty, the chip is unusable
2790 	 */
2791 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2792 
2793 	gaudi2_set_dram_binning_masks(hdev);
2794 
2795 	rc = gaudi2_set_edma_binning_masks(hdev);
2796 	if (rc)
2797 		return rc;
2798 
2799 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2800 	if (rc)
2801 		return rc;
2802 
2803 
2804 	/* always initially set to full mask */
2805 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2806 
2807 	return 0;
2808 }
2809 
2810 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2811 {
2812 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2813 	int rc;
2814 
2815 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2816 	if (rc)
2817 		return rc;
2818 
2819 	/* if we have DRAM binning reported by the FW, we should perform cluster config */
2820 	if (prop->faulty_dram_cluster_map) {
2821 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2822 
2823 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2824 	}
2825 
2826 	return 0;
2827 }
2828 
2829 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2830 {
2831 	int rc;
2832 
2833 	rc = gaudi2_set_cluster_binning_masks(hdev);
2834 	if (rc)
2835 		return rc;
2836 
2837 	rc = gaudi2_set_tpc_binning_masks(hdev);
2838 	if (rc)
2839 		return rc;
2840 
2841 	rc = gaudi2_set_dec_binning_masks(hdev);
2842 	if (rc)
2843 		return rc;
2844 
2845 	return 0;
2846 }
2847 
2848 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2849 {
2850 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2851 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2852 	long max_power;
2853 	u64 dram_size;
2854 	int rc;
2855 
2856 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2857 		return 0;
2858 
2859 	/* No point in asking for this information again when not doing a hard reset, as the device
2860 	 * CPU hasn't been reset
2861 	 */
2862 	if (hdev->reset_info.in_compute_reset)
2863 		return 0;
2864 
2865 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2866 										mmCPU_BOOT_ERR1);
2867 	if (rc)
2868 		return rc;
2869 
2870 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2871 	if (dram_size) {
2872 		/* we can have either 5 or 6 HBMs. Other values are invalid */
2873 
2874 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2875 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2876 			dev_err(hdev->dev,
2877 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2878 				dram_size, prop->dram_size);
2879 			dram_size = prop->dram_size;
2880 		}
2881 
2882 		prop->dram_size = dram_size;
2883 		prop->dram_end_address = prop->dram_base_address + dram_size;
2884 	}
2885 
2886 	if (!strlen(prop->cpucp_info.card_name))
2887 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2888 
2889 	/* Overwrite binning masks with the actual binning values from F/W */
2890 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2891 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2892 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2893 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2894 
2895 	dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2896 			hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2897 			hdev->decoder_binning);
2898 
2899 	/*
2900 	 * at this point the DRAM parameters need to be updated according to data obtained
2901 	 * from the FW
2902 	 */
2903 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2904 	if (rc)
2905 		return rc;
2906 
2907 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2908 	if (rc)
2909 		return rc;
2910 
2911 	max_power = hl_fw_get_max_power(hdev);
2912 	if (max_power < 0)
2913 		return max_power;
2914 
2915 	prop->max_power_default = (u64) max_power;
2916 
2917 	return 0;
2918 }
2919 
2920 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2921 {
2922 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2923 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2924 	int rc;
2925 
2926 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2927 		return 0;
2928 
2929 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2930 	if (rc)
2931 		return rc;
2932 
2933 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2934 
2935 	return 0;
2936 }
2937 
2938 static int gaudi2_early_init(struct hl_device *hdev)
2939 {
2940 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2941 	struct pci_dev *pdev = hdev->pdev;
2942 	resource_size_t pci_bar_size;
2943 	int rc;
2944 
2945 	rc = gaudi2_set_fixed_properties(hdev);
2946 	if (rc)
2947 		return rc;
2948 
2949 	/* Check BAR sizes */
2950 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2951 
2952 	if (pci_bar_size != CFG_BAR_SIZE) {
2953 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2954 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2955 		rc = -ENODEV;
2956 		goto free_queue_props;
2957 	}
2958 
2959 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2960 	if (pci_bar_size != MSIX_BAR_SIZE) {
2961 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2962 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2963 		rc = -ENODEV;
2964 		goto free_queue_props;
2965 	}
2966 
2967 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2968 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2969 
2970 	/*
2971 	 * Only on pldm does the driver configure the iATU (otherwise it is done by the FW)
2972 	 */
2973 	if (hdev->pldm)
2974 		hdev->asic_prop.iatu_done_by_fw = false;
2975 	else
2976 		hdev->asic_prop.iatu_done_by_fw = true;
2977 
2978 	rc = hl_pci_init(hdev);
2979 	if (rc)
2980 		goto free_queue_props;
2981 
2982 	/* Before continuing in the initialization, we need to read the preboot
2983 	 * version to determine whether we run with a security-enabled firmware
2984 	 */
2985 	rc = hl_fw_read_preboot_status(hdev);
2986 	if (rc) {
2987 		if (hdev->reset_on_preboot_fail)
2988 			/* we are already on failure flow, so don't check if hw_fini fails. */
2989 			hdev->asic_funcs->hw_fini(hdev, true, false);
2990 		goto pci_fini;
2991 	}
2992 
2993 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2994 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2995 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2996 		if (rc) {
2997 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
2998 			goto pci_fini;
2999 		}
3000 	}
3001 
3002 	return 0;
3003 
3004 pci_fini:
3005 	hl_pci_fini(hdev);
3006 free_queue_props:
3007 	kfree(hdev->asic_prop.hw_queues_props);
3008 	return rc;
3009 }
3010 
3011 static int gaudi2_early_fini(struct hl_device *hdev)
3012 {
3013 	kfree(hdev->asic_prop.hw_queues_props);
3014 	hl_pci_fini(hdev);
3015 
3016 	return 0;
3017 }
3018 
3019 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3020 {
3021 	switch (arc_id) {
3022 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3023 		return true;
3024 	default:
3025 		return false;
3026 	}
3027 }
3028 
3029 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3030 {
3031 	switch (arc_id) {
3032 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3033 		return true;
3034 	default:
3035 		return false;
3036 	}
3037 }
3038 
3039 static void gaudi2_init_arcs(struct hl_device *hdev)
3040 {
3041 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3042 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3043 	u64 arc_id;
3044 	u32 i;
3045 
3046 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3047 		if (gaudi2_is_arc_enabled(hdev, i))
3048 			continue;
3049 
3050 		gaudi2_set_arc_id_cap(hdev, i);
3051 	}
3052 
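	/* Walk the QMANs (each owns 4 consecutive queue IDs) and mark the ARC behind every
	 * enabled queue as enabled, skipping NIC/TPC ARCs whose engines are masked off
	 */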
3053 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3054 		if (!gaudi2_is_queue_enabled(hdev, i))
3055 			continue;
3056 
3057 		arc_id = gaudi2_queue_id_to_arc_id[i];
3058 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3059 			continue;
3060 
3061 		if (gaudi2_is_arc_nic_owned(arc_id) &&
3062 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3063 			continue;
3064 
3065 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3066 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3067 			continue;
3068 
3069 		gaudi2_set_arc_id_cap(hdev, arc_id);
3070 	}
3071 
3072 	/* Fetch ARC scratchpad address */
3073 	hdev->asic_prop.engine_core_interrupt_reg_addr =
3074 		CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3075 }
3076 
3077 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3078 {
3079 	u32 reg_base, reg_val;
3080 	int rc;
3081 
3082 	switch (cpu_id) {
3083 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3084 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
3085 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3086 						ARC_DCCM_BLOCK_SIZE * 2, true);
3087 		if (rc)
3088 			return rc;
3089 		break;
3090 	case CPU_ID_SCHED_ARC4:
3091 	case CPU_ID_SCHED_ARC5:
3092 	case CPU_ID_MME_QMAN_ARC0:
3093 	case CPU_ID_MME_QMAN_ARC1:
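		/* These ARCs have two DCCM blocks selected via a bank-switch register,
		 * so scrub the lower and upper halves separately
		 */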
3094 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
3095 
3096 		/* Scrub lower DCCM block */
3097 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3098 						ARC_DCCM_BLOCK_SIZE, true);
3099 		if (rc)
3100 			return rc;
3101 
3102 		/* Switch to upper DCCM block */
3103 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3104 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3105 
3106 		/* Scrub upper DCCM block */
3107 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3108 						ARC_DCCM_BLOCK_SIZE, true);
3109 		if (rc)
3110 			return rc;
3111 
3112 		/* Switch to lower DCCM block */
3113 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3114 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3115 		break;
3116 	default:
3117 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3118 						ARC_DCCM_BLOCK_SIZE, true);
3119 		if (rc)
3120 			return rc;
3121 	}
3122 
3123 	return 0;
3124 }
3125 
3126 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3127 {
3128 	u16 arc_id;
3129 	int rc;
3130 
3131 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3132 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
3133 			continue;
3134 
3135 		rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3136 		if (rc)
3137 			return rc;
3138 	}
3139 
3140 	return 0;
3141 }
3142 
3143 static int gaudi2_late_init(struct hl_device *hdev)
3144 {
3145 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3146 	int rc;
3147 
3148 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
3149 
3150 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3151 					gaudi2->virt_msix_db_dma_addr);
3152 	if (rc) {
3153 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3154 		return rc;
3155 	}
3156 
3157 	rc = gaudi2_fetch_psoc_frequency(hdev);
3158 	if (rc) {
3159 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3160 		goto disable_pci_access;
3161 	}
3162 
3163 	gaudi2_init_arcs(hdev);
3164 
3165 	rc = gaudi2_scrub_arcs_dccm(hdev);
3166 	if (rc) {
3167 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3168 		goto disable_pci_access;
3169 	}
3170 
3171 	gaudi2_init_security(hdev);
3172 
3173 	return 0;
3174 
3175 disable_pci_access:
3176 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3177 
3178 	return rc;
3179 }
3180 
3181 static void gaudi2_late_fini(struct hl_device *hdev)
3182 {
3183 	hl_hwmon_release_resources(hdev);
3184 }
3185 
3186 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3187 {
3188 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3189 
3190 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3191 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3192 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3193 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3194 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3195 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3196 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3197 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3198 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3199 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3200 }
3201 
3202 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3203 {
3204 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3205 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3206 	u32 block_size, umr_start_idx, num_umr_blocks;
3207 	int i;
3208 
3209 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3210 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3211 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
3212 		else
3213 			block_size = ARC_DCCM_BLOCK_SIZE;
3214 
3215 		blocks[i].address = gaudi2_arc_dccm_bases[i];
3216 		blocks[i].size = block_size;
3217 	}
3218 
3219 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3220 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3221 
3222 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3223 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3224 
3225 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3226 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3227 
3228 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3229 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3230 
3231 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3232 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3233 
3234 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3235 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3236 
3237 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3238 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3239 
3240 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3241 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3242 
3243 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3244 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3245 	for (i = 0 ; i < num_umr_blocks ; i++) {
3246 		u8 nic_id, umr_block_id;
3247 
3248 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3249 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3250 
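		/* Derive the engine's UMR doorbell block address from its NIC macro,
		 * the QMAN within the macro and the UMR block index
		 */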
3251 		blocks[umr_start_idx + i].address =
3252 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3253 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3254 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3255 			umr_block_id * NIC_UMR_OFFSET;
3256 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3257 	}
3258 
3259 	/* Expose decoder HW configuration block to user */
3260 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3261 
3262 	for (i = 1; i < NUM_OF_DCORES; ++i) {
3263 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3264 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3265 
3266 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3267 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3268 
3269 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3270 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3271 	}
3272 }
3273 
3274 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3275 {
3276 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3277 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3278 	int i, j, rc = 0;
3279 
3280 	/* The device ARC works with 32-bit addresses, and because there is a single HW register
3281 	 * that holds the extension bits (49..28), these bits must be identical across the entire
3282 	 * allocated range.
3283 	 */
3284 
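	/* Retry until an allocation whose entire range shares the same MSB extension bits is
	 * obtained; earlier attempts are kept until the end of the function and then freed
	 */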
3285 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3286 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3287 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3288 		if (!virt_addr_arr[i]) {
3289 			rc = -ENOMEM;
3290 			goto free_dma_mem_arr;
3291 		}
3292 
3293 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3294 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3295 			break;
3296 	}
3297 
3298 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3299 		dev_err(hdev->dev,
3300 			"MSB of ARC accessible DMA memory are not identical in all range\n");
3301 		rc = -EFAULT;
3302 		goto free_dma_mem_arr;
3303 	}
3304 
3305 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3306 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3307 
3308 free_dma_mem_arr:
3309 	for (j = 0 ; j < i ; j++)
3310 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3311 						dma_addr_arr[j]);
3312 
3313 	return rc;
3314 }
3315 
3316 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3317 {
3318 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3319 	struct pci_mem_region *region;
3320 
3321 	/* CFG */
3322 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
3323 	region->region_base = CFG_BASE;
3324 	region->region_size = CFG_SIZE;
3325 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3326 	region->bar_size = CFG_BAR_SIZE;
3327 	region->bar_id = SRAM_CFG_BAR_ID;
3328 	region->used = 1;
3329 
3330 	/* SRAM */
3331 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3332 	region->region_base = SRAM_BASE_ADDR;
3333 	region->region_size = SRAM_SIZE;
3334 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3335 	region->bar_size = CFG_BAR_SIZE;
3336 	region->bar_id = SRAM_CFG_BAR_ID;
3337 	region->used = 1;
3338 
3339 	/* DRAM */
3340 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3341 	region->region_base = DRAM_PHYS_BASE;
3342 	region->region_size = hdev->asic_prop.dram_size;
3343 	region->offset_in_bar = 0;
3344 	region->bar_size = prop->dram_pci_bar_size;
3345 	region->bar_id = DRAM_BAR_ID;
3346 	region->used = 1;
3347 }
3348 
3349 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3350 {
3351 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3352 	int i, j, k;
3353 
3354 	/* Initialize TPC interrupt */
3355 	HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3356 
3357 	/* Initialize unexpected error interrupt */
3358 	HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3359 						HL_USR_INTERRUPT_UNEXPECTED);
3360 
3361 	/* Initialize common user CQ interrupt */
3362 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3363 				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3364 
3365 	/* Initialize common decoder interrupt */
3366 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3367 				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3368 
3369 	/* User interrupts structure holds both decoder and user interrupts from various engines.
3370 	 * We first initialize the decoder interrupts and then we add the user interrupts.
3371 	 * The only limitation is that the last decoder interrupt id must be smaller
3372 	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3373 	 */
3374 
3375 	/* Initialize decoder interrupts, expose only normal interrupts,
3376 	 * error interrupts to be handled by driver
3377 	 */
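	/* Decoder IRQ numbers advance in steps of 2 since only the normal (NRM) interrupt
	 * of each decoder is exposed to the user
	 */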
3378 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3379 										i += 2, j++)
3380 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3381 						HL_USR_INTERRUPT_DECODER);
3382 
3383 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3384 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3385 }
3386 
3387 static inline int gaudi2_get_non_zero_random_int(void)
3388 {
3389 	int rand = get_random_u32();
3390 
3391 	return rand ? rand : 1;
3392 }
3393 
3394 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3395 {
3396 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3397 	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3398 			&prop->skip_special_blocks_cfg;
3399 
3400 	kfree(prop->special_blocks);
3401 	kfree(skip_special_blocks_cfg->block_types);
3402 	kfree(skip_special_blocks_cfg->block_ranges);
3403 }
3404 
3405 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3406 {
3407 	gaudi2_special_blocks_free(hdev);
3408 }
3409 
3410 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3411 		struct hl_special_blocks_cfg *special_blocks_cfg,
3412 		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3413 {
3414 	return false;
3415 }
3416 
3417 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3418 {
3419 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3420 	int i, rc;
3421 
3422 	/* Configure Special blocks */
3423 	prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3424 	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3425 	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3426 			sizeof(*prop->special_blocks), GFP_KERNEL);
3427 	if (!prop->special_blocks)
3428 		return -ENOMEM;
3429 
3430 	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3431 		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3432 				sizeof(*prop->special_blocks));
3433 
3434 	/* Configure when to skip Special blocks */
3435 	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3436 	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3437 
3438 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3439 		prop->skip_special_blocks_cfg.block_types =
3440 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3441 					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3442 		if (!prop->skip_special_blocks_cfg.block_types) {
3443 			rc = -ENOMEM;
3444 			goto free_special_blocks;
3445 		}
3446 
3447 		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3448 				sizeof(gaudi2_iterator_skip_block_types));
3449 
3450 		prop->skip_special_blocks_cfg.block_types_len =
3451 					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3452 	}
3453 
3454 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3455 		prop->skip_special_blocks_cfg.block_ranges =
3456 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3457 					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3458 		if (!prop->skip_special_blocks_cfg.block_ranges) {
3459 			rc = -ENOMEM;
3460 			goto free_skip_special_blocks_types;
3461 		}
3462 
3463 		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3464 			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3465 					&gaudi2_iterator_skip_block_ranges[i],
3466 					sizeof(struct range));
3467 
3468 		prop->skip_special_blocks_cfg.block_ranges_len =
3469 					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3470 	}
3471 
3472 	return 0;
3473 
3474 free_skip_special_blocks_types:
3475 	kfree(prop->skip_special_blocks_cfg.block_types);
3476 free_special_blocks:
3477 	kfree(prop->special_blocks);
3478 
3479 	return rc;
3480 }
3481 
3482 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3483 {
3484 	return gaudi2_special_blocks_config(hdev);
3485 }
3486 
3487 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3488 {
3489 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3490 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3491 	int i;
3492 
3493 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3494 		/* bail-out if this is an allocation failure point */
3495 		if (!msg_info[i].kern_addr)
3496 			break;
3497 
3498 		hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3499 		msg_info[i].kern_addr = NULL;
3500 	}
3501 }
3502 
3503 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3504 {
3505 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3506 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3507 	int i, rc;
3508 
3509 	/* allocate a message-short buf for each Q we intend to test */
3510 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3511 		msg_info[i].kern_addr =
3512 			(void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3513 							GFP_KERNEL, &msg_info[i].dma_addr);
3514 		if (!msg_info[i].kern_addr) {
3515 			dev_err(hdev->dev,
3516 				"Failed to allocate dma memory for H/W queue %d testing\n", i);
3517 			rc = -ENOMEM;
3518 			goto err_exit;
3519 		}
3520 	}
3521 
3522 	return 0;
3523 
3524 err_exit:
3525 	gaudi2_test_queues_msgs_free(hdev);
3526 	return rc;
3527 }
3528 
3529 static int gaudi2_sw_init(struct hl_device *hdev)
3530 {
3531 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3532 	struct gaudi2_device *gaudi2;
3533 	int i, rc;
3534 
3535 	/* Allocate device structure */
3536 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3537 	if (!gaudi2)
3538 		return -ENOMEM;
3539 
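	/* Collect the event IDs of all valid, non-message entries in the IRQ map table
	 * into the H/W events array
	 */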
3540 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3541 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3542 			continue;
3543 
3544 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3545 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3546 				GAUDI2_EVENT_SIZE);
3547 			rc = -EINVAL;
3548 			goto free_gaudi2_device;
3549 		}
3550 
3551 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3552 	}
3553 
3554 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3555 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3556 
3557 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3558 
3559 	hdev->asic_specific = gaudi2;
3560 
3561 	/* Create DMA pool for small allocations.
3562 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3563 	 * PI/CI registers allocated from this pool have this restriction
3564 	 */
3565 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3566 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3567 	if (!hdev->dma_pool) {
3568 		dev_err(hdev->dev, "failed to create DMA pool\n");
3569 		rc = -ENOMEM;
3570 		goto free_gaudi2_device;
3571 	}
3572 
3573 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3574 	if (rc)
3575 		goto free_dma_pool;
3576 
3577 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3578 	if (!hdev->cpu_accessible_dma_pool) {
3579 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3580 		rc = -ENOMEM;
3581 		goto free_cpu_dma_mem;
3582 	}
3583 
3584 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3585 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3586 	if (rc) {
3587 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3588 		rc = -EFAULT;
3589 		goto free_cpu_accessible_dma_pool;
3590 	}
3591 
3592 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3593 								&gaudi2->virt_msix_db_dma_addr);
3594 	if (!gaudi2->virt_msix_db_cpu_addr) {
3595 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3596 		rc = -ENOMEM;
3597 		goto free_cpu_accessible_dma_pool;
3598 	}
3599 
3600 	spin_lock_init(&gaudi2->hw_queues_lock);
3601 
3602 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3603 							&gaudi2->scratchpad_bus_address,
3604 							GFP_KERNEL | __GFP_ZERO);
3605 	if (!gaudi2->scratchpad_kernel_address) {
3606 		rc = -ENOMEM;
3607 		goto free_virt_msix_db_mem;
3608 	}
3609 
3610 	gaudi2_user_mapped_blocks_init(hdev);
3611 
3612 	/* Initialize user interrupts */
3613 	gaudi2_user_interrupt_setup(hdev);
3614 
3615 	hdev->supports_coresight = true;
3616 	hdev->supports_sync_stream = true;
3617 	hdev->supports_cb_mapping = true;
3618 	hdev->supports_wait_for_multi_cs = false;
3619 
3620 	prop->supports_compute_reset = true;
3621 
3622 	/* Event queue sanity check added in FW version 1.11 */
3623 	if (hl_is_fw_sw_ver_below(hdev, 1, 11))
3624 		hdev->event_queue.check_eqe_index = false;
3625 	else
3626 		hdev->event_queue.check_eqe_index = true;
3627 
3628 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3629 
3630 	rc = gaudi2_special_blocks_iterator_config(hdev);
3631 	if (rc)
3632 		goto free_scratchpad_mem;
3633 
3634 	rc = gaudi2_test_queues_msgs_alloc(hdev);
3635 	if (rc)
3636 		goto special_blocks_free;
3637 
3638 	return 0;
3639 
3640 special_blocks_free:
3641 	gaudi2_special_blocks_iterator_free(hdev);
3642 free_scratchpad_mem:
3643 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3644 				  gaudi2->scratchpad_bus_address);
3645 free_virt_msix_db_mem:
3646 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3647 free_cpu_accessible_dma_pool:
3648 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3649 free_cpu_dma_mem:
3650 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3651 					hdev->cpu_accessible_dma_address);
3652 free_dma_pool:
3653 	dma_pool_destroy(hdev->dma_pool);
3654 free_gaudi2_device:
3655 	kfree(gaudi2);
3656 	return rc;
3657 }
3658 
3659 static int gaudi2_sw_fini(struct hl_device *hdev)
3660 {
3661 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3662 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3663 
3664 	gaudi2_test_queues_msgs_free(hdev);
3665 
3666 	gaudi2_special_blocks_iterator_free(hdev);
3667 
3668 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3669 
3670 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3671 
3672 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3673 						hdev->cpu_accessible_dma_address);
3674 
3675 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3676 					gaudi2->scratchpad_bus_address);
3677 
3678 	dma_pool_destroy(hdev->dma_pool);
3679 
3680 	kfree(gaudi2);
3681 
3682 	return 0;
3683 }
3684 
3685 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3686 {
3687 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3688 						QM_GLBL_CFG1_CQF_STOP |
3689 						QM_GLBL_CFG1_CP_STOP);
3690 
3691 	/* stop also the ARC */
3692 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3693 }
3694 
3695 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3696 {
3697 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3698 						QM_GLBL_CFG1_CQF_FLUSH |
3699 						QM_GLBL_CFG1_CP_FLUSH);
3700 }
3701 
3702 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3703 {
3704 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3705 }
3706 
3707 /**
3708  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3709  *
3710  * @hdev: pointer to the habanalabs device structure
3711  * @queue_id: queue whose fence counters should be cleared
3712  * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3713  *              getting stuck on any fence value. Otherwise set all fence
3714  *              counters to 0 (standard clear of fence counters)
3715  */
3716 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3717 						bool skip_fence)
3718 {
3719 	u32 size, reg_base;
3720 	u32 addr, val;
3721 
3722 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3723 
3724 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3725 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
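	/* The span from CP_FENCE0_CNT_0 up to CP_BARRIER_CFG covers all the QMAN's fence
	 * counters; PDMA0 offsets are used as the register layout is identical for all QMANs
	 */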
3726 
3727 	/*
3728 	 * In case we want to make sure that a QM that is stuck on a fence will
3729 	 * be released, we should set the fence counter to a value higher than
3730 	 * the value the QM is waiting for. To comply with a fence of any value,
3731 	 * we set the maximum fence value in all counters
3732 	 */
3733 	val = skip_fence ? U32_MAX : 0;
3734 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3735 }
3736 
3737 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3738 {
3739 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3740 
3741 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3742 	gaudi2_flush_qman_common(hdev, reg_base);
3743 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3744 }
3745 
3746 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3747 {
3748 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3749 	int dcore, inst;
3750 
3751 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3752 		goto stop_edma_qmans;
3753 
3754 	/* Stop CPs of PDMA QMANs */
3755 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3756 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3757 
3758 stop_edma_qmans:
3759 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3760 		return;
3761 
3762 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3763 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3764 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3765 			u32 qm_base;
3766 
3767 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3768 				continue;
3769 
3770 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3771 					inst * DCORE_EDMA_OFFSET;
3772 
3773 			/* Stop CPs of EDMA QMANs */
3774 			gaudi2_stop_qman_common(hdev, qm_base);
3775 		}
3776 	}
3777 }
3778 
3779 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3780 {
3781 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3782 	u32 offset, i;
3783 
3784 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3785 
3786 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3787 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3788 			continue;
3789 
3790 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3791 	}
3792 }
3793 
3794 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3795 {
3796 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3797 	u32 reg_base;
3798 	int i;
3799 
3800 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3801 		return;
3802 
3803 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3804 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3805 			continue;
3806 
3807 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3808 		gaudi2_stop_qman_common(hdev, reg_base);
3809 	}
3810 }
3811 
3812 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3813 {
3814 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3815 	u32 reg_base;
3816 	int i;
3817 
3818 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3819 		return;
3820 
3821 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3822 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3823 			continue;
3824 
3825 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3826 		gaudi2_stop_qman_common(hdev, reg_base);
3827 	}
3828 }
3829 
3830 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3831 {
3832 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3833 	u32 reg_base, queue_id;
3834 	int i;
3835 
3836 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3837 		return;
3838 
3839 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3840 
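	/* Stop only the QMANs of NIC ports that are enabled in the ports mask */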
3841 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3842 		if (!(hdev->nic_ports_mask & BIT(i)))
3843 			continue;
3844 
3845 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3846 		gaudi2_stop_qman_common(hdev, reg_base);
3847 	}
3848 }
3849 
3850 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3851 {
3852 	u32 reg_val;
3853 
3854 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3855 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3856 }
3857 
3858 static void gaudi2_dma_stall(struct hl_device *hdev)
3859 {
3860 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3861 	int dcore, inst;
3862 
3863 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3864 		goto stall_edma;
3865 
3866 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3867 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3868 
3869 stall_edma:
3870 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3871 		return;
3872 
3873 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3874 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3875 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3876 			u32 core_base;
3877 
3878 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3879 				continue;
3880 
3881 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3882 					inst * DCORE_EDMA_OFFSET;
3883 
3884 			/* Stall CPs of EDMA QMANs */
3885 			gaudi2_stall_dma_common(hdev, core_base);
3886 		}
3887 	}
3888 }
3889 
3890 static void gaudi2_mme_stall(struct hl_device *hdev)
3891 {
3892 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3893 	u32 offset, i;
3894 
3895 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3896 
3897 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3898 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3899 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3900 }
3901 
3902 static void gaudi2_tpc_stall(struct hl_device *hdev)
3903 {
3904 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3905 	u32 reg_base;
3906 	int i;
3907 
3908 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3909 		return;
3910 
3911 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3912 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3913 			continue;
3914 
3915 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3916 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3917 	}
3918 }
3919 
3920 static void gaudi2_rotator_stall(struct hl_device *hdev)
3921 {
3922 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3923 	u32 reg_val;
3924 	int i;
3925 
3926 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3927 		return;
3928 
3929 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3930 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3931 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3932 
3933 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3934 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3935 			continue;
3936 
3937 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3938 	}
3939 }
3940 
3941 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3942 {
3943 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3944 }
3945 
3946 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3947 {
3948 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3949 	int dcore, inst;
3950 
3951 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3952 		goto stop_edma_qmans;
3953 
3954 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3955 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3956 
3957 stop_edma_qmans:
3958 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3959 		return;
3960 
3961 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3962 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3963 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3964 			u32 qm_base;
3965 
3966 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3967 				continue;
3968 
3969 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3970 					inst * DCORE_EDMA_OFFSET;
3971 
3972 			/* Disable CPs of EDMA QMANs */
3973 			gaudi2_disable_qman_common(hdev, qm_base);
3974 		}
3975 	}
3976 }
3977 
3978 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3979 {
3980 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3981 	u32 offset, i;
3982 
3983 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3984 
3985 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3986 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3987 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3988 }
3989 
3990 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3991 {
3992 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3993 	u32 reg_base;
3994 	int i;
3995 
3996 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3997 		return;
3998 
3999 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4000 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4001 			continue;
4002 
4003 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4004 		gaudi2_disable_qman_common(hdev, reg_base);
4005 	}
4006 }
4007 
4008 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4009 {
4010 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4011 	u32 reg_base;
4012 	int i;
4013 
4014 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4015 		return;
4016 
4017 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4018 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4019 			continue;
4020 
4021 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4022 		gaudi2_disable_qman_common(hdev, reg_base);
4023 	}
4024 }
4025 
4026 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4027 {
4028 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4029 	u32 reg_base, queue_id;
4030 	int i;
4031 
4032 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4033 		return;
4034 
4035 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4036 
4037 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4038 		if (!(hdev->nic_ports_mask & BIT(i)))
4039 			continue;
4040 
4041 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4042 		gaudi2_disable_qman_common(hdev, reg_base);
4043 	}
4044 }
4045 
4046 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4047 {
4048 	/* Disable the timestamp counter */
4049 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4050 
4051 	/* Zero the lower/upper parts of the 64-bit counter */
4052 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4053 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4054 
4055 	/* Enable the counter */
4056 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4057 }
4058 
4059 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4060 {
4061 	/* Disable the timestamp counter */
4062 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4063 }
4064 
4065 static const char *gaudi2_irq_name(u16 irq_number)
4066 {
4067 	switch (irq_number) {
4068 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4069 		return "gaudi2 cpu eq";
4070 	case GAUDI2_IRQ_NUM_COMPLETION:
4071 		return "gaudi2 completion";
4072 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4073 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4074 	case GAUDI2_IRQ_NUM_TPC_ASSERT:
4075 		return "gaudi2 tpc assert";
4076 	case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4077 		return "gaudi2 unexpected error";
4078 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4079 		return "gaudi2 user completion";
4080 	default:
4081 		return "invalid";
4082 	}
4083 }
4084 
4085 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4086 {
4087 	int i, irq, relative_idx;
4088 	struct hl_dec *dec;
4089 
4090 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4091 		irq = pci_irq_vector(hdev->pdev, i);
4092 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4093 
4094 		dec = hdev->dec + relative_idx / 2;
4095 
4096 		/* We pass different structures depending on the irq handler. For the abnormal
4097 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4098 		 * user_interrupt entry
4099 		 */
4100 		free_irq(irq, ((relative_idx % 2) ?
4101 				(void *) dec :
4102 				(void *) &hdev->user_interrupt[dec->core_id]));
4103 	}
4104 }
4105 
4106 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4107 {
4108 	int rc, i, irq_init_cnt, irq, relative_idx;
4109 	struct hl_dec *dec;
4110 
4111 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4112 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4113 			i++, irq_init_cnt++) {
4114 
4115 		irq = pci_irq_vector(hdev->pdev, i);
4116 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4117 
4118 		/* We pass different structures depending on the irq handler. For the abnormal
4119 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4120 		 * user_interrupt entry
4121 		 *
4122 		 * TODO: change the dec abnrm to threaded irq
4123 		 */
4124 
4125 		dec = hdev->dec + relative_idx / 2;
4126 		if (relative_idx % 2) {
4127 			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4128 						gaudi2_irq_name(i), (void *) dec);
4129 		} else {
4130 			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4131 					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4132 					gaudi2_irq_name(i),
4133 					(void *) &hdev->user_interrupt[dec->core_id]);
4134 		}
4135 
4136 		if (rc) {
4137 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4138 			goto free_dec_irqs;
4139 		}
4140 	}
4141 
4142 	return 0;
4143 
4144 free_dec_irqs:
4145 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4146 	return rc;
4147 }
4148 
4149 static int gaudi2_enable_msix(struct hl_device *hdev)
4150 {
4151 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4152 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4153 	int rc, irq, i, j, user_irq_init_cnt;
4154 	struct hl_cq *cq;
4155 
4156 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4157 		return 0;
4158 
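	/* Allocate exactly GAUDI2_MSIX_ENTRIES MSI-X vectors (min == max) */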
4159 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4160 					PCI_IRQ_MSIX);
4161 	if (rc < 0) {
4162 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4163 			GAUDI2_MSIX_ENTRIES, rc);
4164 		return rc;
4165 	}
4166 
4167 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4168 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4169 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4170 	if (rc) {
4171 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4172 		goto free_irq_vectors;
4173 	}
4174 
4175 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4176 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4177 			&hdev->event_queue);
4178 	if (rc) {
4179 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4180 		goto free_completion_irq;
4181 	}
4182 
4183 	rc = gaudi2_dec_enable_msix(hdev);
4184 	if (rc) {
4185 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
4186 		goto free_event_irq;
4187 	}
4188 
4189 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4190 	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4191 			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4192 			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4193 	if (rc) {
4194 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4195 		goto free_dec_irq;
4196 	}
4197 
4198 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4199 	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4200 			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4201 					&hdev->unexpected_error_interrupt);
4202 	if (rc) {
4203 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4204 		goto free_tpc_irq;
4205 	}
4206 
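	/* The user interrupts follow the decoder interrupts in the user_interrupt
	 * array, hence the array index starts at user_dec_intr_count.
	 */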
4207 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4208 			user_irq_init_cnt < prop->user_interrupt_count;
4209 			i++, j++, user_irq_init_cnt++) {
4210 
4211 		irq = pci_irq_vector(hdev->pdev, i);
4212 		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4213 						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4214 						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4215 
4216 		if (rc) {
4217 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4218 			goto free_user_irq;
4219 		}
4220 	}
4221 
4222 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4223 
4224 	return 0;
4225 
4226 free_user_irq:
4227 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4228 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4229 
4230 		irq = pci_irq_vector(hdev->pdev, i);
4231 		free_irq(irq, &hdev->user_interrupt[j]);
4232 	}
4233 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4234 	free_irq(irq, &hdev->unexpected_error_interrupt);
4235 free_tpc_irq:
4236 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4237 	free_irq(irq, &hdev->tpc_interrupt);
4238 free_dec_irq:
4239 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4240 free_event_irq:
4241 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4242 	free_irq(irq, &hdev->event_queue);
4243 
4244 free_completion_irq:
4245 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4246 	free_irq(irq, cq);
4247 
4248 free_irq_vectors:
4249 	pci_free_irq_vectors(hdev->pdev);
4250 
4251 	return rc;
4252 }
4253 
4254 static void gaudi2_sync_irqs(struct hl_device *hdev)
4255 {
4256 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4257 	int i, j;
4258 	int irq;
4259 
4260 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4261 		return;
4262 
4263 	/* Wait for all pending IRQs to be finished */
4264 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4265 
4266 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4267 		irq = pci_irq_vector(hdev->pdev, i);
4268 		synchronize_irq(irq);
4269 	}
4270 
4271 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4272 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4273 
4274 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4275 										i++, j++) {
4276 		irq = pci_irq_vector(hdev->pdev, i);
4277 		synchronize_irq(irq);
4278 	}
4279 
4280 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4281 }
4282 
4283 static void gaudi2_disable_msix(struct hl_device *hdev)
4284 {
4285 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4286 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4287 	struct hl_cq *cq;
4288 	int irq, i, j, k;
4289 
4290 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4291 		return;
4292 
4293 	gaudi2_sync_irqs(hdev);
4294 
4295 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4296 	free_irq(irq, &hdev->event_queue);
4297 
4298 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4299 
4300 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4301 	free_irq(irq, &hdev->tpc_interrupt);
4302 
4303 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4304 	free_irq(irq, &hdev->unexpected_error_interrupt);
4305 
4306 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4307 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4308 
4309 		irq = pci_irq_vector(hdev->pdev, i);
4310 		free_irq(irq, &hdev->user_interrupt[j]);
4311 	}
4312 
4313 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4314 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4315 	free_irq(irq, cq);
4316 
4317 	pci_free_irq_vectors(hdev->pdev);
4318 
4319 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4320 }
4321 
4322 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4323 {
4324 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4325 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4326 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4327 	int rc;
4328 
4329 	if (hdev->pldm)
4330 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4331 	else
4332 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4333 
4334 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4335 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4336 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4337 			continue;
4338 
4339 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4340 
4341 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4342 
4343 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4344 
4345 		/* Wait till all traffic from the decoder stops
4346 		 * before applying core reset.
4347 		 */
4348 		rc = hl_poll_timeout(
4349 				hdev,
4350 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4351 				graceful,
4352 				(graceful & graceful_pend_mask),
4353 				100,
4354 				timeout_usec);
4355 		if (rc)
4356 			dev_err(hdev->dev,
4357 				"Failed to stop traffic from DCORE%d Decoder %d\n",
4358 				dcore_id, dec_id);
4359 	}
4360 }
4361 
4362 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4363 {
4364 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4365 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4366 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4367 	int rc;
4368 
4369 	if (hdev->pldm)
4370 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4371 	else
4372 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4373 
4374 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4375 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4376 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4377 			continue;
4378 
4379 		offset = dec_id * PCIE_VDEC_OFFSET;
4380 
4381 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4382 
4383 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4384 
4385 		/* Wait till all traffic from the decoder stops
4386 		 * before applying core reset.
4387 		 */
4388 		rc = hl_poll_timeout(
4389 				hdev,
4390 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4391 				graceful,
4392 				(graceful & graceful_pend_mask),
4393 				100,
4394 				timeout_usec);
4395 		if (rc)
4396 			dev_err(hdev->dev,
4397 				"Failed to stop traffic from PCIe Decoder %d\n",
4398 				dec_id);
4399 	}
4400 }
4401 
4402 static void gaudi2_stop_dec(struct hl_device *hdev)
4403 {
4404 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4405 	int dcore_id;
4406 
4407 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4408 		return;
4409 
4410 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4411 		gaudi2_stop_dcore_dec(hdev, dcore_id);
4412 
4413 	gaudi2_stop_pcie_dec(hdev);
4414 }
4415 
4416 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4417 {
4418 	u32 reg_base, reg_val;
4419 
4420 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4421 	if (run_mode == HL_ENGINE_CORE_RUN)
4422 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4423 	else
4424 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4425 
4426 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4427 }
4428 
4429 static void gaudi2_halt_arcs(struct hl_device *hdev)
4430 {
4431 	u16 arc_id;
4432 
4433 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4434 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4435 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4436 	}
4437 }
4438 
4439 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4440 {
4441 	int rc;
4442 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
4443 
4444 	if (hdev->pldm)
4445 		timeout_usec *= 100;
4446 
4447 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4448 	if (run_mode == HL_ENGINE_CORE_RUN)
4449 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4450 	else
4451 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4452 
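	/* Poll until the ARC acknowledges the requested run/halt mode */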
4453 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4454 				val, ((val & ack_mask) == ack_mask),
4455 				1000, timeout_usec);
4456 
4457 	if (!rc) {
4458 		/* Clear the run/halt request */
4459 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4460 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4461 	}
4462 
4463 	return rc;
4464 }
4465 
4466 static void gaudi2_reset_arcs(struct hl_device *hdev)
4467 {
4468 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4469 	u16 arc_id;
4470 
4471 	if (!gaudi2)
4472 		return;
4473 
4474 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4475 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4476 			gaudi2_clr_arc_id_cap(hdev, arc_id);
4477 }
4478 
4479 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4480 {
4481 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4482 	u32 queue_id;
4483 	int i;
4484 
4485 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4486 		return;
4487 
4488 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4489 
4490 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4491 		if (!(hdev->nic_ports_mask & BIT(i)))
4492 			continue;
4493 
4494 		gaudi2_qman_manual_flush_common(hdev, queue_id);
4495 	}
4496 }
4497 
4498 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4499 					u32 num_cores, u32 core_command)
4500 {
4501 	int i, rc;
4502 
4503 	for (i = 0 ; i < num_cores ; i++) {
4504 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4505 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4506 	}
4507 
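	/* After requesting the new mode for all cores, verify that each enabled
	 * core has acknowledged it.
	 */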
4508 	for (i = 0 ; i < num_cores ; i++) {
4509 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4510 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4511 
4512 			if (rc) {
4513 				dev_err(hdev->dev, "failed to %s arc: %d\n",
4514 					(core_command == HL_ENGINE_CORE_HALT) ?
4515 					"HALT" : "RUN", core_ids[i]);
4516 				return -1;
4517 			}
4518 		}
4519 	}
4520 
4521 	return 0;
4522 }
4523 
4524 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4525 {
4526 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4527 	u32 reg_base, reg_addr, reg_val, tpc_id;
4528 
4529 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4530 		return 0;
4531 
4532 	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4533 	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4534 		return 0;
4535 
4536 	reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4537 	reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4538 	reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4539 			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4540 	WREG32(reg_addr, reg_val);
4541 
4542 	if (engine_command == HL_ENGINE_RESUME) {
4543 		reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4544 		reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4545 		RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4546 	}
4547 
4548 	return 0;
4549 }
4550 
4551 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4552 {
4553 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4554 	u32 reg_base, reg_addr, reg_val, mme_id;
4555 
4556 	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4557 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4558 		return 0;
4559 
4560 	reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4561 	reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4562 	reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4563 			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4564 	WREG32(reg_addr, reg_val);
4565 
4566 	return 0;
4567 }
4568 
4569 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4570 {
4571 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4572 	u32 reg_base, reg_addr, reg_val, edma_id;
4573 
4574 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4575 		return 0;
4576 
4577 	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4578 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4579 		return 0;
4580 
4581 	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4582 	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4583 	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4584 			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4585 	WREG32(reg_addr, reg_val);
4586 
4587 	if (engine_command == HL_ENGINE_STALL) {
4588 		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4589 				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4590 		WREG32(reg_addr, reg_val);
4591 	}
4592 
4593 	return 0;
4594 }
4595 
4596 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4597 		u32 *engine_ids, u32 num_engines, u32 engine_command)
4598 {
4599 	int i, rc;
4600 
4601 	for (i = 0 ; i < num_engines ; ++i) {
4602 		switch (engine_ids[i]) {
4603 		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4604 		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4605 		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4606 		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4607 			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4608 			if (rc)
4609 				return rc;
4610 
4611 			break;
4612 		case GAUDI2_DCORE0_ENGINE_ID_MME:
4613 		case GAUDI2_DCORE1_ENGINE_ID_MME:
4614 		case GAUDI2_DCORE2_ENGINE_ID_MME:
4615 		case GAUDI2_DCORE3_ENGINE_ID_MME:
4616 			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4617 			if (rc)
4618 				return rc;
4619 
4620 			break;
4621 		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4622 		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4623 		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4624 		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4625 			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4626 			if (rc)
4627 				return rc;
4628 
4629 			break;
4630 		default:
4631 			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4632 			return -EINVAL;
4633 		}
4634 	}
4635 
4636 	return 0;
4637 }
4638 
4639 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4640 					u32 num_engines, u32 engine_command)
4641 {
4642 	switch (engine_command) {
4643 	case HL_ENGINE_CORE_HALT:
4644 	case HL_ENGINE_CORE_RUN:
4645 		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4646 
4647 	case HL_ENGINE_STALL:
4648 	case HL_ENGINE_RESUME:
4649 		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4650 
4651 	default:
4652 		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4653 		return -EINVAL;
4654 	}
4655 }
4656 
4657 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4658 {
4659 	u32 wait_timeout_ms;
4660 
4661 	if (hdev->pldm)
4662 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4663 	else
4664 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4665 
4666 	if (fw_reset)
4667 		goto skip_engines;
4668 
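	/* Stop the QMANs first so no new work is dispatched, then stall the
	 * engines themselves.
	 */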
4669 	gaudi2_stop_dma_qmans(hdev);
4670 	gaudi2_stop_mme_qmans(hdev);
4671 	gaudi2_stop_tpc_qmans(hdev);
4672 	gaudi2_stop_rot_qmans(hdev);
4673 	gaudi2_stop_nic_qmans(hdev);
4674 	msleep(wait_timeout_ms);
4675 
4676 	gaudi2_halt_arcs(hdev);
4677 	gaudi2_dma_stall(hdev);
4678 	gaudi2_mme_stall(hdev);
4679 	gaudi2_tpc_stall(hdev);
4680 	gaudi2_rotator_stall(hdev);
4681 
4682 	msleep(wait_timeout_ms);
4683 
4684 	gaudi2_stop_dec(hdev);
4685 
4686 	/*
4687 	 * In case of soft reset, do a manual flush for the QMANs (currently done
4688 	 * only for the NIC QMANs).
4689 	 */
4690 	if (!hard_reset)
4691 		gaudi2_nic_qmans_manual_flush(hdev);
4692 
4693 	gaudi2_disable_dma_qmans(hdev);
4694 	gaudi2_disable_mme_qmans(hdev);
4695 	gaudi2_disable_tpc_qmans(hdev);
4696 	gaudi2_disable_rot_qmans(hdev);
4697 	gaudi2_disable_nic_qmans(hdev);
4698 	gaudi2_disable_timestamp(hdev);
4699 
4700 skip_engines:
4701 	if (hard_reset) {
4702 		gaudi2_disable_msix(hdev);
4703 		return;
4704 	}
4705 
4706 	gaudi2_sync_irqs(hdev);
4707 }
4708 
4709 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4710 {
4711 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4712 
4713 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4714 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4715 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4716 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4717 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4718 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4719 }
4720 
4721 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4722 {
4723 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4724 	struct dynamic_fw_load_mgr *dynamic_loader;
4725 	struct cpu_dyn_regs *dyn_regs;
4726 
4727 	/* fill common fields */
4728 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4729 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4730 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4731 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4732 	fw_loader->skip_bmc = false;
4733 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4734 	fw_loader->dram_bar_id = DRAM_BAR_ID;
4735 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4736 
4737 	/* Here we set initial values for a few specific dynamic regs (as
4738 	 * before reading the first descriptor from FW, those values have to be
4739 	 * hard-coded). In later stages of the protocol those values will be
4740 	 * updated automatically by reading the FW descriptor, so the data there
4741 	 * will always be up-to-date.
4742 	 */
4743 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4744 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4745 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4746 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4747 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4748 }
4749 
4750 static int gaudi2_init_cpu(struct hl_device *hdev)
4751 {
4752 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4753 	int rc;
4754 
4755 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4756 		return 0;
4757 
4758 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4759 		return 0;
4760 
4761 	rc = hl_fw_init_cpu(hdev);
4762 	if (rc)
4763 		return rc;
4764 
4765 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4766 
4767 	return 0;
4768 }
4769 
4770 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4771 {
4772 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4773 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4774 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4775 	struct cpu_dyn_regs *dyn_regs;
4776 	struct hl_eq *eq;
4777 	u32 status;
4778 	int err;
4779 
4780 	if (!hdev->cpu_queues_enable)
4781 		return 0;
4782 
4783 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4784 		return 0;
4785 
4786 	eq = &hdev->event_queue;
4787 
4788 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4789 
4790 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4791 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4792 
4793 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4794 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4795 
4796 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4797 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4798 
4799 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4800 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4801 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4802 
4803 	/* Used for EQ CI */
4804 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4805 
4806 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4807 
4808 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4809 
4810 	/* Let the ARC know we are ready as it is now handling those queues */
4811 
4812 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4813 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4814 
4815 	err = hl_poll_timeout(
4816 		hdev,
4817 		mmCPU_IF_QUEUE_INIT,
4818 		status,
4819 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4820 		1000,
4821 		cpu_timeout);
4822 
4823 	if (err) {
4824 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4825 		return -EIO;
4826 	}
4827 
4828 	/* update FW application security bits */
4829 	if (prop->fw_cpu_boot_dev_sts0_valid)
4830 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4831 
4832 	if (prop->fw_cpu_boot_dev_sts1_valid)
4833 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4834 
4835 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4836 	return 0;
4837 }
4838 
4839 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4840 				u32 queue_id_base)
4841 {
4842 	struct hl_hw_queue *q;
4843 	u32 pq_id, pq_offset;
4844 
4845 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4846 		q = &hdev->kernel_queues[queue_id_base + pq_id];
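		/* Per-PQ registers are laid out with a 4-byte stride */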
4847 		pq_offset = pq_id * 4;
4848 
4849 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4850 				lower_32_bits(q->bus_address));
4851 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4852 				upper_32_bits(q->bus_address));
4853 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4854 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4855 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4856 	}
4857 }
4858 
4859 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4860 {
4861 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4862 
4863 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4864 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4865 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4866 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4867 
4868 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4869 		cp_offset = cp_id * 4;
4870 
4871 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4872 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4873 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4874 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4875 	}
4876 
4877 	/* allow QMANs to accept work from ARC CQF */
4878 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4879 }
4880 
4881 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4882 				u32 queue_id_base)
4883 {
4884 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4885 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4886 
4887 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4888 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4889 
4890 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4891 		pq_offset = pq_id * 4;
4892 
4893 		/* Configure QMAN HBW to scratchpad as it is not needed */
4894 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4895 				lower_32_bits(gaudi2->scratchpad_bus_address));
4896 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4897 				upper_32_bits(gaudi2->scratchpad_bus_address));
4898 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4899 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4900 
4901 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4902 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4903 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4904 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4905 	}
4906 
4907 	/* Enable QMAN H/W completion */
4908 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4909 }
4910 
4911 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4912 {
4913 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4914 	u32 sp_reg_addr;
4915 
4916 	switch (queue_id_base) {
4917 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4918 		fallthrough;
4919 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4920 		fallthrough;
4921 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4922 		fallthrough;
4923 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4924 		fallthrough;
4925 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4926 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4927 		break;
4928 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4929 		fallthrough;
4930 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4931 		fallthrough;
4932 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4933 		fallthrough;
4934 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4935 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4936 		break;
4937 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4938 		fallthrough;
4939 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4940 		fallthrough;
4941 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4942 		fallthrough;
4943 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4944 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4945 		break;
4946 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4947 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4948 		break;
4949 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4950 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4951 		break;
4952 	default:
4953 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4954 		return 0;
4955 	}
4956 
4957 	return sp_reg_addr;
4958 }
4959 
4960 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4961 					u32 queue_id_base)
4962 {
4963 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4964 	int map_table_entry;
4965 
4966 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4967 
4968 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4969 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4970 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4971 
4972 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4973 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4974 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4975 
4976 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4977 
4978 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4979 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4980 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4981 
4982 	/* Enable the QMAN channel.
4983 	 * PDMA QMAN configuration is different, as we do not allow the user to
4984 	 * access some of the CPs.
4985 	 * PDMA0: CP2/3 are reserved for the ARC usage.
4986 	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4987 	 */
4988 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4989 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4990 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4991 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4992 	else
4993 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4994 }
4995 
4996 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4997 		u32 queue_id_base)
4998 {
4999 	u32 pq_id;
5000 
5001 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
5002 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
5003 
5004 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
5005 	gaudi2_init_qman_cp(hdev, reg_base);
5006 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
5007 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
5008 }
5009 
5010 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5011 				u32 dma_core_id, bool is_secure)
5012 {
5013 	u32 prot, irq_handler_offset;
5014 	struct cpu_dyn_regs *dyn_regs;
5015 	int map_table_entry;
5016 
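	/* The protection-error value bit is always set; the protection value bit
	 * is set only when the core is initialized as secured (e.g. the KDMA).
	 */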
5017 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5018 	if (is_secure)
5019 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5020 
5021 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5022 
5023 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5024 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5025 
5026 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5027 			lower_32_bits(CFG_BASE + irq_handler_offset));
5028 
5029 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5030 			upper_32_bits(CFG_BASE + irq_handler_offset));
5031 
5032 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5033 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5034 		gaudi2_irq_map_table[map_table_entry].cpu_id);
5035 
5036 	/* Enable the DMA channel */
5037 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5038 }
5039 
5040 static void gaudi2_init_kdma(struct hl_device *hdev)
5041 {
5042 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5043 	u32 reg_base;
5044 
5045 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5046 		return;
5047 
5048 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5049 
5050 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5051 
5052 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5053 }
5054 
5055 static void gaudi2_init_pdma(struct hl_device *hdev)
5056 {
5057 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5058 	u32 reg_base;
5059 
5060 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5061 		return;
5062 
5063 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5064 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5065 
5066 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5067 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5068 
5069 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5070 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5071 
5072 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5073 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5074 
5075 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5076 }
5077 
5078 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5079 {
5080 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
5081 
5082 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5083 	base_edma_qman_id = edma_stream_base[seq];
5084 
5085 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5086 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5087 
5088 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5089 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5090 }
5091 
5092 static void gaudi2_init_edma(struct hl_device *hdev)
5093 {
5094 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5095 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5096 	int dcore, inst;
5097 
5098 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5099 		return;
5100 
5101 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5102 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5103 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5104 
5105 			if (!(prop->edma_enabled_mask & BIT(seq)))
5106 				continue;
5107 
5108 			gaudi2_init_edma_instance(hdev, seq);
5109 
5110 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5111 		}
5112 	}
5113 }
5114 
5115 /*
5116  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5117  * @hdev: pointer to habanalabs device structure.
5118  * @sob_id: sync object ID.
5119  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5120  * @interrupt_id: interrupt ID.
5121  *
5122  * Some initiators cannot have an HBW address in their completion address registers, and thus cannot
5123  * write directly to the HBW host memory of the virtual MSI-X doorbell.
5124  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5125  *
5126  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5127  * In addition to the HBW write, the other 2 messages are for preparing the monitor for the next
5128  * completion, by decrementing the sync object value and re-arming the monitor.
5129  */
5130 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5131 							u32 first_mon_id, u32 interrupt_id)
5132 {
5133 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5134 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5135 	u64 addr;
5136 	u8 mask;
5137 
5138 	/* Reset the SOB value */
5139 	sob_offset = sob_id * sizeof(u32);
5140 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5141 
5142 	/* Configure 3 monitors:
5143 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5144 	 * 2. Decrement SOB value by 1.
5145 	 * 3. Re-arm the master monitor.
5146 	 */
5147 
5148 	first_mon_offset = first_mon_id * sizeof(u32);
5149 
5150 	/* 2nd monitor: Decrement SOB value by 1 */
5151 	mon_offset = first_mon_offset + sizeof(u32);
5152 
5153 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5154 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5155 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5156 
5157 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5158 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5159 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5160 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5161 
5162 	/* 3rd monitor: Re-arm the master monitor */
5163 	mon_offset = first_mon_offset + 2 * sizeof(u32);
5164 
5165 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5166 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5167 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5168 
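	/* A monitor watches a group of 8 SOBs; only the bit of the SOB of
	 * interest is left cleared in the mask, so only that SOB is monitored.
	 */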
5169 	sob_group = sob_id / 8;
5170 	mask = ~BIT(sob_id & 0x7);
5171 	mode = 0; /* comparison mode is "greater than or equal to" */
5172 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5173 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5174 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5175 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5176 
5177 	payload = arm;
5178 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5179 
5180 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5181 	mon_offset = first_mon_offset;
5182 
5183 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5184 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5185 
5186 	addr = gaudi2->virt_msix_db_dma_addr;
5187 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5188 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5189 
5190 	payload = interrupt_id;
5191 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5192 
5193 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5194 }
5195 
5196 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5197 {
5198 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5199 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5200 
5201 	/* Decoder normal/abnormal interrupts */
5202 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5203 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5204 			continue;
5205 
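		/* Each decoder owns a pair of consecutive interrupt IDs:
		 * normal first, then abnormal.
		 */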
5206 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5207 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5208 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5209 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5210 
5211 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5212 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5213 		interrupt_id += 1;
5214 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5215 	}
5216 }
5217 
5218 static void gaudi2_init_sm(struct hl_device *hdev)
5219 {
5220 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5221 	u64 cq_address;
5222 	u32 reg_val;
5223 	int i;
5224 
5225 	/* Enable HBW/LBW CQ for completion monitors */
5226 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5227 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5228 
5229 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5230 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5231 
5232 	/* Enable only HBW CQ for KDMA completion monitor */
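	/* Note: i retains its post-loop value (GAUDI2_MAX_PENDING_CS), so this is
	 * the monitor right after the pending-CS monitors.
	 */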
5233 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5234 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5235 
5236 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5237 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5238 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5239 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5240 
5241 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5242 		cq_address =
5243 			hdev->completion_queue[i].bus_address;
5244 
5245 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5246 							lower_32_bits(cq_address));
5247 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5248 							upper_32_bits(cq_address));
5249 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5250 							ilog2(HL_CQ_SIZE_IN_BYTES));
5251 	}
5252 
5253 	/* Configure kernel ASID and MMU BP (bypass) */
5254 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5255 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5256 
5257 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5258 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
5259 }
5260 
5261 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5262 {
5263 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5264 	u32 reg_val;
5265 	int i;
5266 
5267 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5268 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5269 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5270 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5271 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5272 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5273 
5274 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5275 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5276 
5277 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5278 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5279 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5280 	}
5281 }
5282 
5283 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5284 							bool config_qman_only)
5285 {
5286 	u32 queue_id_base, reg_base;
5287 
5288 	switch (dcore_id) {
5289 	case 0:
5290 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5291 		break;
5292 	case 1:
5293 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5294 		break;
5295 	case 2:
5296 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5297 		break;
5298 	case 3:
5299 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5300 		break;
5301 	default:
5302 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5303 		return;
5304 	}
5305 
5306 	if (!config_qman_only) {
5307 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5308 		gaudi2_init_mme_acc(hdev, reg_base);
5309 	}
5310 
5311 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5312 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
5313 }
5314 
5315 static void gaudi2_init_mme(struct hl_device *hdev)
5316 {
5317 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5318 	int i;
5319 
5320 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5321 		return;
5322 
5323 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5324 		gaudi2_init_dcore_mme(hdev, i, false);
5325 
5326 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5327 	}
5328 }
5329 
5330 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5331 {
5332 	/* Mask arithmetic and QM interrupts in TPC */
5333 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5334 
5335 	/* Set 16 cache lines */
5336 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5337 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5338 }
5339 
5340 struct gaudi2_tpc_init_cfg_data {
5341 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5342 };
5343 
5344 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5345 					u32 offset, struct iterate_module_ctx *ctx)
5346 {
5347 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5348 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5349 	u32 queue_id_base;
5350 	u8 seq;
5351 
5352 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5353 
5354 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5355 		/* the additional DCORE0 TPC (TPC6) gets the last sequence number */
5356 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5357 	else
5358 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5359 
5360 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5361 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5362 
5363 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5364 }
5365 
5366 static void gaudi2_init_tpc(struct hl_device *hdev)
5367 {
5368 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5369 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
5370 	struct iterate_module_ctx tpc_iter;
5371 
5372 	if (!hdev->asic_prop.tpc_enabled_mask)
5373 		return;
5374 
5375 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5376 		return;
5377 
5378 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5379 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5380 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5381 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5382 	tpc_iter.fn = &gaudi2_init_tpc_config;
5383 	tpc_iter.data = &init_cfg_data;
5384 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
5385 }
5386 
5387 static void gaudi2_init_rotator(struct hl_device *hdev)
5388 {
5389 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5390 	u32 i, reg_base, queue_id;
5391 
5392 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5393 
5394 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5395 		reg_base = gaudi2_qm_blocks_bases[queue_id];
5396 		gaudi2_init_qman(hdev, reg_base, queue_id);
5397 
5398 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5399 	}
5400 }
5401 
5402 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5403 {
5404 	u32 sob_id;
5405 
5406 	/* VCMD normal interrupt */
5407 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5408 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5409 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5410 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5411 
5412 	/* VCMD abnormal interrupt */
5413 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5414 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5415 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5416 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5417 }
5418 
5419 static void gaudi2_init_dec(struct hl_device *hdev)
5420 {
5421 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5422 	u32 dcore_id, dec_id, dec_bit;
5423 	u64 base_addr;
5424 
5425 	if (!hdev->asic_prop.decoder_enabled_mask)
5426 		return;
5427 
5428 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5429 		return;
5430 
5431 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5432 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5433 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5434 
5435 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5436 				continue;
5437 
5438 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
5439 					BRDG_CTRL_BLOCK_OFFSET +
5440 					dcore_id * DCORE_OFFSET +
5441 					dec_id * DCORE_VDEC_OFFSET;
5442 
5443 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5444 
5445 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5446 		}
5447 
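	/* Now configure the shared (PCIe) decoders, which have their own bits in the enable mask */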
5448 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5449 		dec_bit = PCIE_DEC_SHIFT + dec_id;
5450 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5451 			continue;
5452 
5453 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5454 				dec_id * DCORE_VDEC_OFFSET;
5455 
5456 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5457 
5458 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5459 	}
5460 }
5461 
5462 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5463 					u32 stlb_base, u32 asid, u64 phys_addr)
5464 {
5465 	u32 status, timeout_usec;
5466 	int rc;
5467 
5468 	if (hdev->pldm || !hdev->pdev)
5469 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5470 	else
5471 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5472 
5473 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5474 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5475 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
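	/* Kick off the hop0 update; HW clears the busy bit when it is done */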
5476 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5477 
5478 	rc = hl_poll_timeout(
5479 		hdev,
5480 		stlb_base + STLB_BUSY_OFFSET,
5481 		status,
5482 		!(status & 0x80000000),
5483 		1000,
5484 		timeout_usec);
5485 
5486 	if (rc) {
5487 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5488 		return rc;
5489 	}
5490 
5491 	return 0;
5492 }
5493 
5494 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5495 					u32 start_offset, u32 inv_start_val,
5496 					u32 flags)
5497 {
5498 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
5499 	if (flags & MMU_OP_CLEAR_MEMCACHE)
5500 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5501 
5502 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5503 		return;
5504 
5505 	WREG32(stlb_base + start_offset, inv_start_val);
5506 }
5507 
5508 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5509 						struct gaudi2_cache_invld_params *inv_params)
5510 {
5511 	u32 status, timeout_usec, start_offset;
5512 	int rc;
5513 
5514 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5515 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5516 
5517 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
5518 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5519 		rc = hl_poll_timeout(
5520 			hdev,
5521 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5522 			status,
5523 			status & 0x1,
5524 			1000,
5525 			timeout_usec);
5526 
5527 		if (rc)
5528 			return rc;
5529 
5530 		/* Need to manually reset the status to 0 */
5531 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5532 	}
5533 
5534 	/* Lower cache does not work with cache lines, hence we can skip its
5535 	 * invalidation upon map and invalidate only upon unmap
5536 	 */
5537 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5538 		return 0;
5539 
5540 	start_offset = inv_params->range_invalidation ?
5541 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5542 
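	/* The invalidation is complete once HW clears the start bit written by the trigger */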
5543 	rc = hl_poll_timeout(
5544 		hdev,
5545 		stlb_base + start_offset,
5546 		status,
5547 		!(status & 0x1),
5548 		1000,
5549 		timeout_usec);
5550 
5551 	return rc;
5552 }
5553 
5554 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5555 {
5556 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5557 	u32 hw_cap;
5558 
5559 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5560 
5561 	if (gaudi2->hw_cap_initialized & hw_cap)
5562 		return true;
5563 
5564 	return false;
5565 }
5566 
5567 /* this function shall be called only for HMMUs for which capability bit is set */
5568 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5569 {
5570 	u32 offset;
5571 
5572 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5573 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5574 }
5575 
5576 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5577 						struct gaudi2_cache_invld_params *inv_params)
5578 {
5579 	u32 start_offset;
5580 
5581 	if (inv_params->range_invalidation) {
5582 		/* Set the address range.
5583 		 * Note: by design, the start address written to the register is not
5584 		 * included in the invalidation range, which is why we program an
5585 		 * address lower than the first address we actually want to be
5586 		 * included in the range invalidation.
5587 		 */
5588 		u64 start = inv_params->start_va - 1;
5589 
5590 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5591 
5592 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5593 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5594 
5595 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5596 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5597 
5598 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5599 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5600 
5601 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5602 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5603 	} else {
5604 		start_offset = STLB_INV_ALL_START_OFFSET;
5605 	}
5606 
5607 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5608 						inv_params->inv_start_val, inv_params->flags);
5609 }
5610 
5611 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5612 						int dcore_id, int hmmu_id,
5613 						struct gaudi2_cache_invld_params *inv_params)
5614 {
5615 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5616 
5617 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5618 }
5619 
5620 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5621 						int dcore_id, int hmmu_id,
5622 						struct gaudi2_cache_invld_params *inv_params)
5623 {
5624 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5625 
5626 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5627 }
5628 
5629 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5630 						struct gaudi2_cache_invld_params *inv_params)
5631 {
5632 	int dcore_id, hmmu_id;
5633 
5634 	/* first send all invalidation commands */
5635 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5636 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5637 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5638 				continue;
5639 
5640 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5641 		}
5642 	}
5643 
5644 	/* next, poll all invalidations status */
5645 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5646 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5647 			int rc;
5648 
5649 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5650 				continue;
5651 
5652 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5653 										inv_params);
5654 			if (rc)
5655 				return rc;
5656 		}
5657 	}
5658 
5659 	return 0;
5660 }
5661 
5662 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5663 {
5664 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5665 	struct gaudi2_cache_invld_params invld_params;
5666 	int rc = 0;
5667 
5668 	if (hdev->reset_info.hard_reset_pending)
5669 		return rc;
5670 
5671 	invld_params.range_invalidation = false;
5672 	invld_params.inv_start_val = 1;
5673 
5674 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5675 		invld_params.flags = flags;
5676 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5677 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5678 										&invld_params);
5679 	} else if (flags & MMU_OP_PHYS_PACK) {
5680 		invld_params.flags = 0;
5681 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5682 	}
5683 
5684 	return rc;
5685 }
5686 
5687 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5688 				u32 flags, u32 asid, u64 va, u64 size)
5689 {
5690 	struct gaudi2_cache_invld_params invld_params = {0};
5691 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5692 	u64 start_va, end_va;
5693 	u32 inv_start_val;
5694 	int rc = 0;
5695 
5696 	if (hdev->reset_info.hard_reset_pending)
5697 		return 0;
5698 
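	/* Build the trigger value: enable range invalidation, filtered by the given ASID */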
5699 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5700 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5701 			asid << MMU_RANGE_INV_ASID_SHIFT);
5702 	start_va = va;
5703 	end_va = start_va + size;
5704 
5705 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5706 		/* As range invalidation does not support zero address we will
5707 		 * do full invalidation in this case
5708 		 */
5709 		if (start_va) {
5710 			invld_params.range_invalidation = true;
5711 			invld_params.start_va = start_va;
5712 			invld_params.end_va = end_va;
5713 			invld_params.inv_start_val = inv_start_val;
5714 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5715 		} else {
5716 			invld_params.range_invalidation = false;
5717 			invld_params.inv_start_val = 1;
5718 			invld_params.flags = flags;
5719 		}
5720 
5721 
5722 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5723 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5724 										&invld_params);
5725 		if (rc)
5726 			return rc;
5727 
5728 	} else if (flags & MMU_OP_PHYS_PACK) {
5729 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5730 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5731 		invld_params.inv_start_val = inv_start_val;
5732 		invld_params.flags = flags;
5733 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5734 	}
5735 
5736 	return rc;
5737 }
5738 
5739 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5740 {
5741 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5742 	u64 hop0_addr;
5743 	u32 asid, max_asid = prop->max_asid;
5744 	int rc;
5745 
5746 	/* it takes too much time to init all of the ASIDs on palladium */
5747 	if (hdev->pldm)
5748 		max_asid = min((u32) 8, max_asid);
5749 
5750 	for (asid = 0 ; asid < max_asid ; asid++) {
5751 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5752 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5753 		if (rc) {
5754 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5755 			return rc;
5756 		}
5757 	}
5758 
5759 	return 0;
5760 }
5761 
5762 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5763 {
5764 	u32 status, timeout_usec;
5765 	int rc;
5766 
5767 	if (hdev->pldm || !hdev->pdev)
5768 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5769 	else
5770 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5771 
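	/* Kick off a full STLB invalidation; its completion is polled further below */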
5772 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5773 
5774 	rc = hl_poll_timeout(
5775 		hdev,
5776 		stlb_base + STLB_SRAM_INIT_OFFSET,
5777 		status,
5778 		!status,
5779 		1000,
5780 		timeout_usec);
5781 
5782 	if (rc)
5783 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5784 
5785 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5786 	if (rc)
5787 		return rc;
5788 
5789 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5790 
5791 	rc = hl_poll_timeout(
5792 		hdev,
5793 		stlb_base + STLB_INV_ALL_START_OFFSET,
5794 		status,
5795 		!status,
5796 		1000,
5797 		timeout_usec);
5798 
5799 	if (rc)
5800 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5801 
5802 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5803 
5804 	return rc;
5805 }
5806 
5807 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5808 {
5809 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5810 	u32 mmu_base, stlb_base;
5811 	int rc;
5812 
5813 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5814 		return 0;
5815 
5816 	mmu_base = mmPMMU_HBW_MMU_BASE;
5817 	stlb_base = mmPMMU_HBW_STLB_BASE;
5818 
5819 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5820 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5821 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5822 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5823 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5824 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5825 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5826 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5827 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5828 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5829 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5830 
5831 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5832 
5833 	if (PAGE_SIZE == SZ_64K) {
5834 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5835 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5836 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5837 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5838 			FIELD_PREP(
5839 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5840 				1),
5841 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5842 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5843 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5844 	}
5845 
5846 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5847 
5848 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5849 	if (rc)
5850 		return rc;
5851 
5852 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5853 
5854 	return 0;
5855 }
5856 
5857 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5858 				int hmmu_id)
5859 {
5860 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5861 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5862 	u32 offset, mmu_base, stlb_base, hw_cap;
5863 	u8 dmmu_seq;
5864 	int rc;
5865 
5866 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5867 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5868 
5869 	/*
5870 	 * return if DMMU is already initialized or if it's not out of
5871 	 * isolation (due to cluster binning)
5872 	 */
5873 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5874 		return 0;
5875 
5876 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5877 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5878 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5879 
5880 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5881 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5882 
5883 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5884 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5885 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5886 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5887 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5888 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5889 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5890 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5891 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5892 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5893 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5894 
5895 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5896 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5897 
5898 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5899 
5900 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5901 	if (rc)
5902 		return rc;
5903 
5904 	gaudi2->hw_cap_initialized |= hw_cap;
5905 
5906 	return 0;
5907 }
5908 
5909 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5910 {
5911 	int rc, dcore_id, hmmu_id;
5912 
5913 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5914 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5915 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5916 			if (rc)
5917 				return rc;
5918 		}
5919 
5920 	return 0;
5921 }
5922 
5923 static int gaudi2_mmu_init(struct hl_device *hdev)
5924 {
5925 	int rc;
5926 
5927 	rc = gaudi2_pci_mmu_init(hdev);
5928 	if (rc)
5929 		return rc;
5930 
5931 	rc = gaudi2_hbm_mmu_init(hdev);
5932 	if (rc)
5933 		return rc;
5934 
5935 	return 0;
5936 }
5937 
5938 static int gaudi2_hw_init(struct hl_device *hdev)
5939 {
5940 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5941 	int rc;
5942 
5943 	/* Let's mark in the H/W that we have reached this point. We check
5944 	 * this value in the reset_before_init function to understand whether
5945 	 * we need to reset the chip before doing H/W init. This register is
5946 	 * cleared by the H/W upon H/W reset
5947 	 */
5948 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5949 
5950 	/* Perform read from the device to make sure device is up */
5951 	RREG32(mmHW_STATE);
5952 
5953 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5954 	 * So we set it here and if anyone tries to move it later to
5955 	 * a different address, there will be an error
5956 	 */
5957 	if (hdev->asic_prop.iatu_done_by_fw)
5958 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5959 
5960 	/*
5961 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5962 	 * base address of dram
5963 	 */
5964 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5965 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5966 		return -EIO;
5967 	}
5968 
5969 	rc = gaudi2_init_cpu(hdev);
5970 	if (rc) {
5971 		dev_err(hdev->dev, "failed to initialize CPU\n");
5972 		return rc;
5973 	}
5974 
5975 	gaudi2_init_scrambler_hbm(hdev);
5976 	gaudi2_init_kdma(hdev);
5977 
5978 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5979 	if (rc) {
5980 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5981 		return rc;
5982 	}
5983 
5984 	rc = gaudi2->cpucp_info_get(hdev);
5985 	if (rc) {
5986 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5987 		return rc;
5988 	}
5989 
5990 	rc = gaudi2_mmu_init(hdev);
5991 	if (rc)
5992 		return rc;
5993 
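	/* Initialize the DMA engines, sync manager, compute engines, rotators and decoders */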
5994 	gaudi2_init_pdma(hdev);
5995 	gaudi2_init_edma(hdev);
5996 	gaudi2_init_sm(hdev);
5997 	gaudi2_init_tpc(hdev);
5998 	gaudi2_init_mme(hdev);
5999 	gaudi2_init_rotator(hdev);
6000 	gaudi2_init_dec(hdev);
6001 	gaudi2_enable_timestamp(hdev);
6002 
6003 	rc = gaudi2_coresight_init(hdev);
6004 	if (rc)
6005 		goto disable_queues;
6006 
6007 	rc = gaudi2_enable_msix(hdev);
6008 	if (rc)
6009 		goto disable_queues;
6010 
6011 	/* Perform read from the device to flush all configuration */
6012 	RREG32(mmHW_STATE);
6013 
6014 	return 0;
6015 
6016 disable_queues:
6017 	gaudi2_disable_dma_qmans(hdev);
6018 	gaudi2_disable_mme_qmans(hdev);
6019 	gaudi2_disable_tpc_qmans(hdev);
6020 	gaudi2_disable_rot_qmans(hdev);
6021 	gaudi2_disable_nic_qmans(hdev);
6022 
6023 	gaudi2_disable_timestamp(hdev);
6024 
6025 	return rc;
6026 }
6027 
6028 /**
6029  * gaudi2_send_hard_reset_cmd - common function to handle reset
6030  *
6031  * @hdev: pointer to the habanalabs device structure
6032  *
6033  * This function handles the various possible reset scenarios.
6034  * It considers whether the reset is handled by the driver or the FW and which FW components are loaded.
6035  */
6036 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6037 {
6038 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6039 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
6040 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6041 	u32 cpu_boot_status;
6042 
6043 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6044 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6045 
6046 	/*
6047 	 * Handle the corner case where the failure happened while loading the CPU
6048 	 * management app, but the driver did not detect any failure while loading the
6049 	 * FW. In that scenario the driver will send only HALT_MACHINE, and no one
6050 	 * will respond to the request since the FW is already back in preboot and
6051 	 * cannot handle such a command.
6052 	 * In this case, the next time the management app loads it will check the
6053 	 * events register, which still holds the halt indication, and reboot the device.
6054 	 * The solution is to let preboot clear all relevant registers before the
6055 	 * next boot, once the driver sends COMMS_RST_DEV.
6056 	 */
6057 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6058 
6059 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6060 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6061 		cpu_initialized = true;
6062 
6063 	/*
6064 	 * When Linux/Bootfit is loaded, this write to the SP can be interpreted in 2 ways:
6065 	 * 1. FW reset: FW initiate the reset sequence
6066 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
6067 	 *                  reset but not the reset itself as it is not implemented
6068 	 *                  on their part) and LKD will wait to let FW complete the
6069 	 *                  sequence before issuing the reset
6070 	 */
6071 	if (!preboot_only && cpu_initialized) {
6072 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6073 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6074 
6075 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6076 	}
6077 
6078 	/*
6079 	 * When working with preboot (without Linux/Boot fit) we can
6080 	 * communicate only using the COMMS commands to issue halt/reset.
6081 	 *
6082 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6083 	 * attempt to revive the card in the small chance that the f/w has
6084 	 * experienced a watchdog event, which caused it to return back to preboot.
6085 	 * In that case, triggering reset through GIC won't help. We need to
6086 	 * trigger the reset as if Linux wasn't loaded.
6087 	 *
6088 	 * We do it only if the reset cause was HB, because that would be the
6089 	 * indication of such an event.
6090 	 *
6091 	 * In case watchdog hasn't expired but we still got HB, then this won't
6092 	 * do any damage.
6093 	 */
6094 
6095 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
6096 		if (hdev->asic_prop.hard_reset_done_by_fw)
6097 			hl_fw_ask_hard_reset_without_linux(hdev);
6098 		else
6099 			hl_fw_ask_halt_machine_without_linux(hdev);
6100 	}
6101 }
6102 
6103 /**
6104  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6105  *
6106  * @hdev: pointer to the habanalabs device structure
6107  *
6108  * This function executes hard reset based on if driver/FW should do the reset
6109  */
6110 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6111 {
6112 	if (hdev->asic_prop.hard_reset_done_by_fw) {
6113 		gaudi2_send_hard_reset_cmd(hdev);
6114 		return;
6115 	}
6116 
6117 	/* Set device to handle FLR by H/W as we will put the device
6118 	 * CPU to halt mode
6119 	 */
6120 	WREG32(mmPCIE_AUX_FLR_CTRL,
6121 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6122 
6123 	gaudi2_send_hard_reset_cmd(hdev);
6124 
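	/* Driver-initiated hard reset: trigger the SW all-reset */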
6125 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6126 }
6127 
6128 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6129 {
6130 	int i, rc = 0;
6131 	u32 reg_val;
6132 
6133 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6134 		rc = hl_poll_timeout(
6135 			hdev,
6136 			mmCPU_RST_STATUS_TO_HOST,
6137 			reg_val,
6138 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6139 			1000,
6140 			poll_timeout_us);
6141 
6142 	if (rc)
6143 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6144 				reg_val);
6145 	return rc;
6146 }
6147 
6148 /**
6149  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6150  *
6151  * @hdev: pointer to the habanalabs device structure
6152  * @driver_performs_reset: true if driver should perform reset instead of f/w.
6153  * @poll_timeout_us: time to wait for response from f/w.
6154  *
6155  * This function executes soft reset based on if driver/FW should do the reset
6156  */
6157 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6158 						u32 poll_timeout_us)
6159 {
6160 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6161 	int rc = 0;
6162 
6163 	if (!driver_performs_reset) {
6164 		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
6165 			/* set SP to indicate reset request sent to FW */
6166 			if (dyn_regs->cpu_rst_status)
6167 				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
6168 			else
6169 				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6170 			WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
6171 				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6172 
6173 			/* wait for f/w response */
6174 			rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6175 		} else {
6176 			rc = hl_fw_send_soft_reset(hdev);
6177 		}
6178 		return rc;
6179 	}
6180 
6181 	/* Block access to engines, QMANs and SM during reset, these
6182 	 * RRs will be reconfigured after soft reset.
6183 	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
6184 	 */
6185 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6186 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6187 
6188 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6189 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6190 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6191 
6192 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6193 	return 0;
6194 }
6195 
6196 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6197 {
6198 	int i, rc = 0;
6199 	u32 reg_val;
6200 
6201 	/* We poll the BTM done indication multiple times after reset due to
6202 	 * a HW errata 'GAUDI2_0300'
6203 	 */
6204 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6205 		rc = hl_poll_timeout(
6206 			hdev,
6207 			mmPSOC_GLOBAL_CONF_BTM_FSM,
6208 			reg_val,
6209 			reg_val == 0,
6210 			1000,
6211 			poll_timeout_us);
6212 
6213 	if (rc)
6214 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6215 }
6216 
6217 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6218 {
6219 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6220 	u32 poll_timeout_us, reset_sleep_ms;
6221 	bool driver_performs_reset = false;
6222 	int rc;
6223 
6224 	if (hdev->pldm) {
6225 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6226 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6227 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6228 	} else {
6229 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6230 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6231 	}
6232 
6233 	if (fw_reset)
6234 		goto skip_reset;
6235 
6236 	gaudi2_reset_arcs(hdev);
6237 
6238 	if (hard_reset) {
6239 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6240 		gaudi2_execute_hard_reset(hdev);
6241 	} else {
6242 		/*
6243 		 * As we also have to support working with preboot only (which does not
6244 		 * support soft reset), we must make sure that security is disabled before
6245 		 * letting the driver do the reset. The user shall control the BFE flags to
6246 		 * avoid requesting a soft reset on a secured device with preboot only.
6247 		 */
6248 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6249 							!hdev->asic_prop.fw_security_enabled);
6250 		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6251 		if (rc)
6252 			return rc;
6253 	}
6254 
6255 skip_reset:
6256 	if (driver_performs_reset || hard_reset) {
6257 		/*
6258 		 * Instead of waiting for BTM indication we should wait for preboot ready:
6259 		 * Consider the below scenario:
6260 		 * 1. FW update is being triggered
6261 		 *        - setting the dirty bit
6262 		 * 2. hard reset will be triggered due to the dirty bit
6263 		 * 3. FW initiates the reset:
6264 		 *        - dirty bit cleared
6265 		 *        - BTM indication cleared
6266 		 *        - preboot ready indication cleared
6267 		 * 4. during hard reset:
6268 		 *        - BTM indication will be set
6269 		 *        - BIST test performed and another reset triggered
6270 		 * 5. only after this reset the preboot will set the preboot ready
6271 		 *
6272 		 * When polling on the BTM indication alone we can lose sync with the FW
6273 		 * while trying to communicate with it during reset.
6274 		 * To overcome this we always wait for the preboot ready indication.
6275 		 */
6276 
6277 		/* without this sleep reset will not work */
6278 		msleep(reset_sleep_ms);
6279 
6280 		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6281 			hl_fw_wait_preboot_ready(hdev);
6282 		else
6283 			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6284 	}
6285 
6286 	if (!gaudi2)
6287 		return 0;
6288 
6289 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6290 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6291 
6292 	/*
6293 	 * Clear NIC capability mask in order for driver to re-configure
6294 	 * NIC QMANs. NIC ports will not be re-configured during soft
6295 	 * reset as we call gaudi2_nic_init only during hard reset
6296 	 */
6297 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6298 
6299 	if (hard_reset) {
6300 		gaudi2->hw_cap_initialized &=
6301 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6302 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6303 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6304 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6305 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6306 
6307 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6308 	} else {
6309 		gaudi2->hw_cap_initialized &=
6310 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6311 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6312 			HW_CAP_ROT_MASK);
6313 	}
6314 	return 0;
6315 }
6316 
6317 static int gaudi2_suspend(struct hl_device *hdev)
6318 {
6319 	int rc;
6320 
6321 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6322 	if (rc)
6323 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6324 
6325 	return rc;
6326 }
6327 
6328 static int gaudi2_resume(struct hl_device *hdev)
6329 {
6330 	return gaudi2_init_iatu(hdev);
6331 }
6332 
6333 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6334 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
6335 {
6336 	int rc;
6337 
6338 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6339 			VM_DONTCOPY | VM_NORESERVE);
6340 
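	/* Prefer dma_mmap_coherent() when available, otherwise fall back to remap_pfn_range() */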
6341 #ifdef _HAS_DMA_MMAP_COHERENT
6342 
6343 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6344 	if (rc)
6345 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6346 
6347 #else
6348 
6349 	rc = remap_pfn_range(vma, vma->vm_start,
6350 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6351 				size, vma->vm_page_prot);
6352 	if (rc)
6353 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6354 
6355 #endif
6356 
6357 	return rc;
6358 }
6359 
6360 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6361 {
6362 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6363 	u64 hw_cap_mask = 0;
6364 	u64 hw_tpc_cap_bit = 0;
6365 	u64 hw_nic_cap_bit = 0;
6366 	u64 hw_test_cap_bit = 0;
6367 
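	/* Translate the queue ID into the capability bit that was set when its engine was initialized */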
6368 	switch (hw_queue_id) {
6369 	case GAUDI2_QUEUE_ID_PDMA_0_0:
6370 	case GAUDI2_QUEUE_ID_PDMA_0_1:
6371 	case GAUDI2_QUEUE_ID_PDMA_1_0:
6372 		hw_cap_mask = HW_CAP_PDMA_MASK;
6373 		break;
6374 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6375 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6376 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6377 		break;
6378 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6379 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6380 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6381 		break;
6382 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6383 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6384 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6385 		break;
6386 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6387 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6388 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6389 		break;
6390 
6391 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6392 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
6393 		break;
6394 
6395 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6396 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6397 		break;
6398 
6399 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6400 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6401 		break;
6402 
6403 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6404 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6405 		break;
6406 
6407 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6408 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6409 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6410 
6411 		/* special case where cap bit refers to the first queue id */
6412 		if (!hw_tpc_cap_bit)
6413 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6414 		break;
6415 
6416 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6417 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6418 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6419 		break;
6420 
6421 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6422 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6423 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6424 		break;
6425 
6426 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6427 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6428 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6429 		break;
6430 
6431 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6432 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6433 		break;
6434 
6435 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6436 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6437 		break;
6438 
6439 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6440 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6441 
6442 		/* special case where cap bit refers to the first queue id */
6443 		if (!hw_nic_cap_bit)
6444 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6445 		break;
6446 
6447 	case GAUDI2_QUEUE_ID_CPU_PQ:
6448 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6449 
6450 	default:
6451 		return false;
6452 	}
6453 
6454 	if (hw_tpc_cap_bit)
6455 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6456 
6457 	if (hw_nic_cap_bit)
6458 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6459 
6460 	if (hw_test_cap_bit)
6461 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6462 
6463 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6464 }
6465 
6466 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6467 {
6468 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6469 
6470 	switch (arc_id) {
6471 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6472 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6473 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6474 
6475 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6476 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6477 
6478 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6479 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6480 
6481 	default:
6482 		return false;
6483 	}
6484 }
6485 
6486 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6487 {
6488 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6489 
6490 	switch (arc_id) {
6491 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6492 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6493 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6494 		break;
6495 
6496 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6497 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6498 		break;
6499 
6500 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6501 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6502 		break;
6503 
6504 	default:
6505 		return;
6506 	}
6507 }
6508 
6509 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6510 {
6511 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6512 
6513 	switch (arc_id) {
6514 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6515 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6516 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6517 		break;
6518 
6519 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6520 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6521 		break;
6522 
6523 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6524 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6525 		break;
6526 
6527 	default:
6528 		return;
6529 	}
6530 }
6531 
6532 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6533 {
6534 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6535 	u32 pq_offset, reg_base, db_reg_offset, db_value;
6536 
6537 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6538 		/*
6539 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6540 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6541 		 * number.
6542 		 */
6543 		pq_offset = (hw_queue_id & 0x3) * 4;
6544 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6545 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6546 	} else {
6547 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
6548 	}
6549 
6550 	db_value = pi;
6551 
6552 	/* ring the doorbell */
6553 	WREG32(db_reg_offset, db_value);
6554 
6555 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6556 		/* make sure device CPU will read latest data from host */
6557 		mb();
6558 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6559 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6560 	}
6561 }
6562 
6563 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6564 {
6565 	__le64 *pbd = (__le64 *) bd;
6566 
6567 	/* The QMANs are on host memory, so a simple copy suffices */
6568 	pqe[0] = pbd[0];
6569 	pqe[1] = pbd[1];
6570 }
6571 
6572 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6573 				dma_addr_t *dma_handle, gfp_t flags)
6574 {
6575 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6576 }
6577 
6578 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6579 				void *cpu_addr, dma_addr_t dma_handle)
6580 {
6581 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6582 }
6583 
6584 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6585 				u32 timeout, u64 *result)
6586 {
6587 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6588 
6589 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6590 		if (result)
6591 			*result = 0;
6592 		return 0;
6593 	}
6594 
6595 	if (!timeout)
6596 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6597 
6598 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6599 }
6600 
6601 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6602 				gfp_t mem_flags, dma_addr_t *dma_handle)
6603 {
6604 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6605 		return NULL;
6606 
6607 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6608 }
6609 
6610 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6611 {
6612 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6613 }
6614 
6615 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6616 						dma_addr_t *dma_handle)
6617 {
6618 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6619 }
6620 
6621 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6622 {
6623 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6624 }
6625 
6626 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6627 					enum dma_data_direction dir)
6628 {
6629 	dma_addr_t dma_addr;
6630 
6631 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6632 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6633 		return 0;
6634 
6635 	return dma_addr;
6636 }
6637 
6638 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6639 					enum dma_data_direction dir)
6640 {
6641 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6642 }
6643 
6644 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6645 {
6646 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6647 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6648 
6649 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6650 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6651 		return -EINVAL;
6652 	}
6653 
6654 	/* Just check if CB address is valid */
6655 
6656 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6657 					parser->user_cb_size,
6658 					asic_prop->sram_user_base_address,
6659 					asic_prop->sram_end_address))
6660 		return 0;
6661 
6662 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6663 					parser->user_cb_size,
6664 					asic_prop->dram_user_base_address,
6665 					asic_prop->dram_end_address))
6666 		return 0;
6667 
6668 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6669 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6670 						parser->user_cb_size,
6671 						asic_prop->dmmu.start_addr,
6672 						asic_prop->dmmu.end_addr))
6673 		return 0;
6674 
6675 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6676 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6677 					parser->user_cb_size,
6678 					asic_prop->pmmu.start_addr,
6679 					asic_prop->pmmu.end_addr) ||
6680 			hl_mem_area_inside_range(
6681 					(u64) (uintptr_t) parser->user_cb,
6682 					parser->user_cb_size,
6683 					asic_prop->pmmu_huge.start_addr,
6684 					asic_prop->pmmu_huge.end_addr))
6685 			return 0;
6686 
6687 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6688 		if (!hdev->pdev)
6689 			return 0;
6690 
6691 		if (!device_iommu_mapped(&hdev->pdev->dev))
6692 			return 0;
6693 	}
6694 
6695 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6696 		parser->user_cb, parser->user_cb_size);
6697 
6698 	return -EFAULT;
6699 }
6700 
6701 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6702 {
6703 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6704 
6705 	if (!parser->is_kernel_allocated_cb)
6706 		return gaudi2_validate_cb_address(hdev, parser);
6707 
6708 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6709 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6710 		return -EINVAL;
6711 	}
6712 
6713 	return 0;
6714 }
6715 
6716 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6717 {
6718 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6719 
6720 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6721 		return 0;
6722 
6723 	return hl_fw_send_heartbeat(hdev);
6724 }
6725 
6726 /* This is an internal helper function, used to update the KDMA mmu props.
6727  * Should be called with a proper kdma lock.
6728  */
6729 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6730 					   bool mmu_bypass, u32 asid)
6731 {
6732 	u32 rw_asid, rw_mmu_bp;
6733 
6734 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6735 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6736 
6737 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6738 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6739 
6740 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6741 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6742 }
6743 
6744 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6745 						u32 mon_payload, u32 sync_value)
6746 {
6747 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6748 	u8 mask;
6749 
6750 	sob_offset = sob_id * 4;
6751 	mon_offset = mon_id * 4;
6752 
6753 	/* Reset the SOB value */
6754 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6755 
6756 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
6757 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6758 
6759 	/* Configure this address with CS index because CQ_EN is set */
6760 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6761 
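	/* SOBs are grouped 8 per sync group; leave only the SOB we monitor unmasked */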
6762 	sync_group_id = sob_id / 8;
6763 	mask = ~(1 << (sob_id & 0x7));
6764 	mode = 1; /* comparison mode is "equal to" */
6765 
6766 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6767 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6768 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6769 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6770 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6771 }
6772 
6773 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */
6774 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6775 					u64 src_addr, u64 dst_addr,
6776 					u32 size, bool is_memset)
6777 {
6778 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6779 	struct hl_cq_entry *cq_base;
6780 	struct hl_cq *cq;
6781 	u64 comp_addr;
6782 	int rc;
6783 
6784 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6785 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6786 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6787 
6788 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6789 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6790 
6791 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6792 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6793 
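	/* Program the source, destination, completion write-back and transfer size */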
6794 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6795 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6796 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6797 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6798 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6799 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6800 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6801 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6802 
6803 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6804 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6805 
6806 	if (is_memset)
6807 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6808 
6809 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6810 
6811 	/* Wait for completion */
6812 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6813 	cq_base = cq->kernel_address;
6814 	polling_addr = (u32 *)&cq_base[cq->ci];
6815 
6816 	if (hdev->pldm)
6817 		/* allow 20 seconds of timeout for each 1MB */
6818 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6819 	else
6820 		timeout = KDMA_TIMEOUT_USEC;
6821 
6822 	/* Polling */
6823 	rc = hl_poll_timeout_memory(
6824 			hdev,
6825 			polling_addr,
6826 			status,
6827 			(status == 1),
6828 			1000,
6829 			timeout,
6830 			true);
6831 
6832 	*polling_addr = 0;
6833 
6834 	if (rc) {
6835 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6836 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6837 		return rc;
6838 	}
6839 
6840 	cq->ci = hl_cq_inc_ptr(cq->ci);
6841 
6842 	return 0;
6843 }
6844 
6845 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6846 {
6847 	u32 i;
6848 
6849 	for (i = 0 ; i < size ; i += sizeof(u32))
6850 		WREG32(addr + i, val);
6851 }
6852 
6853 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6854 {
6855 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6856 
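	/* Switch the QMAN protection between test and normal trusted mode, and toggle the PQC accordingly */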
6857 	if (enable) {
6858 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6859 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6860 	} else {
6861 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6862 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6863 	}
6864 }
6865 
6866 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6867 {
6868 	return hdev->asic_prop.first_available_user_sob[0] +
6869 				hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6870 }
6871 
6872 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6873 {
6874 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6875 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6876 
6877 	/* Reset the SOB value */
6878 	WREG32(sob_addr, 0);
6879 }
6880 
6881 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6882 					    struct gaudi2_queues_test_info *msg_info)
6883 {
6884 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6885 	u32 tmp, sob_base = 1;
6886 	struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6887 	size_t pkt_size = sizeof(struct packet_msg_short);
6888 	int rc;
6889 
6890 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6891 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6892 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6893 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6894 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6895 
6896 	msg_short_pkt->value = cpu_to_le32(sob_val);
6897 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6898 
6899 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6900 	if (rc)
6901 		dev_err(hdev->dev,
6902 			"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6903 
6904 	return rc;
6905 }
6906 
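/* Poll the test SOB of the given H/W queue until it holds sob_val or the test timeout expires */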
6907 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6908 {
6909 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6910 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6911 	u32 timeout_usec, tmp;
6912 	int rc;
6913 
6914 	if (hdev->pldm)
6915 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6916 	else
6917 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6918 
6919 	rc = hl_poll_timeout(
6920 			hdev,
6921 			sob_addr,
6922 			tmp,
6923 			(tmp == sob_val),
6924 			1000,
6925 			timeout_usec);
6926 
6927 	if (rc == -ETIMEDOUT) {
6928 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6929 			hw_queue_id, tmp);
6930 		rc = -EIO;
6931 	}
6932 
6933 	return rc;
6934 }
6935 
6936 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6937 {
6938 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6939 
6940 	/*
6941 	 * check capability here as send_cpu_message() won't update the result
6942 	 * value if the CPU queue capability is not set
6943 	 */
6944 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6945 		return 0;
6946 
6947 	return hl_fw_test_cpu_queue(hdev);
6948 }
6949 
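/*
 * Queues test: send a test message on every enabled H/W queue and test the CPU
 * queue, then verify that each queue updated its test SOB as expected.
 */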
6950 static int gaudi2_test_queues(struct hl_device *hdev)
6951 {
6952 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6953 	struct gaudi2_queues_test_info *msg_info;
6954 	u32 sob_val = 0x5a5a;
6955 	int i, rc;
6956 
6957 	/* send test message on all enabled Qs */
6958 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6959 		if (!gaudi2_is_queue_enabled(hdev, i))
6960 			continue;
6961 
6962 		msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
6963 		gaudi2_qman_set_test_mode(hdev, i, true);
6964 		gaudi2_test_queue_clear(hdev, i);
6965 		rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6966 		if (rc)
6967 			goto done;
6968 	}
6969 
6970 	rc = gaudi2_test_cpu_queue(hdev);
6971 	if (rc)
6972 		goto done;
6973 
6974 	/* verify that all messages were processed */
6975 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6976 		if (!gaudi2_is_queue_enabled(hdev, i))
6977 			continue;
6978 
6979 		rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6980 		if (rc)
6981 			/* chip is not usable, no need for cleanups, just bail-out with error */
6982 			goto done;
6983 
6984 		gaudi2_test_queue_clear(hdev, i);
6985 		gaudi2_qman_set_test_mode(hdev, i, false);
6986 	}
6987 
6988 done:
6989 	return rc;
6990 }
6991 
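/*
 * Late initialization after a compute reset: re-initialize the ARCs, scrub their
 * DCCM, re-apply the security configuration and ask the firmware to unmask all IRQs.
 */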
6992 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6993 {
6994 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6995 	size_t irq_arr_size;
6996 	int rc;
6997 
6998 	gaudi2_init_arcs(hdev);
6999 
7000 	rc = gaudi2_scrub_arcs_dccm(hdev);
7001 	if (rc) {
7002 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7003 		return rc;
7004 	}
7005 
7006 	gaudi2_init_security(hdev);
7007 
7008 	/* Unmask all IRQs since some could have been received during the soft reset */
7009 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7010 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7011 }
7012 
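/* Check whether all enabled EDMA engines (QMAN + DMA core) are idle */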
7013 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7014 		struct engines_data *e)
7015 {
7016 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7017 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7018 	unsigned long *mask = (unsigned long *) mask_arr;
7019 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7020 	bool is_idle = true, is_eng_idle;
7021 	int engine_idx, i, j;
7022 	u64 offset;
7023 
7024 	if (e)
7025 		hl_engine_data_sprintf(e,
7026 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7027 			"----  ----  -------  ------------  -------------  -------------\n");
7028 
7029 	for (i = 0; i < NUM_OF_DCORES; i++) {
7030 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7031 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7032 
7033 			if (!(prop->edma_enabled_mask & BIT(seq)))
7034 				continue;
7035 
7036 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7037 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7038 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7039 
7040 			dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7041 			dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7042 
7043 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7044 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7045 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7046 
7047 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7048 					IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7049 			is_idle &= is_eng_idle;
7050 
7051 			if (mask && !is_eng_idle)
7052 				set_bit(engine_idx, mask);
7053 
7054 			if (e)
7055 				hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7056 							qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7057 		}
7058 	}
7059 
7060 	return is_idle;
7061 }
7062 
7063 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7064 		struct engines_data *e)
7065 {
7066 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7067 	unsigned long *mask = (unsigned long *) mask_arr;
7068 	const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7069 	bool is_idle = true, is_eng_idle;
7070 	int engine_idx, i;
7071 	u64 offset;
7072 
7073 	if (e)
7074 		hl_engine_data_sprintf(e,
7075 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7076 					"----  -------  ------------  -------------  -------------\n");
7077 
7078 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7079 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7080 		offset = i * PDMA_OFFSET;
7081 		dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7082 		dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7083 
7084 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7085 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7086 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7087 
7088 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7089 				IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7090 		is_idle &= is_eng_idle;
7091 
7092 		if (mask && !is_eng_idle)
7093 			set_bit(engine_idx, mask);
7094 
7095 		if (e)
7096 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7097 						qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7098 	}
7099 
7100 	return is_idle;
7101 }
7102 
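/* Check whether the QMANs of all enabled NIC engines are idle */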
7103 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7104 		struct engines_data *e)
7105 {
7106 	unsigned long *mask = (unsigned long *) mask_arr;
7107 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7108 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7109 	bool is_idle = true, is_eng_idle;
7110 	int engine_idx, i;
7111 	u64 offset = 0;
7112 
7113 	/* NIC, twelve macros in Full chip */
7114 	if (e && hdev->nic_ports_mask)
7115 		hl_engine_data_sprintf(e,
7116 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7117 					"---  -------  ------------  ----------\n");
7118 
7119 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7120 		if (!(i & 1))
7121 			offset = i / 2 * NIC_OFFSET;
7122 		else
7123 			offset += NIC_QM_OFFSET;
7124 
7125 		if (!(hdev->nic_ports_mask & BIT(i)))
7126 			continue;
7127 
7128 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7129 
7130 
7131 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7132 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7133 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7134 
7135 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7136 		is_idle &= is_eng_idle;
7137 
7138 		if (mask && !is_eng_idle)
7139 			set_bit(engine_idx, mask);
7140 
7141 		if (e)
7142 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7143 						qm_glbl_sts0, qm_cgm_sts);
7144 	}
7145 
7146 	return is_idle;
7147 }
7148 
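/* Check whether all MME engines (one per Dcore) are idle, based on QMAN and MME ARCH status */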
7149 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7150 		struct engines_data *e)
7151 {
7152 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7153 	unsigned long *mask = (unsigned long *) mask_arr;
7154 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7155 	bool is_idle = true, is_eng_idle;
7156 	int engine_idx, i;
7157 	u64 offset;
7158 
7159 	if (e)
7160 		hl_engine_data_sprintf(e,
7161 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
7162 					"---  ----  -------  ------------  ---------------\n");
7163 	/* MME, one per Dcore */
7164 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7165 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7166 		offset = i * DCORE_OFFSET;
7167 
7168 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7169 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7170 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7171 
7172 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7173 		is_idle &= is_eng_idle;
7174 
7175 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7176 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7177 		is_idle &= is_eng_idle;
7178 
7179 		if (e)
7180 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
7181 				is_eng_idle ? "Y" : "N",
7182 				qm_glbl_sts0,
7183 				mme_arch_sts);
7184 
7185 		if (mask && !is_eng_idle)
7186 			set_bit(engine_idx, mask);
7187 	}
7188 
7189 	return is_idle;
7190 }
7191 
7192 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7193 					struct iterate_module_ctx *ctx)
7194 {
7195 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7196 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7197 	bool is_eng_idle;
7198 	int engine_idx;
7199 
7200 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7201 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7202 	else
7203 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7204 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7205 
7206 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7207 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7208 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7209 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7210 
7211 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7212 						IS_TPC_IDLE(tpc_cfg_sts);
7213 	*(idle_data->is_idle) &= is_eng_idle;
7214 
7215 	if (idle_data->mask && !is_eng_idle)
7216 		set_bit(engine_idx, idle_data->mask);
7217 
7218 	if (idle_data->e)
7219 		hl_engine_data_sprintf(idle_data->e,
7220 					idle_data->tpc_fmt, dcore, inst,
7221 					is_eng_idle ? "Y" : "N",
7222 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7223 }
7224 
7225 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7226 		struct engines_data *e)
7227 {
7228 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7229 	unsigned long *mask = (unsigned long *) mask_arr;
7230 	bool is_idle = true;
7231 
7232 	struct gaudi2_tpc_idle_data tpc_idle_data = {
7233 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7234 		.e = e,
7235 		.mask = mask,
7236 		.is_idle = &is_idle,
7237 	};
7238 	struct iterate_module_ctx tpc_iter = {
7239 		.fn = &gaudi2_is_tpc_engine_idle,
7240 		.data = &tpc_idle_data,
7241 	};
7242 
7243 	if (e && prop->tpc_enabled_mask)
7244 		hl_engine_data_sprintf(e,
7245 			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
7246 			"----  ---  -------  ------------  ----------  ------\n");
7247 
7248 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7249 
7250 	return *tpc_idle_data.is_idle;
7251 }
7252 
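/* Check whether all enabled decoders, including the shared PCIe decoders, are idle */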
7253 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7254 		struct engines_data *e)
7255 {
7256 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7257 	unsigned long *mask = (unsigned long *) mask_arr;
7258 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7259 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7260 	bool is_idle = true, is_eng_idle;
7261 	u32 dec_swreg15, dec_enabled_bit;
7262 	int engine_idx, i, j;
7263 	u64 offset;
7264 
7265 	/* Decoders: two per Dcore, plus two shared PCIe decoders */
7266 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7267 		hl_engine_data_sprintf(e,
7268 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
7269 			"----  ---  -------  ---------------\n");
7270 
7271 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7272 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7273 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7274 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7275 				continue;
7276 
7277 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7278 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7279 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7280 
7281 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7282 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7283 			is_idle &= is_eng_idle;
7284 
7285 			if (mask && !is_eng_idle)
7286 				set_bit(engine_idx, mask);
7287 
7288 			if (e)
7289 				hl_engine_data_sprintf(e, dec_fmt, i, j,
7290 							is_eng_idle ? "Y" : "N", dec_swreg15);
7291 		}
7292 	}
7293 
7294 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7295 		hl_engine_data_sprintf(e,
7296 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
7297 			"--------  -------  ---------------\n");
7298 
7299 	/* Check shared(PCIe) decoders */
7300 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7301 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
7302 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7303 			continue;
7304 
7305 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7306 		offset = i * DCORE_DEC_OFFSET;
7307 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7308 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7309 		is_idle &= is_eng_idle;
7310 
7311 		if (mask && !is_eng_idle)
7312 			set_bit(engine_idx, mask);
7313 
7314 		if (e)
7315 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7316 						is_eng_idle ? "Y" : "N", dec_swreg15);
7317 	}
7318 
7319 	return is_idle;
7320 }
7321 
7322 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7323 		struct engines_data *e)
7324 {
7325 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7326 	unsigned long *mask = (unsigned long *) mask_arr;
7327 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7328 	bool is_idle = true, is_eng_idle;
7329 	int engine_idx, i;
7330 	u64 offset;
7331 
7332 	if (e)
7333 		hl_engine_data_sprintf(e,
7334 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
7335 			"----  ---  -------  ------------  ------------  ----------\n");
7336 
7337 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7338 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7339 
7340 		offset = i * ROT_OFFSET;
7341 
7342 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7343 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7344 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7345 
7346 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7347 		is_idle &= is_eng_idle;
7348 
7349 		if (mask && !is_eng_idle)
7350 			set_bit(engine_idx, mask);
7351 
7352 		if (e)
7353 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7354 						qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7355 	}
7356 
7357 	return is_idle;
7358 }
7359 
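/* Aggregate the idle status of all engine types; non-idle engines are marked in mask_arr */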
7360 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7361 					struct engines_data *e)
7362 {
7363 	bool is_idle = true;
7364 
7365 	is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7366 	is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7367 	is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7368 	is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7369 	is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7370 	is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7371 	is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7372 
7373 	return is_idle;
7374 }
7375 
7376 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7377 	__acquires(&gaudi2->hw_queues_lock)
7378 {
7379 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7380 
7381 	spin_lock(&gaudi2->hw_queues_lock);
7382 }
7383 
7384 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7385 	__releases(&gaudi2->hw_queues_lock)
7386 {
7387 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7388 
7389 	spin_unlock(&gaudi2->hw_queues_lock);
7390 }
7391 
7392 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7393 {
7394 	return hdev->pdev->device;
7395 }
7396 
7397 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7398 {
7399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7400 
7401 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7402 		return 0;
7403 
7404 	return hl_fw_get_eeprom_data(hdev, data, max_size);
7405 }
7406 
7407 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7408 {
7409 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7410 }
7411 
7412 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7413 {
7414 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7415 
7416 	if (aggregate) {
7417 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
7418 		return gaudi2->events_stat_aggregate;
7419 	}
7420 
7421 	*size = (u32) sizeof(gaudi2->events_stat);
7422 	return gaudi2->events_stat;
7423 }
7424 
7425 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7426 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7427 {
7428 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7429 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
7430 
7431 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7432 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7433 
7434 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7435 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7436 
7437 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7438 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7439 
7440 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7441 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7442 
7443 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7444 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7445 }
7446 
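/*
 * Configure the MMU-related AXUSER registers of a single Dcore for the given ASID:
 * EDMAs, Sync Manager, MME (including its SBTE/WB ports) and the Dcore decoders.
 */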
7447 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7448 {
7449 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7450 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7451 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7452 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
7453 	u32 vdec_id, i, ports_offset, reg_val;
7454 	u8 edma_seq_base;
7455 
7456 	/* EDMA */
7457 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7458 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7459 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7460 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7461 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7462 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7463 	}
7464 
7465 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7466 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7467 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7468 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7469 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7470 	}
7471 
7472 	/* Sync Mngr */
7473 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7474 	/*
7475 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
7476 	 * for any access type
7477 	 */
7478 	if (dcore_id > 0) {
7479 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7480 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7481 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7482 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7483 	}
7484 
7485 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7486 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7487 
7488 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7489 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
7490 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7491 				dcore_offset + ports_offset, 0);
7492 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7493 				dcore_offset + ports_offset, rw_asid);
7494 	}
7495 
7496 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7497 		ports_offset = i * DCORE_MME_WB_OFFSET;
7498 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7499 				dcore_offset + ports_offset, 0);
7500 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7501 				dcore_offset + ports_offset, rw_asid);
7502 	}
7503 
7504 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7505 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7506 
7507 	/*
7508 	 * Decoders
7509 	 */
7510 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7511 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7512 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7513 	}
7514 }
7515 
7516 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7517 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7518 {
7519 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7520 
7521 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7522 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7523 
7524 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7525 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7526 
7527 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7528 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7529 
7530 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7531 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7532 
7533 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7534 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7535 }
7536 
7537 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7538 							u32 rw_asid, u32 rw_mmu_bp)
7539 {
7540 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7541 
7542 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7543 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7544 }
7545 
7546 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7547 {
7548 	u32 reg_base, reg_offset, reg_val = 0;
7549 
7550 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
7551 
7552 	/* Enable MMU and configure asid for all relevant ARC regions */
7553 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7554 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7555 
7556 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7557 	WREG32(reg_base + reg_offset, reg_val);
7558 
7559 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7560 	WREG32(reg_base + reg_offset, reg_val);
7561 
7562 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7563 	WREG32(reg_base + reg_offset, reg_val);
7564 
7565 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7566 	WREG32(reg_base + reg_offset, reg_val);
7567 
7568 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7569 	WREG32(reg_base + reg_offset, reg_val);
7570 
7571 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7572 	WREG32(reg_base + reg_offset, reg_val);
7573 
7574 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7575 	WREG32(reg_base + reg_offset, reg_val);
7576 
7577 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7578 	WREG32(reg_base + reg_offset, reg_val);
7579 
7580 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7581 	WREG32(reg_base + reg_offset, reg_val);
7582 
7583 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7584 	WREG32(reg_base + reg_offset, reg_val);
7585 
7586 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7587 	WREG32(reg_base + reg_offset, reg_val);
7588 }
7589 
7590 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7591 {
7592 	int i;
7593 
7594 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7595 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7596 
7597 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7598 		gaudi2_arc_mmu_prepare(hdev, i, asid);
7599 
7600 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7601 		if (!gaudi2_is_queue_enabled(hdev, i))
7602 			continue;
7603 
7604 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7605 	}
7606 
7607 	return 0;
7608 }
7609 
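/*
 * Configure the MMU ASID for engines that are not per-Dcore: PDMAs, rotators,
 * shared (PCIe) decoders, ARC-farm DUP engines and the ARC cores themselves.
 */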
7610 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7611 {
7612 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7613 	u32 rw_asid, offset;
7614 	int rc, i;
7615 
7616 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7617 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7618 
7619 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7620 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7621 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7622 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7623 
7624 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7625 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7626 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7627 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7628 
7629 	/* ROT */
7630 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7631 		offset = i * ROT_OFFSET;
7632 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7633 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7634 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7635 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7636 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7637 	}
7638 
7639 	/* Shared Decoders are the last bits in the decoders mask */
7640 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7641 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7642 
7643 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7644 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7645 
7646 	/* arc farm arc dup eng */
7647 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7648 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7649 
7650 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7651 	if (rc)
7652 		return rc;
7653 
7654 	return 0;
7655 }
7656 
7657 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7658 					struct iterate_module_ctx *ctx)
7659 {
7660 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7661 
7662 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7663 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7664 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7665 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7666 }
7667 
7668 /* zero the MMUBP and set the ASID */
7669 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7670 {
7671 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7672 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
7673 	struct iterate_module_ctx tpc_iter = {
7674 		.fn = &gaudi2_tpc_mmu_prepare,
7675 		.data = &tpc_mmu_data,
7676 	};
7677 	int rc, i;
7678 
7679 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7680 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
7681 		return -EINVAL;
7682 	}
7683 
7684 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7685 		return 0;
7686 
7687 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
7688 	if (rc)
7689 		return rc;
7690 
7691 	/* configure DCORE MMUs */
7692 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7693 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7694 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7695 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
7696 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
7697 
7698 	return 0;
7699 }
7700 
7701 static inline bool is_info_event(u32 event)
7702 {
7703 	switch (event) {
7704 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7705 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7706 
7707 	/* return in case of a NIC status event - these events are received periodically and are
7708 	 * not an indication of an error.
7709 	 */
7710 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7711 		return true;
7712 	default:
7713 		return false;
7714 	}
7715 }
7716 
7717 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7718 			bool ratelimited, const char *fmt, ...)
7719 {
7720 	struct va_format vaf;
7721 	va_list args;
7722 
7723 	va_start(args, fmt);
7724 	vaf.fmt = fmt;
7725 	vaf.va = &args;
7726 
7727 	if (ratelimited)
7728 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7729 			gaudi2_irq_map_table[event_type].valid ?
7730 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7731 	else
7732 		dev_err(hdev->dev, "%s: %pV\n",
7733 			gaudi2_irq_map_table[event_type].valid ?
7734 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7735 
7736 	va_end(args);
7737 }
7738 
7739 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7740 		struct hl_eq_ecc_data *ecc_data)
7741 {
7742 	u64 ecc_address = 0, ecc_syndrom = 0;
7743 	u8 memory_wrapper_idx = 0;
7744 
7745 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7746 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7747 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7748 
7749 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7750 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
7751 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7752 
7753 	return !!ecc_data->is_critical;
7754 }
7755 
7756 static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
7757 {
7758 	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
7759 	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
7760 
7761 	lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
7762 	hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
7763 	cq_ptr = ((u64) hi) << 32 | lo;
7764 	cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
7765 
7766 	lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
7767 	hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
7768 	arc_cq_ptr = ((u64) hi) << 32 | lo;
7769 	arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
7770 
7771 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
7772 	hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
7773 	cp_current_inst = ((u64) hi) << 32 | lo;
7774 
7775 	dev_info(hdev->dev,
7776 		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
7777 		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
7778 }
7779 
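/*
 * Decode the QMAN GLBL_ERR_STS of each stream and of the lower CP, plus the
 * arbiter error cause, print every set error bit and return the total error count.
 */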
7780 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7781 							u64 qman_base, u32 qid_base)
7782 {
7783 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7784 	u64 glbl_sts_addr, arb_err_addr;
7785 	char reg_desc[32];
7786 
7787 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7788 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7789 
7790 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7791 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7792 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7793 
7794 		if (!glbl_sts_val)
7795 			continue;
7796 
7797 		if (i == QMAN_STREAMS) {
7798 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
7799 			num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
7800 		} else {
7801 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7802 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7803 		}
7804 
7805 		for (j = 0 ; j < num_error_causes ; j++)
7806 			if (glbl_sts_val & BIT(j)) {
7807 				gaudi2_print_event(hdev, event_type, true,
7808 					"%s. err cause: %s", reg_desc,
7809 					i == QMAN_STREAMS ?
7810 					gaudi2_lower_qman_error_cause[j] :
7811 					gaudi2_qman_error_cause[j]);
7812 				error_count++;
7813 			}
7814 
7815 		if (i == QMAN_STREAMS)
7816 			print_lower_qman_data_on_err(hdev, qman_base);
7817 	}
7818 
7819 	arb_err_val = RREG32(arb_err_addr);
7820 
7821 	if (!arb_err_val)
7822 		goto out;
7823 
7824 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7825 		if (arb_err_val & BIT(j)) {
7826 			gaudi2_print_event(hdev, event_type, true,
7827 				"ARB_ERR. err cause: %s",
7828 				gaudi2_qman_arb_error_cause[j]);
7829 			error_count++;
7830 		}
7831 	}
7832 
7833 out:
7834 	return error_count;
7835 }
7836 
7837 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7838 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7839 			enum gaudi2_engine_id id, u64 *event_mask)
7840 {
7841 	u32 razwi_hi, razwi_lo, razwi_xy;
7842 	u16 eng_id = id;
7843 	u8 rd_wr_flag;
7844 
7845 	if (is_write) {
7846 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7847 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7848 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7849 		rd_wr_flag = HL_RAZWI_WRITE;
7850 	} else {
7851 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7852 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7853 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7854 		rd_wr_flag = HL_RAZWI_READ;
7855 	}
7856 
7857 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7858 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7859 
7860 	dev_err_ratelimited(hdev->dev,
7861 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7862 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7863 }
7864 
7865 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7866 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7867 			enum gaudi2_engine_id id, u64 *event_mask)
7868 {
7869 	u64 razwi_addr = CFG_BASE;
7870 	u32 razwi_xy;
7871 	u16 eng_id = id;
7872 	u8 rd_wr_flag;
7873 
7874 	if (is_write) {
7875 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7876 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7877 		rd_wr_flag = HL_RAZWI_WRITE;
7878 	} else {
7879 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7880 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7881 		rd_wr_flag = HL_RAZWI_READ;
7882 	}
7883 
7884 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7885 	dev_err_ratelimited(hdev->dev,
7886 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7887 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7888 						razwi_xy);
7889 }
7890 
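/* Translate a RAZWI initiator (module type + module index) to its gaudi2 engine ID */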
7891 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7892 						enum razwi_event_sources module, u8 module_idx)
7893 {
7894 	switch (module) {
7895 	case RAZWI_TPC:
7896 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7897 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7898 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7899 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7900 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7901 
7902 	case RAZWI_MME:
7903 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7904 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7905 
7906 	case RAZWI_EDMA:
7907 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7908 			(module_idx % NUM_OF_EDMA_PER_DCORE));
7909 
7910 	case RAZWI_PDMA:
7911 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7912 
7913 	case RAZWI_NIC:
7914 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7915 
7916 	case RAZWI_DEC:
7917 		if (module_idx == 8)
7918 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7919 
7920 		if (module_idx == 9)
7921 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7922
7923 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7924 				(module_idx % NUM_OF_DEC_PER_DCORE) +
7925 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7926 
7927 	case RAZWI_ROT:
7928 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7929 
7930 	default:
7931 		return GAUDI2_ENGINE_ID_SIZE;
7932 	}
7933 }
7934 
7935 /*
7936  * This function handles RR (Range Register) hit events
7937  * raised by initiators, not PSOC RAZWI.
7938  */
7939 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7940 				enum razwi_event_sources module, u8 module_idx,
7941 				u8 module_sub_idx, u64 *event_mask)
7942 {
7943 	bool via_sft = false;
7944 	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
7945 	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
7946 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7947 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7948 	char initiator_name[64];
7949 
7950 	switch (module) {
7951 	case RAZWI_TPC:
7952 		sprintf(initiator_name, "TPC_%u", module_idx);
7953 		if (hdev->tpc_binning) {
7954 			binned_idx = __ffs(hdev->tpc_binning);
7955 			if (binned_idx == module_idx)
7956 				module_idx = TPC_ID_DCORE0_TPC6;
7957 		}
7958 
7959 		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
7960 
7961 		if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
7962 				!hdev->asic_prop.fw_security_enabled &&
7963 				((module_idx == 0) || (module_idx == 1)))
7964 			lbw_rtr_id = DCORE0_RTR0;
7965 		else
7966 			lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
7967 		break;
7968 	case RAZWI_MME:
7969 		sprintf(initiator_name, "MME_%u", module_idx);
7970 		switch (module_sub_idx) {
7971 		case MME_WAP0:
7972 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7973 			break;
7974 		case MME_WAP1:
7975 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7976 			break;
7977 		case MME_WRITE:
7978 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7979 			break;
7980 		case MME_READ:
7981 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7982 			break;
7983 		case MME_SBTE0:
7984 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7985 			break;
7986 		case MME_SBTE1:
7987 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7988 			break;
7989 		case MME_SBTE2:
7990 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7991 			break;
7992 		case MME_SBTE3:
7993 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7994 			break;
7995 		case MME_SBTE4:
7996 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7997 			break;
7998 		default:
7999 			return;
8000 		}
8001 		lbw_rtr_id = hbw_rtr_id;
8002 		break;
8003 	case RAZWI_EDMA:
8004 		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8005 		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8006 		/* SFT has a separate MSTR_IF for LBW; only there can we
8007 		 * read the LBW RAZWI related registers
8008 		 */
8009 		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8010 								dcore_id * SFT_DCORE_OFFSET;
8011 		via_sft = true;
8012 		sprintf(initiator_name, "EDMA_%u", module_idx);
8013 		break;
8014 	case RAZWI_PDMA:
8015 		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8016 		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8017 		sprintf(initiator_name, "PDMA_%u", module_idx);
8018 		break;
8019 	case RAZWI_NIC:
8020 		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8021 		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8022 		sprintf(initiator_name, "NIC_%u", module_idx);
8023 		break;
8024 	case RAZWI_DEC:
8025 		sprintf(initiator_name, "DEC_%u", module_idx);
8026 		if (hdev->decoder_binning) {
8027 			binned_idx = __ffs(hdev->decoder_binning);
8028 			if (binned_idx == module_idx)
8029 				module_idx = DEC_ID_PCIE_VDEC1;
8030 		}
8031 		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8032 		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8033 		break;
8034 	case RAZWI_ROT:
8035 		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8036 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8037 		sprintf(initiator_name, "ROT_%u", module_idx);
8038 		break;
8039 	default:
8040 		return;
8041 	}
8042 
8043 	/* Find router mstr_if register base */
8044 	if (!via_sft) {
8045 		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8046 		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8047 		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8048 				dcore_id * DCORE_OFFSET +
8049 				dcore_rtr_id * DCORE_RTR_OFFSET +
8050 				RTR_MSTR_IF_OFFSET;
8051 		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8052 				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8053 	}
8054 
8055 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
8056 	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8057 	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8058 	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8059 	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8060 
8061 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8062 	if (hbw_shrd_aw) {
8063 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8064 						initiator_name, eng_id, event_mask);
8065 
8066 		/* Clear event indication */
8067 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8068 	}
8069 
8070 	if (hbw_shrd_ar) {
8071 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8072 						initiator_name, eng_id, event_mask);
8073 
8074 		/* Clear event indication */
8075 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8076 	}
8077 
8078 	if (lbw_shrd_aw) {
8079 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8080 						initiator_name, eng_id, event_mask);
8081 
8082 		/* Clear event indication */
8083 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8084 	}
8085 
8086 	if (lbw_shrd_ar) {
8087 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8088 						initiator_name, eng_id, event_mask);
8089 
8090 		/* Clear event indication */
8091 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8092 	}
8093 }
8094 
8095 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8096 {
8097 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8098 	u8 mod_idx, sub_mod;
8099 
8100 	/* check all TPCs */
8101 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8102 		if (prop->tpc_enabled_mask & BIT(mod_idx))
8103 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8104 	}
8105 
8106 	/* check all MMEs */
8107 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8108 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8109 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8110 									sub_mod, NULL);
8111 
8112 	/* check all EDMAs */
8113 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8114 		if (prop->edma_enabled_mask & BIT(mod_idx))
8115 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8116 
8117 	/* check all PDMAs */
8118 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8119 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8120 
8121 	/* check all NICs */
8122 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8123 		if (hdev->nic_ports_mask & BIT(mod_idx))
8124 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8125 								NULL);
8126 
8127 	/* check all DECs */
8128 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8129 		if (prop->decoder_enabled_mask & BIT(mod_idx))
8130 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8131 
8132 	/* check all ROTs */
8133 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8134 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8135 }
8136 
8137 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8138 						u32 axuser_xy, u32 *base, u16 *eng_id,
8139 						char *eng_name)
8140 {
8141 
8142 	int i, num_of_eng = 0;
8143 	u16 str_size = 0;
8144 
8145 	for (i = 0 ; i < array_size ; i++) {
8146 		if (axuser_xy != razwi_info[i].axuser_xy)
8147 			continue;
8148 
8149 		eng_id[num_of_eng] = razwi_info[i].eng_id;
8150 		base[num_of_eng] = razwi_info[i].rtr_ctrl;
8151 		if (!num_of_eng)
8152 			str_size += snprintf(eng_name + str_size,
8153 						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8154 						razwi_info[i].eng_name);
8155 		else
8156 			str_size += snprintf(eng_name + str_size,
8157 						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8158 						razwi_info[i].eng_name);
8159 		num_of_eng++;
8160 	}
8161 
8162 	return num_of_eng;
8163 }
8164 
8165 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8166 						u64 *event_mask)
8167 {
8168 	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8169 	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8170 	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8171 	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8172 	bool razwi_happened = false;
8173 	u64 addr;
8174 	int i;
8175 
8176 	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8177 							axuser_xy, base, eng_id, eng_name_str);
8178 
8179 	/* If no match for XY coordinates, try to find it in MME razwi table */
8180 	if (!num_of_eng) {
8181 		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8182 		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8183 								ARRAY_SIZE(mme_razwi_info),
8184 								axuser_xy, base, eng_id,
8185 								eng_name_str);
8186 	}
8187 
8188 	for  (i = 0 ; i < num_of_eng ; i++) {
8189 		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8190 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8191 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8192 			addr = ((u64)addr_hi << 32) + addr_lo;
8193 			if (addr) {
8194 				dev_err(hdev->dev,
8195 					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8196 					eng_name_str, addr);
8197 				hl_handle_razwi(hdev, addr, &eng_id[0],
8198 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8199 				razwi_happened = true;
8200 			}
8201 		}
8202 
8203 		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8204 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8205 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8206 			addr = ((u64)addr_hi << 32) + addr_lo;
8207 			if (addr) {
8208 				dev_err(hdev->dev,
8209 					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8210 					eng_name_str, addr);
8211 				hl_handle_razwi(hdev, addr, &eng_id[0],
8212 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8213 				razwi_happened = true;
8214 			}
8215 		}
8216 
8217 		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8218 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8219 			if (addr_lo) {
8220 				dev_err(hdev->dev,
8221 					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8222 					eng_name_str, addr_lo);
8223 				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8224 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8225 				razwi_happened = true;
8226 			}
8227 		}
8228 
8229 		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8230 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8231 			if (addr_lo) {
8232 				dev_err(hdev->dev,
8233 						"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8234 						eng_name_str, addr_lo);
8235 				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8236 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8237 				razwi_happened = true;
8238 			}
8239 		}
8240 		/* In the common case the loop will break when there is only one engine id, or
8241 		 * several engines sharing the same router. The exceptional case is a psoc razwi
8242 		 * from EDMA, where it's possible to get an axuser id that fits 2 routers (the 2
8243 		 * interfaces of the sft router). In that case the first router might not hold the
8244 		 * info and we will need to iterate over the other router.
8245 		 */
8246 		if (razwi_happened)
8247 			break;
8248 	}
8249 
8250 	return razwi_happened;
8251 }
8252 
8253 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8254 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8255 {
8256 	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8257 
8258 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8259 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8260 		if (!razwi_intr)
8261 			return 0;
8262 	}
8263 
8264 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8265 
8266 	dev_err_ratelimited(hdev->dev,
8267 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8268 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8269 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8270 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8271 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8272 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8273 
8274 	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8275 		error_count++;
8276 	else
8277 		dev_err_ratelimited(hdev->dev,
8278 				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8279 				razwi_mask_info);
8280 
8281 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8282 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8283 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8284 
8285 	return error_count;
8286 }
8287 
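/* Print and clear the QM SEI error causes of a single QMAN; returns the number of errors found */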
8288 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8289 {
8290 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8291 
8292 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8293 
8294 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8295 		if (sts_val & BIT(i)) {
8296 			gaudi2_print_event(hdev, event_type, true,
8297 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
8298 			sts_clr_val |= BIT(i);
8299 			error_count++;
8300 		}
8301 	}
8302 
8303 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8304 
8305 	return error_count;
8306 }
8307 
8308 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8309 					bool extended_err_check, u64 *event_mask)
8310 {
8311 	enum razwi_event_sources module;
8312 	u32 error_count = 0;
8313 	u64 qman_base;
8314 	u8 index;
8315 
8316 	switch (event_type) {
8317 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8318 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8319 		qman_base = mmDCORE0_TPC0_QM_BASE +
8320 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8321 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8322 		module = RAZWI_TPC;
8323 		break;
8324 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8325 		qman_base = mmDCORE0_TPC6_QM_BASE;
8326 		module = RAZWI_TPC;
8327 		break;
8328 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8329 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8330 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8331 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8332 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8333 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8334 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8335 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8336 		module = RAZWI_MME;
8337 		break;
8338 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8339 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8340 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8341 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8342 		module = RAZWI_PDMA;
8343 		break;
8344 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8345 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8346 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8347 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8348 		module = RAZWI_ROT;
8349 		break;
8350 	default:
8351 		return 0;
8352 	}
8353 
8354 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8355 
8356 	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
8357 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8358 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8359 		error_count += _gaudi2_handle_qm_sei_err(hdev,
8360 					qman_base + NIC_QM_OFFSET, event_type);
8361 
8362 	if (extended_err_check) {
8363 		/* check if RAZWI happened */
8364 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8365 		hl_check_for_glbl_errors(hdev);
8366 	}
8367 
8368 	return error_count;
8369 }
8370 
8371 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8372 {
8373 	u32 qid_base, error_count = 0;
8374 	u64 qman_base;
8375 	u8 index = 0;
8376 
8377 	switch (event_type) {
8378 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8379 		index = event_type - GAUDI2_EVENT_TPC0_QM;
8380 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8381 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8382 		break;
8383 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8384 		index = event_type - GAUDI2_EVENT_TPC6_QM;
8385 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8386 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8387 		break;
8388 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8389 		index = event_type - GAUDI2_EVENT_TPC12_QM;
8390 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8391 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8392 		break;
8393 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8394 		index = event_type - GAUDI2_EVENT_TPC18_QM;
8395 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8396 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8397 		break;
8398 	case GAUDI2_EVENT_TPC24_QM:
8399 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8400 		qman_base = mmDCORE0_TPC6_QM_BASE;
8401 		break;
8402 	case GAUDI2_EVENT_MME0_QM:
8403 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8404 		qman_base = mmDCORE0_MME_QM_BASE;
8405 		break;
8406 	case GAUDI2_EVENT_MME1_QM:
8407 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8408 		qman_base = mmDCORE1_MME_QM_BASE;
8409 		break;
8410 	case GAUDI2_EVENT_MME2_QM:
8411 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8412 		qman_base = mmDCORE2_MME_QM_BASE;
8413 		break;
8414 	case GAUDI2_EVENT_MME3_QM:
8415 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8416 		qman_base = mmDCORE3_MME_QM_BASE;
8417 		break;
8418 	case GAUDI2_EVENT_HDMA0_QM:
8419 		index = 0;
8420 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8421 		qman_base = mmDCORE0_EDMA0_QM_BASE;
8422 		break;
8423 	case GAUDI2_EVENT_HDMA1_QM:
8424 		index = 1;
8425 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8426 		qman_base = mmDCORE0_EDMA1_QM_BASE;
8427 		break;
8428 	case GAUDI2_EVENT_HDMA2_QM:
8429 		index = 2;
8430 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8431 		qman_base = mmDCORE1_EDMA0_QM_BASE;
8432 		break;
8433 	case GAUDI2_EVENT_HDMA3_QM:
8434 		index = 3;
8435 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8436 		qman_base = mmDCORE1_EDMA1_QM_BASE;
8437 		break;
8438 	case GAUDI2_EVENT_HDMA4_QM:
8439 		index = 4;
8440 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8441 		qman_base = mmDCORE2_EDMA0_QM_BASE;
8442 		break;
8443 	case GAUDI2_EVENT_HDMA5_QM:
8444 		index = 5;
8445 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8446 		qman_base = mmDCORE2_EDMA1_QM_BASE;
8447 		break;
8448 	case GAUDI2_EVENT_HDMA6_QM:
8449 		index = 6;
8450 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8451 		qman_base = mmDCORE3_EDMA0_QM_BASE;
8452 		break;
8453 	case GAUDI2_EVENT_HDMA7_QM:
8454 		index = 7;
8455 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8456 		qman_base = mmDCORE3_EDMA1_QM_BASE;
8457 		break;
8458 	case GAUDI2_EVENT_PDMA0_QM:
8459 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8460 		qman_base = mmPDMA0_QM_BASE;
8461 		break;
8462 	case GAUDI2_EVENT_PDMA1_QM:
8463 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8464 		qman_base = mmPDMA1_QM_BASE;
8465 		break;
8466 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8467 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8468 		qman_base = mmROT0_QM_BASE;
8469 		break;
8470 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8471 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8472 		qman_base = mmROT1_QM_BASE;
8473 		break;
8474 	default:
8475 		return 0;
8476 	}
8477 
8478 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8479 
8480 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8481 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8482 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8483 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8484 	}
8485 
8486 	hl_check_for_glbl_errors(hdev);
8487 
8488 	return error_count;
8489 }
8490 
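/*
 * Decode and clear the SEI interrupt status of every ARC-farm ARC, printing
 * each active error cause. Returns the total number of causes found.
 */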
8491 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8492 {
8493 	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8494 
8495 	for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8496 		sts_clr_val = 0;
8497 		sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8498 				(arc_farm * ARC_FARM_OFFSET));
8499 
8500 		for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8501 			if (sts_val & BIT(i)) {
8502 				gaudi2_print_event(hdev, event_type, true,
8503 						"ARC FARM ARC %u err cause: %s",
8504 						arc_farm, gaudi2_arc_sei_error_cause[i]);
8505 				sts_clr_val |= BIT(i);
8506 				error_count++;
8507 			}
8508 		}
8509 		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8510 				sts_clr_val);
8511 	}
8512 
8513 	hl_check_for_glbl_errors(hdev);
8514 
8515 	return error_count;
8516 }
8517 
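/* Decode, print and clear the CPU-IF SEI interrupt causes */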
8518 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8519 {
8520 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8521 
8522 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8523 
8524 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8525 		if (sts_val & BIT(i)) {
8526 			gaudi2_print_event(hdev, event_type, true,
8527 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8528 			sts_clr_val |= BIT(i);
8529 			error_count++;
8530 		}
8531 	}
8532 
8533 	hl_check_for_glbl_errors(hdev);
8534 
8535 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8536 
8537 	return error_count;
8538 }
8539 
8540 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8541 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8542 					u64 *event_mask)
8543 {
8544 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8545 	u32 error_count = 0;
8546 	int i;
8547 
8548 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8549 		if (intr_cause_data & BIT(i)) {
8550 			gaudi2_print_event(hdev, event_type, true,
8551 				"err cause: %s", guadi2_rot_error_cause[i]);
8552 			error_count++;
8553 		}
8554 
8555 	/* check if RAZWI happened */
8556 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8557 	hl_check_for_glbl_errors(hdev);
8558 
8559 	return error_count;
8560 }
8561 
8562 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8563 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8564 					u64 *event_mask)
8565 {
8566 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8567 	u32 error_count = 0;
8568 	int i;
8569 
8570 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8571 		if (intr_cause_data & BIT(i)) {
8572 			gaudi2_print_event(hdev, event_type, true,
8573 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8574 			error_count++;
8575 		}
8576 
8577 	/* check if RAZWI happened */
8578 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8579 	hl_check_for_glbl_errors(hdev);
8580 
8581 	return error_count;
8582 }
8583 
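/*
 * Handle a decoder (VDEC) error: select the DCORE or PCIE decoder cause
 * register by index, print the active causes, ack a possible RAZWI and
 * clear the handled causes (write-1-to-clear).
 */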
8584 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8585 					u64 *event_mask)
8586 {
8587 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8588 	int i;
8589 
8590 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8591 		/* DCORE DEC */
8592 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8593 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8594 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8595 	else
8596 		/* PCIE DEC */
8597 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8598 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8599 
8600 	sts_val = RREG32(sts_addr);
8601 
8602 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8603 		if (sts_val & BIT(i)) {
8604 			gaudi2_print_event(hdev, event_type, true,
8605 				"err cause: %s", gaudi2_dec_error_cause[i]);
8606 			sts_clr_val |= BIT(i);
8607 			error_count++;
8608 		}
8609 	}
8610 
8611 	/* check if RAZWI happened */
8612 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8613 	hl_check_for_glbl_errors(hdev);
8614 
8615 	/* Write 1 to clear errors */
8616 	WREG32(sts_addr, sts_clr_val);
8617 
8618 	return error_count;
8619 }
8620 
8621 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8622 					u64 *event_mask)
8623 {
8624 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8625 	int i;
8626 
8627 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8628 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8629 
8630 	sts_val = RREG32(sts_addr);
8631 
8632 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8633 		if (sts_val & BIT(i)) {
8634 			gaudi2_print_event(hdev, event_type, true,
8635 				"err cause: %s", guadi2_mme_error_cause[i]);
8636 			sts_clr_val |= BIT(i);
8637 			error_count++;
8638 		}
8639 	}
8640 
8641 	/* check if RAZWI happened */
8642 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8643 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8644 
8645 	hl_check_for_glbl_errors(hdev);
8646 
8647 	WREG32(sts_clr_addr, sts_clr_val);
8648 
8649 	return error_count;
8650 }
8651 
8652 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8653 					u64 intr_cause_data)
8654 {
8655 	int i, error_count = 0;
8656 
8657 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8658 		if (intr_cause_data & BIT(i)) {
8659 			gaudi2_print_event(hdev, event_type, true,
8660 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8661 			error_count++;
8662 		}
8663 
8664 	hl_check_for_glbl_errors(hdev);
8665 
8666 	return error_count;
8667 }
8668 
8669 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8670 					u64 *event_mask)
8671 {
8672 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8673 	int i;
8674 
8675 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8676 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8677 
8678 	sts_val = RREG32(sts_addr);
8679 
8680 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8681 		if (sts_val & BIT(i)) {
8682 			gaudi2_print_event(hdev, event_type, true,
8683 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8684 			sts_clr_val |= BIT(i);
8685 			error_count++;
8686 		}
8687 	}
8688 
8689 	/* check if RAZWI happened on WAP0/1 */
8690 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8691 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8692 	hl_check_for_glbl_errors(hdev);
8693 
8694 	WREG32(sts_clr_addr, sts_clr_val);
8695 
8696 	return error_count;
8697 }
8698 
8699 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8700 					u64 intr_cause_data)
8701 {
8702 	u32 error_count = 0;
8703 	int i;
8704 
8705 	/* If an AXI read or write error is received, an error is reported and an
8706 	 * interrupt message is sent. Due to a HW erratum, when reading the cause
8707 	 * register of the KDMA engine, the reported error is always HBW even if
8708 	 * the actual error was caused by an LBW KDMA transaction.
8709 	 */
8710 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8711 		if (intr_cause_data & BIT(i)) {
8712 			gaudi2_print_event(hdev, event_type, true,
8713 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8714 			error_count++;
8715 		}
8716 
8717 	hl_check_for_glbl_errors(hdev);
8718 
8719 	return error_count;
8720 }
8721 
8722 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
8723 {
8724 	u32 error_count = 0;
8725 	int i;
8726 
8727 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8728 		if (intr_cause & BIT(i)) {
8729 			gaudi2_print_event(hdev, event_type, true,
8730 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8731 			error_count++;
8732 		}
8733 
8734 	hl_check_for_glbl_errors(hdev);
8735 
8736 	return error_count;
8737 }
8738 
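/*
 * Check the four RAZWI-happened indications of the PCIE shared master
 * interface (HBW/LBW x AW/AR), print the captured info and clear each
 * indication that was set.
 */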
8739 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8740 {
8741 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8742 
8743 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8744 	if (RREG32(razwi_happened_addr)) {
8745 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8746 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8747 		WREG32(razwi_happened_addr, 0x1);
8748 	}
8749 
8750 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8751 	if (RREG32(razwi_happened_addr)) {
8752 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8753 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8754 		WREG32(razwi_happened_addr, 0x1);
8755 	}
8756 
8757 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8758 	if (RREG32(razwi_happened_addr)) {
8759 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8760 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8761 		WREG32(razwi_happened_addr, 0x1);
8762 	}
8763 
8764 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8765 	if (RREG32(razwi_happened_addr)) {
8766 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8767 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8768 		WREG32(razwi_happened_addr, 0x1);
8769 	}
8770 }
8771 
8772 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8773 					u64 intr_cause_data, u64 *event_mask)
8774 {
8775 	u32 error_count = 0;
8776 	int i;
8777 
8778 	gaudi2_print_event(hdev, event_type, true,
8779 		"intr_cause_data: %#llx", intr_cause_data);
8780 
8781 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8782 		if (!(intr_cause_data & BIT_ULL(i)))
8783 			continue;
8784 
8785 		gaudi2_print_event(hdev, event_type, true,
8786 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8787 		error_count++;
8788 
8789 		/*
8790 		 * Always check for LBW and HBW additional info as the indication itself is
8791 		 * sometimes missing
8792 		 */
8793 	}
8794 
8795 	hl_check_for_glbl_errors(hdev);
8796 	gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8797 
8798 	return error_count;
8799 }
8800 
8801 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8802 				u64 intr_cause_data)
8803 
8804 {
8805 	u32 error_count = 0;
8806 	int i;
8807 
8808 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8809 		if (intr_cause_data & BIT_ULL(i)) {
8810 			gaudi2_print_event(hdev, event_type, true,
8811 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8812 			error_count++;
8813 		}
8814 	}
8815 
8816 	return error_count;
8817 }
8818 
8819 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8820 {
8821 	u32 error_count = 0;
8822 	int i;
8823 
8824 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8825 		if (intr_cause_data & BIT_ULL(i)) {
8826 			gaudi2_print_event(hdev, event_type, true,
8827 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8828 			error_count++;
8829 		}
8830 	}
8831 
8832 	return error_count;
8833 }
8834 
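/*
 * If the MMU captured a page error, reconstruct the faulting VA from the
 * capture registers (descrambled for HMMU), report it and clear the
 * capture-valid indication.
 */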
8835 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8836 					u64 *event_mask)
8837 {
8838 	u32 valid, val;
8839 	u64 addr;
8840 
8841 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8842 
8843 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8844 		return;
8845 
8846 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8847 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8848 	addr <<= 32;
8849 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8850 
8851 	if (is_pmmu) {
8852 		dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
8853 	} else {
8854 
8855 		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8856 		addr &= HW_UNSCRAMBLED_BITS_MASK;
8857 		dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
8858 				addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
8859 	}
8860 
8861 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8862 
8863 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8864 }
8865 
8866 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8867 {
8868 	u32 valid, val;
8869 	u64 addr;
8870 
8871 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8872 
8873 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8874 		return;
8875 
8876 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8877 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8878 	addr <<= 32;
8879 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8880 
8881 	if (!is_pmmu)
8882 		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8883 
8884 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8885 				is_pmmu ? "PMMU" : "HMMU", addr);
8886 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8887 }
8888 
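/*
 * Decode the MMU SPI/SEI cause register. Cause bit 0 is a page error and
 * bit 1 is an access error, each with dedicated handling; the handled
 * causes and their interrupts are cleared before returning.
 */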
8889 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8890 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8891 {
8892 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8893 	int i;
8894 
8895 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8896 
8897 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8898 		if (spi_sei_cause & BIT(i)) {
8899 			gaudi2_print_event(hdev, event_type, true,
8900 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8901 
8902 			if (i == 0)
8903 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8904 			else if (i == 1)
8905 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8906 
8907 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8908 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8909 
8910 			error_count++;
8911 		}
8912 	}
8913 
8914 	/* Clear cause */
8915 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8916 
8917 	/* Clear interrupt */
8918 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8919 
8920 	return error_count;
8921 }
8922 
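/*
 * Handle a sync manager error: decode the SM SEI cause (together with its
 * log value) and a pending CQ interrupt, clearing both.
 */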
8923 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8924 {
8925 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8926 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8927 	int i;
8928 
8929 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8930 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8931 
8932 	sei_cause_val = RREG32(sei_cause_addr);
8933 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8934 	cq_intr_val = RREG32(cq_intr_addr);
8935 
8936 	/* SEI interrupt */
8937 	if (sei_cause_cause) {
8938 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8939 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8940 					sei_cause_val);
8941 
8942 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8943 			if (!(sei_cause_cause & BIT(i)))
8944 				continue;
8945 
8946 			gaudi2_print_event(hdev, event_type, true,
8947 				"err cause: %s. %s: 0x%X",
8948 				gaudi2_sm_sei_cause[i].cause_name,
8949 				gaudi2_sm_sei_cause[i].log_name,
8950 				sei_cause_log);
8951 			error_count++;
8952 			break;
8953 		}
8954 
8955 		/* Clear SM_SEI_CAUSE */
8956 		WREG32(sei_cause_addr, 0);
8957 	}
8958 
8959 	/* CQ interrupt */
8960 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8961 		cq_intr_queue_index =
8962 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8963 					cq_intr_val);
8964 
8965 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8966 				sm_index, cq_intr_queue_index);
8967 		error_count++;
8968 
8969 		/* Clear CQ_INTR */
8970 		WREG32(cq_intr_addr, 0);
8971 	}
8972 
8973 	hl_check_for_glbl_errors(hdev);
8974 
8975 	return error_count;
8976 }
8977 
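/*
 * Translate an HMMU event type to the MMU base address of the HMMU that
 * raised it (dcore + index within the dcore). Returns ULONG_MAX for an
 * unrecognized event type.
 */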
8978 static u64 get_hmmu_base(u16 event_type)
8979 {
8980 	u8 dcore, index_in_dcore;
8981 
8982 	switch (event_type) {
8983 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
8984 	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
8985 		dcore = 0;
8986 		index_in_dcore = 0;
8987 	break;
8988 	case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
8989 	case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
8990 		dcore = 1;
8991 		index_in_dcore = 0;
8992 	break;
8993 	case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
8994 	case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
8995 		dcore = 0;
8996 		index_in_dcore = 1;
8997 	break;
8998 	case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8999 	case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9000 		dcore = 1;
9001 		index_in_dcore = 1;
9002 	break;
9003 	case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9004 	case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9005 		dcore = 3;
9006 		index_in_dcore = 2;
9007 	break;
9008 	case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9009 	case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9010 		dcore = 2;
9011 		index_in_dcore = 2;
9012 	break;
9013 	case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9014 	case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9015 		dcore = 3;
9016 		index_in_dcore = 3;
9017 	break;
9018 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9019 	case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9020 		dcore = 2;
9021 		index_in_dcore = 3;
9022 	break;
9023 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9024 	case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9025 		dcore = 0;
9026 		index_in_dcore = 2;
9027 	break;
9028 	case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9029 	case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9030 		dcore = 1;
9031 		index_in_dcore = 2;
9032 	break;
9033 	case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9034 	case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9035 		dcore = 0;
9036 		index_in_dcore = 3;
9037 	break;
9038 	case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9039 	case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9040 		dcore = 1;
9041 		index_in_dcore = 3;
9042 	break;
9043 	case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9044 	case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9045 		dcore = 3;
9046 		index_in_dcore = 0;
9047 	break;
9048 	case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9049 	case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9050 		dcore = 2;
9051 		index_in_dcore = 0;
9052 	break;
9053 	case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9054 	case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9055 		dcore = 3;
9056 		index_in_dcore = 1;
9057 	break;
9058 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9059 	case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9060 		dcore = 2;
9061 		index_in_dcore = 1;
9062 	break;
9063 	default:
9064 		return ULONG_MAX;
9065 	}
9066 
9067 	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9068 }
9069 
9070 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9071 {
9072 	bool is_pmmu = false;
9073 	u32 error_count = 0;
9074 	u64 mmu_base;
9075 
9076 	switch (event_type) {
9077 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9078 	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9079 		mmu_base = get_hmmu_base(event_type);
9080 		break;
9081 
9082 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9083 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9084 		is_pmmu = true;
9085 		mmu_base = mmPMMU_HBW_MMU_BASE;
9086 		break;
9087 	default:
9088 		return 0;
9089 	}
9090 
9091 	if (mmu_base == ULONG_MAX)
9092 		return 0;
9093 
9094 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9095 							is_pmmu, event_mask);
9096 	hl_check_for_glbl_errors(hdev);
9097 
9098 	return error_count;
9099 }
9100 
9101 
9102 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9103 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9104 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9105 {
9106 	u32 addr, beat, beat_shift;
9107 	bool rc = false;
9108 
9109 	dev_err_ratelimited(hdev->dev,
9110 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9111 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9112 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9113 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9114 
9115 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9116 	dev_err_ratelimited(hdev->dev,
9117 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9118 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9119 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9120 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9121 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9122 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9123 
9124 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
9125 	for (beat = 0 ; beat < 4 ; beat++) {
9126 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9127 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9128 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9129 						beat,
9130 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9131 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9132 
9133 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9134 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9135 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9136 						beat,
9137 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9138 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9139 			rc |= true;
9140 		}
9141 
9142 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9143 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9144 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9145 			dev_err_ratelimited(hdev->dev,
9146 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9147 					beat,
9148 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9149 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9150 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9151 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9152 			rc |= true;
9153 		}
9154 
9155 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9156 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9157 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9158 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9159 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9160 	}
9161 
9162 	return rc;
9163 }
9164 
9165 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9166 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9167 {
9168 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9169 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9170 
9171 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9172 
9173 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9174 				derr & 0x3, derr & 0xc);
9175 
9176 	/* JIRA H6-3286 - the following prints may not be valid */
9177 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9178 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9179 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9180 		dev_err_ratelimited(hdev->dev,
9181 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9182 				i,
9183 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9184 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9185 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9186 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9187 	}
9188 }
9189 
9190 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9191 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9192 {
9193 	__le32 *col_cmd = ca_par_err_data->dbg_col;
9194 	__le16 *row_cmd = ca_par_err_data->dbg_row;
9195 	u32 i;
9196 
9197 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9198 
9199 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9200 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9201 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9202 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9203 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9204 }
9205 
9206 /* Returns true if hard reset is needed or false otherwise */
9207 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9208 					struct hl_eq_hbm_sei_data *sei_data)
9209 {
9210 	bool require_hard_reset = false;
9211 	u32 hbm_id, mc_id, cause_idx;
9212 
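	/*
	 * Each HBM has two MCs, and each MC raises a severe and a non-severe SEI
	 * event, i.e. four consecutive event IDs per HBM and two per MC.
	 */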
9213 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9214 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9215 
9216 	cause_idx = sei_data->hdr.sei_cause;
9217 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9218 		gaudi2_print_event(hdev, event_type, true,
9219 			"err cause: Invalid HBM SEI event cause (%d) provided by FW",
9220 			cause_idx);
9221 		return true;
9222 	}
9223 
9224 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9225 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9226 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9227 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9228 		hbm_mc_sei_cause[cause_idx]);
9229 
9230 	/* Print error-specific info */
9231 	switch (cause_idx) {
9232 	case HBM_SEI_CATTRIP:
9233 		require_hard_reset = true;
9234 		break;
9235 
9236 	case  HBM_SEI_CMD_PARITY_EVEN:
9237 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9238 						le32_to_cpu(sei_data->hdr.cnt));
9239 		require_hard_reset = true;
9240 		break;
9241 
9242 	case  HBM_SEI_CMD_PARITY_ODD:
9243 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9244 						le32_to_cpu(sei_data->hdr.cnt));
9245 		require_hard_reset = true;
9246 		break;
9247 
9248 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
9249 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9250 						le32_to_cpu(sei_data->hdr.cnt));
9251 		require_hard_reset = true;
9252 		break;
9253 
9254 	case HBM_SEI_READ_ERR:
9255 		/* Unlike other SEI events, read error requires further processing of the
9256 		 * raw data in order to determine the root cause.
9257 		 */
9258 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9259 								&sei_data->read_err_info,
9260 								le32_to_cpu(sei_data->hdr.cnt));
9261 		break;
9262 
9263 	default:
9264 		break;
9265 	}
9266 
9267 	require_hard_reset |= !!sei_data->hdr.is_critical;
9268 
9269 	return require_hard_reset;
9270 }
9271 
9272 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9273 				u64 intr_cause_data)
9274 {
9275 	if (intr_cause_data) {
9276 		gaudi2_print_event(hdev, event_type, true,
9277 			"temperature error cause: %#llx", intr_cause_data);
9278 		return 1;
9279 	}
9280 
9281 	return 0;
9282 }
9283 
9284 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9285 {
9286 	u32 i, error_count = 0;
9287 
9288 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9289 		if (intr_cause_data & hbm_mc_spi[i].mask) {
9290 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9291 				hbm_mc_spi[i].cause);
9292 			error_count++;
9293 		}
9294 
9295 	return error_count;
9296 }
9297 
9298 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9299 {
9300 	ktime_t zero_time = ktime_set(0, 0);
9301 
9302 	mutex_lock(&hdev->clk_throttling.lock);
9303 
9304 	switch (event_type) {
9305 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9306 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9307 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9308 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9309 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9310 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9311 		break;
9312 
9313 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9314 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9315 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9316 		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
9317 		break;
9318 
9319 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9320 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9321 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9322 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9323 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9324 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9325 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9326 		break;
9327 
9328 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9329 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9330 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9331 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9332 		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
9333 		break;
9334 
9335 	default:
9336 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9337 		break;
9338 	}
9339 
9340 	mutex_unlock(&hdev->clk_throttling.lock);
9341 }
9342 
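/* Dump the CPU queue pi/ci as reported by the FW vs. as seen by the driver (LKD) */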
9343 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9344 					struct cpucp_pkt_sync_err *sync_err)
9345 {
9346 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9347 
9348 	gaudi2_print_event(hdev, event_type, false,
9349 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9350 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9351 		q->pi, atomic_read(&q->ci));
9352 }
9353 
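/*
 * Check the PCIE P2P and MSI-X gateway security interrupt indications,
 * print the offending request id / vector and clear the interrupts.
 */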
9354 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9355 {
9356 	u32 p2p_intr, msix_gw_intr, error_count = 0;
9357 
9358 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9359 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9360 
9361 	if (p2p_intr) {
9362 		gaudi2_print_event(hdev, event_type, true,
9363 			"pcie p2p transaction terminated due to security, req_id(0x%x)",
9364 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9365 
9366 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9367 		error_count++;
9368 	}
9369 
9370 	if (msix_gw_intr) {
9371 		gaudi2_print_event(hdev, event_type, true,
9372 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9373 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9374 
9375 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9376 		error_count++;
9377 	}
9378 
9379 	return error_count;
9380 }
9381 
9382 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9383 			struct hl_eq_pcie_drain_ind_data *drain_data)
9384 {
9385 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9386 
9387 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9388 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9389 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9390 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9391 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9392 
9393 	if (cause & BIT_ULL(0)) {
9394 		dev_err_ratelimited(hdev->dev,
9395 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9396 			!!lbw_rd, !!lbw_wr);
9397 		error_count++;
9398 	}
9399 
9400 	if (cause & BIT_ULL(1)) {
9401 		dev_err_ratelimited(hdev->dev,
9402 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9403 			hbw_rd, hbw_wr);
9404 		error_count++;
9405 	}
9406 
9407 	return error_count;
9408 }
9409 
9410 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9411 {
9412 	u32 error_count = 0;
9413 	int i;
9414 
9415 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9416 		if (intr_cause_data & BIT_ULL(i)) {
9417 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9418 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
9419 			error_count++;
9420 		}
9421 	}
9422 
9423 	hl_check_for_glbl_errors(hdev);
9424 
9425 	return error_count;
9426 }
9427 
9428 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9429 					struct cpucp_pkt_sync_err *sync_err)
9430 {
9431 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9432 
9433 	gaudi2_print_event(hdev, event_type, false,
9434 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9435 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9436 }
9437 
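/*
 * Handle an engine-ARC interrupt delivered through the event queue.
 * Currently only the DCCM queue-full interrupt type is recognized.
 */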
9438 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9439 					struct hl_eq_engine_arc_intr_data *data)
9440 {
9441 	struct hl_engine_arc_dccm_queue_full_irq *q;
9442 	u32 intr_type, engine_id;
9443 	u64 payload;
9444 
9445 	intr_type = le32_to_cpu(data->intr_type);
9446 	engine_id = le32_to_cpu(data->engine_id);
9447 	payload = le64_to_cpu(data->payload);
9448 
9449 	switch (intr_type) {
9450 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9451 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9452 
9453 		gaudi2_print_event(hdev, event_type, true,
9454 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9455 				engine_id, intr_type, q->queue_index);
9456 		return 1;
9457 	default:
9458 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9459 		return 0;
9460 	}
9461 }
9462 
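/*
 * Main event-queue handler: update the event statistics and dispatch the
 * event to the relevant handler, accumulating the error count, the user
 * notification mask and the required reset flags.
 */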
9463 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9464 {
9465 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9466 	bool reset_required = false, is_critical = false;
9467 	u32 index, ctl, reset_flags = 0, error_count = 0;
9468 	u64 event_mask = 0;
9469 	u16 event_type;
9470 
9471 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
9472 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9473 
9474 	if (event_type >= GAUDI2_EVENT_SIZE) {
9475 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9476 				event_type, GAUDI2_EVENT_SIZE - 1);
9477 		return;
9478 	}
9479 
9480 	gaudi2->events_stat[event_type]++;
9481 	gaudi2->events_stat_aggregate[event_type]++;
9482 
9483 	switch (event_type) {
9484 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9485 		fallthrough;
9486 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9487 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9488 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9489 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9490 		is_critical = eq_entry->ecc_data.is_critical;
9491 		error_count++;
9492 		break;
9493 
9494 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9495 		fallthrough;
9496 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9497 		fallthrough;
9498 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9499 		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9500 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9501 		break;
9502 
9503 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9504 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9505 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9506 		break;
9507 
9508 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9509 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9510 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9511 		event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9512 		break;
9513 
9514 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9515 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9516 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9517 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9518 		break;
9519 
9520 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9521 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9522 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9523 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9524 					&eq_entry->razwi_with_intr_cause, &event_mask);
9525 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9526 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9527 		break;
9528 
9529 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9530 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9531 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9532 						&eq_entry->razwi_with_intr_cause, &event_mask);
9533 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9534 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9535 		break;
9536 
9537 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9538 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9539 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9540 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9541 		break;
9542 
9543 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9544 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9545 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9546 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9547 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9548 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9549 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9550 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9551 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9552 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9553 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9554 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9555 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9556 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9557 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9558 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9559 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9560 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9561 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9562 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9563 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9564 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9565 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9566 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9567 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9568 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9569 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9570 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9571 					&eq_entry->razwi_with_intr_cause, &event_mask);
9572 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9573 		break;
9574 
9575 	case GAUDI2_EVENT_DEC0_SPI:
9576 	case GAUDI2_EVENT_DEC1_SPI:
9577 	case GAUDI2_EVENT_DEC2_SPI:
9578 	case GAUDI2_EVENT_DEC3_SPI:
9579 	case GAUDI2_EVENT_DEC4_SPI:
9580 	case GAUDI2_EVENT_DEC5_SPI:
9581 	case GAUDI2_EVENT_DEC6_SPI:
9582 	case GAUDI2_EVENT_DEC7_SPI:
9583 	case GAUDI2_EVENT_DEC8_SPI:
9584 	case GAUDI2_EVENT_DEC9_SPI:
9585 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9586 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9587 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9588 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9589 		break;
9590 
9591 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9592 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9593 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9594 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9595 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9596 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9597 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9598 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9599 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9600 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9601 		break;
9602 
9603 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9604 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9605 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9606 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9607 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9608 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9609 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9610 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9611 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9612 		break;
9613 
9614 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9615 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9616 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9617 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9618 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9619 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9620 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9621 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9622 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9623 		break;
9624 
9625 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9626 	case GAUDI2_EVENT_KDMA0_CORE:
9627 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9628 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9629 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9630 		break;
9631 
9632 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9633 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9634 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9635 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9636 		break;
9637 
9638 	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9639 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9640 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9641 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9642 		break;
9643 
9644 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9645 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9646 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9647 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9648 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9649 		break;
9650 
9651 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9652 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9653 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9654 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9655 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9656 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9657 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9658 		break;
9659 
9660 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9661 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9662 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9663 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9664 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9665 		break;
9666 
9667 	case GAUDI2_EVENT_PMMU_FATAL_0:
9668 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9669 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9670 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9671 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9672 		break;
9673 
9674 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9675 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9676 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9677 		break;
9678 
9679 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9680 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9681 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9682 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9683 			reset_required = true;
9684 		}
9685 		error_count++;
9686 		break;
9687 
9688 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9689 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9690 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9691 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9692 		break;
9693 
9694 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9695 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9696 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9697 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9698 		break;
9699 
9700 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9701 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9702 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9703 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9704 		break;
9705 
9706 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9707 		error_count = gaudi2_handle_psoc_drain(hdev,
9708 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9709 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9710 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9711 		break;
9712 
9713 	case GAUDI2_EVENT_CPU_AXI_ECC:
9714 		error_count = GAUDI2_NA_EVENT_CAUSE;
9715 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9716 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9717 		break;
9718 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9719 		error_count = GAUDI2_NA_EVENT_CAUSE;
9720 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9721 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9722 		break;
9723 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9724 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9725 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9726 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9727 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9728 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9729 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9730 		break;
9731 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9732 		error_count = GAUDI2_NA_EVENT_CAUSE;
9733 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9734 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9735 		break;
9736 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9737 		error_count = GAUDI2_NA_EVENT_CAUSE;
9738 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9739 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9740 		break;
9741 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9742 		error_count = GAUDI2_NA_EVENT_CAUSE;
9743 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9744 		break;
9745 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9746 		error_count = GAUDI2_NA_EVENT_CAUSE;
9747 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9748 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9749 		break;
9750 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9751 		error_count = GAUDI2_NA_EVENT_CAUSE;
9752 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9753 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9754 		break;
9755 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9756 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9757 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9758 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9759 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9760 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9761 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9762 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9763 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9764 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9765 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9766 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9767 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9768 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9769 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9770 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9771 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9772 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9773 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9774 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9775 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9776 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9777 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9778 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9779 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9780 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9781 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9782 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9783 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9784 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9785 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9786 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9787 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9788 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9789 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9790 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9791 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9792 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9793 		fallthrough;
9794 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9795 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9796 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9797 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9798 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9799 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9800 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9801 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9802 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9803 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9804 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9805 		error_count = GAUDI2_NA_EVENT_CAUSE;
9806 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9807 		break;
9808 
9809 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9810 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9811 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9812 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9813 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9814 		error_count = GAUDI2_NA_EVENT_CAUSE;
9815 		break;
9816 
9817 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9818 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9819 		error_count = GAUDI2_NA_EVENT_CAUSE;
9820 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9821 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9822 		break;
9823 
9824 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9825 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9826 		error_count = GAUDI2_NA_EVENT_CAUSE;
9827 		/* Do nothing - FW will handle it */
9828 		break;
9829 
9830 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9831 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9832 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9833 		break;
9834 
9835 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9836 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9837 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9838 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9839 		break;
9840 
9841 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9842 		error_count = GAUDI2_NA_EVENT_CAUSE;
9843 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9844 		break;
9845 
9846 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9847 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9848 						le64_to_cpu(eq_entry->data[0]));
9849 		error_count = GAUDI2_NA_EVENT_CAUSE;
9850 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9851 		break;
9852 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9853 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9854 						le64_to_cpu(eq_entry->data[0]));
9855 		error_count = GAUDI2_NA_EVENT_CAUSE;
9856 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9857 		break;
9858 
9859 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9860 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9861 		error_count = GAUDI2_NA_EVENT_CAUSE;
9862 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9863 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9864 		break;
9865 
9866 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9867 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9868 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9869 		break;
9870 
9871 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9872 	case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9873 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9874 		error_count = GAUDI2_NA_EVENT_CAUSE;
9875 		is_critical = true;
9876 		break;
9877 
9878 	default:
9879 		if (gaudi2_irq_map_table[event_type].valid) {
9880 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9881 						event_type);
9882 			error_count = GAUDI2_NA_EVENT_CAUSE;
9883 		}
9884 	}
9885 
9886 	/* Make sure to dump an error in case no error cause was printed so far.
9887 	 * Note that although we have counted the errors, we use this number as
9888 	 * a boolean.
9889 	 */
9890 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9891 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9892 	else if (error_count == 0)
9893 		gaudi2_print_event(hdev, event_type, true,
9894 				"No error cause for H/W event %u", event_type);
9895 
9896 	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
9897 				reset_required) {
9898 		if (reset_required ||
9899 				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
9900 			reset_flags |= HL_DRV_RESET_HARD;
9901 
9902 		if (hdev->hard_reset_on_fw_events ||
9903 				(hdev->asic_prop.fw_security_enabled && is_critical))
9904 			goto reset_device;
9905 	}
9906 
9907 	/* Send unmask irq only for interrupts not classified as MSG */
9908 	if (!gaudi2_irq_map_table[event_type].msg)
9909 		hl_fw_unmask_irq(hdev, event_type);
9910 
9911 	if (event_mask)
9912 		hl_notifier_event_send_all(hdev, event_mask);
9913 
9914 	return;
9915 
9916 reset_device:
9917 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9918 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9919 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9920 	} else {
9921 		reset_flags |= HL_DRV_RESET_DELAY;
9922 	}
9923 	/* escalate general hw errors to critical/fatal error */
9924 	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
9925 		hl_handle_critical_hw_err(hdev, event_type, &event_mask);
9926 
9927 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9928 	hl_device_cond_reset(hdev, reset_flags, event_mask);
9929 }
9930 
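/*
 * Helper for the scrubbing flow below: builds a single LIN_DMA packet in
 * memset mode (the fill value is carried in src_addr), with write-completion
 * and engine-barrier enabled, and submits it to the given EDMA QMAN without
 * waiting for completion. Completion is tracked via the WR_COMP SOB that the
 * caller programs.
 */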
9931 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9932 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9933 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9934 {
9935 	u32 ctl, pkt_size;
9936 	int rc = 0;
9937 
9938 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9939 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9940 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9941 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9942 
9943 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9944 	lin_dma_pkt->src_addr = cpu_to_le64(val);
9945 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9946 	lin_dma_pkt->tsize = cpu_to_le32(size);
9947 
9948 	pkt_size = sizeof(struct packet_lin_dma);
9949 
9950 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9951 	if (rc)
9952 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9953 				hw_queue_id);
9954 
9955 	return rc;
9956 }
9957 
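/*
 * Fills a device memory range using the enabled EDMA engines:
 * - bypass the MMU on the EDMA cores and point their WR_COMP to a driver SOB,
 * - put the EDMA QMANs in test mode,
 * - split the range into chunks of up to 2GB and spread them round-robin
 *   across the enabled engines,
 * - poll the SOB until it equals the number of submitted packets,
 * - restore the original EDMA configuration.
 */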
9958 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9959 {
9960 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9961 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9962 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9963 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9964 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9965 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9966 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9967 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9968 	void *lin_dma_pkts_arr;
9969 	dma_addr_t pkt_dma_addr;
9970 	int rc = 0, dma_num = 0;
9971 
9972 	if (prop->edma_enabled_mask == 0) {
9973 		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
9974 		return -EIO;
9975 	}
9976 
9977 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9978 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9979 	comp_addr = CFG_BASE + sob_addr;
9980 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9981 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9982 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9983 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9984 
9985 	/* Calculate how many lin dma pkts we'll need */
9986 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
9987 	pkt_size = sizeof(struct packet_lin_dma);
9988 
9989 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9990 					&pkt_dma_addr, GFP_KERNEL);
9991 	if (!lin_dma_pkts_arr)
9992 		return -ENOMEM;
9993 
9994 	/*
9995 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so save
9996 	 * only the first one to restore later.
9997 	 * Also set the SOB address on all EDMA cores for completion.
9998 	 * Set the QM as trusted to allow it to access physical addresses with MMU bypass.
9999 	 */
10000 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10001 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10002 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10003 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10004 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10005 
10006 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10007 				continue;
10008 
10009 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10010 					edma_offset, mmubp);
10011 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10012 					lower_32_bits(comp_addr));
10013 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10014 					upper_32_bits(comp_addr));
10015 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10016 					comp_val);
10017 			gaudi2_qman_set_test_mode(hdev,
10018 					edma_queues_id[dcore] + 4 * edma_idx, true);
10019 		}
10020 	}
10021 
10022 	WREG32(sob_addr, 0);
10023 
10024 	while (cur_addr < end_addr) {
10025 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10026 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10027 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10028 
10029 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10030 					continue;
10031 
10032 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10033 
10034 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10035 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10036 					pkt_dma_addr + dma_num * pkt_size,
10037 					edma_queues_id[dcore] + edma_idx * 4,
10038 					chunk_size, cur_addr, val);
10039 				if (rc)
10040 					goto end;
10041 
10042 				dma_num++;
10043 				cur_addr += chunk_size;
10044 				if (cur_addr == end_addr)
10045 					break;
10046 			}
10047 		}
10048 	}
10049 
10050 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10051 	if (rc) {
10052 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
10053 		goto end;
10054 	}
10055 end:
10056 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10057 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10058 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10059 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10060 
10061 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10062 				continue;
10063 
10064 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10065 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10066 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10067 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10068 			gaudi2_qman_set_test_mode(hdev,
10069 					edma_queues_id[dcore] + 4 * edma_idx, false);
10070 		}
10071 	}
10072 
10073 	WREG32(sob_addr, 0);
10074 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10075 
10076 	return rc;
10077 }
10078 
10079 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10080 {
10081 	int rc;
10082 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10083 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
10084 
10085 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10086 
10087 	if (rc)
10088 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10089 				prop->dram_user_base_address, size);
10090 	return rc;
10091 }
10092 
10093 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10094 {
10095 	int rc;
10096 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10097 	u64 val = hdev->memory_scrub_val;
10098 	u64 addr, size;
10099 
10100 	if (!hdev->memory_scrub)
10101 		return 0;
10102 
10103 	/* scrub SRAM */
10104 	addr = prop->sram_user_base_address;
10105 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10106 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10107 			addr, addr + size, val);
10108 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10109 	if (rc) {
10110 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10111 		return rc;
10112 	}
10113 
10114 	/* scrub DRAM */
10115 	rc = gaudi2_scrub_device_dram(hdev, val);
10116 	if (rc) {
10117 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10118 		return rc;
10119 	}
10120 	return 0;
10121 }
10122 
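/*
 * Resets the user-accessible sync manager resources (CQ registers, monitors
 * and sync objects) on all dcores, starting from the first available user
 * index of each resource type.
 */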
10123 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10124 {
10125 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10126 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10127 	u32 val, size, offset;
10128 	int dcore_id;
10129 
10130 	offset = hdev->asic_prop.first_available_cq[0] * 4;
10131 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10132 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10133 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10134 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10135 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10136 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10137 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10138 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10139 
10140 	/* memset dcore0 CQ registers */
10141 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10142 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10143 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10144 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10145 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10146 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10147 
10148 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10149 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10150 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10151 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10152 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10153 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10154 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10155 
10156 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10157 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10158 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10159 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10160 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10161 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10162 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10163 
10164 		cq_lbw_l_addr += DCORE_OFFSET;
10165 		cq_lbw_h_addr += DCORE_OFFSET;
10166 		cq_lbw_data_addr += DCORE_OFFSET;
10167 		cq_base_l_addr += DCORE_OFFSET;
10168 		cq_base_h_addr += DCORE_OFFSET;
10169 		cq_size_addr += DCORE_OFFSET;
10170 	}
10171 
10172 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10173 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10174 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10175 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10176 
10177 	/* memset dcore0 monitors */
10178 	gaudi2_memset_device_lbw(hdev, addr, size, val);
10179 
10180 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10181 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
10182 
10183 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10184 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10185 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10186 
10187 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10188 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10189 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10190 		mon_sts_addr += DCORE_OFFSET;
10191 		mon_cfg_addr += DCORE_OFFSET;
10192 	}
10193 
10194 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10195 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10196 	val = 0;
10197 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10198 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10199 
10200 	/* memset dcore0 sobs */
10201 	gaudi2_memset_device_lbw(hdev, addr, size, val);
10202 
10203 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10204 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10205 
10206 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10207 		gaudi2_memset_device_lbw(hdev, addr, size, val);
10208 		addr += DCORE_OFFSET;
10209 	}
10210 
10211 	/* Flush all WREG to prevent race */
10212 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10213 }
10214 
10215 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10216 {
10217 	u32 reg_base, hw_queue_id;
10218 
10219 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10220 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10221 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10222 			continue;
10223 
10224 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10225 
10226 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10227 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10228 	}
10229 
10230 	/* Flush all WREG to prevent race */
10231 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10232 }
10233 
10234 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10235 {
10236 	u32 reg_base, hw_queue_id;
10237 
10238 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10239 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10240 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10241 			continue;
10242 
10243 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10244 
10245 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10246 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10247 	}
10248 
10249 	/* Flush all WREG to prevent race */
10250 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10251 }
10252 
10253 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10254 {
10255 	return 0;
10256 }
10257 
10258 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10259 {
10260 }
10261 
10262 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10263 						struct dup_block_ctx *cfg_ctx)
10264 {
10265 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10266 	u8 seq;
10267 	int i;
10268 
10269 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
10270 		seq = block_idx * cfg_ctx->instances + i;
10271 
10272 		/* skip disabled instance */
10273 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10274 			continue;
10275 
10276 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10277 					cfg_ctx->data);
10278 	}
10279 }
10280 
10281 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10282 						u64 mask)
10283 {
10284 	int i;
10285 
10286 	cfg_ctx->enabled_mask = mask;
10287 
10288 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
10289 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
10290 }
10291 
10292 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10293 {
10294 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10295 }
10296 
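/*
 * Reads device memory for debugfs by means of the KDMA engine: a 2MB host
 * bounce buffer is allocated and mapped into the device MMU, then the
 * requested range is copied in up-to-2MB KDMA jobs and memcpy'd into the
 * caller's blob buffer.
 */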
10297 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10298 {
10299 	void *host_mem_virtual_addr;
10300 	dma_addr_t host_mem_dma_addr;
10301 	u64 reserved_va_base;
10302 	u32 pos, size_left, size_to_dma;
10303 	struct hl_ctx *ctx;
10304 	int rc = 0;
10305 
10306 	/* Fetch the ctx */
10307 	ctx = hl_get_compute_ctx(hdev);
10308 	if (!ctx) {
10309 		dev_err(hdev->dev, "No ctx available\n");
10310 		return -EINVAL;
10311 	}
10312 
10313 	/* Allocate buffers for read and for poll */
10314 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10315 								GFP_KERNEL | __GFP_ZERO);
10316 	if (host_mem_virtual_addr == NULL) {
10317 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10318 		rc = -ENOMEM;
10319 		goto put_ctx;
10320 	}
10321 
10322 	/* Reserve VM region on asic side */
10323 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10324 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10325 	if (!reserved_va_base) {
10326 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10327 		rc = -ENOMEM;
10328 		goto free_data_buffer;
10329 	}
10330 
10331 	/* Create mapping on asic side */
10332 	mutex_lock(&hdev->mmu_lock);
10333 
10334 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10335 	if (rc) {
10336 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10337 		goto unreserve_va;
10338 	}
10339 
10340 	rc = hl_mmu_invalidate_cache_range(hdev, false,
10341 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10342 				      ctx->asid, reserved_va_base, SZ_2M);
10343 	if (rc) {
10344 		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10345 		goto unreserve_va;
10346 	}
10347 
10348 	mutex_unlock(&hdev->mmu_lock);
10349 
10350 	/* Enable MMU on KDMA */
10351 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10352 
10353 	pos = 0;
10354 	size_left = size;
10355 	size_to_dma = SZ_2M;
10356 
10357 	while (size_left > 0) {
10358 		if (size_left < SZ_2M)
10359 			size_to_dma = size_left;
10360 
10361 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10362 		if (rc)
10363 			break;
10364 
10365 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10366 
10367 		if (size_left <= SZ_2M)
10368 			break;
10369 
10370 		pos += SZ_2M;
10371 		addr += SZ_2M;
10372 		size_left -= SZ_2M;
10373 	}
10374 
10375 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10376 
10377 	mutex_lock(&hdev->mmu_lock);
10378 
10379 	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10380 	if (rc)
10381 		goto unreserve_va;
10382 
10383 	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10384 				      ctx->asid, reserved_va_base, SZ_2M);
10385 
10386 unreserve_va:
10387 	mutex_unlock(&hdev->mmu_lock);
10388 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10389 free_data_buffer:
10390 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10391 put_ctx:
10392 	hl_ctx_put(ctx);
10393 
10394 	return rc;
10395 }
10396 
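/*
 * Sets up the internal command buffer pool used for signal/wait CBs: a
 * coherent host allocation is managed by a gen_pool and mapped into the
 * context's MMU at a reserved host VA range. Skipped when the PMMU is not
 * initialized.
 */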
10397 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10398 {
10399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10400 	int min_alloc_order, rc;
10401 
10402 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10403 		return 0;
10404 
10405 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10406 								HOST_SPACE_INTERNAL_CB_SZ,
10407 								&hdev->internal_cb_pool_dma_addr,
10408 								GFP_KERNEL | __GFP_ZERO);
10409 
10410 	if (!hdev->internal_cb_pool_virt_addr)
10411 		return -ENOMEM;
10412 
10413 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10414 					gaudi2_get_wait_cb_size(hdev)));
10415 
10416 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10417 	if (!hdev->internal_cb_pool) {
10418 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
10419 		rc = -ENOMEM;
10420 		goto free_internal_cb_pool;
10421 	}
10422 
10423 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10424 				HOST_SPACE_INTERNAL_CB_SZ, -1);
10425 	if (rc) {
10426 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10427 		rc = -EFAULT;
10428 		goto destroy_internal_cb_pool;
10429 	}
10430 
10431 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10432 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10433 
10434 	if (!hdev->internal_cb_va_base) {
10435 		rc = -ENOMEM;
10436 		goto destroy_internal_cb_pool;
10437 	}
10438 
10439 	mutex_lock(&hdev->mmu_lock);
10440 
10441 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10442 					HOST_SPACE_INTERNAL_CB_SZ);
10443 	if (rc)
10444 		goto unreserve_internal_cb_pool;
10445 
10446 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10447 	if (rc)
10448 		goto unmap_internal_cb_pool;
10449 
10450 	mutex_unlock(&hdev->mmu_lock);
10451 
10452 	return 0;
10453 
10454 unmap_internal_cb_pool:
10455 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10456 unreserve_internal_cb_pool:
10457 	mutex_unlock(&hdev->mmu_lock);
10458 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10459 destroy_internal_cb_pool:
10460 	gen_pool_destroy(hdev->internal_cb_pool);
10461 free_internal_cb_pool:
10462 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10463 					hdev->internal_cb_pool_dma_addr);
10464 
10465 	return rc;
10466 }
10467 
10468 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10469 {
10470 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10471 
10472 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10473 		return;
10474 
10475 	mutex_lock(&hdev->mmu_lock);
10476 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10477 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10478 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10479 	mutex_unlock(&hdev->mmu_lock);
10480 
10481 	gen_pool_destroy(hdev->internal_cb_pool);
10482 
10483 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10484 					hdev->internal_cb_pool_dma_addr);
10485 }
10486 
10487 static void gaudi2_restore_user_registers(struct hl_device *hdev)
10488 {
10489 	gaudi2_restore_user_sm_registers(hdev);
10490 	gaudi2_restore_user_qm_registers(hdev);
10491 }
10492 
10493 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10494 {
10495 	struct hl_device *hdev = ctx->hdev;
10496 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10497 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10498 	int rc;
10499 
10500 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10501 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10502 	if (rc)
10503 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10504 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10505 
10506 	return rc;
10507 }
10508 
10509 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10510 {
10511 	struct hl_device *hdev = ctx->hdev;
10512 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10513 	int rc;
10514 
10515 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10516 				prop->pmmu.page_size, true);
10517 	if (rc)
10518 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10519 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10520 }
10521 
10522 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10523 {
10524 	int rc;
10525 
10526 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10527 	if (rc)
10528 		return rc;
10529 
10530 	/* No need to clear the user registers if the device has just
10531 	 * performed a reset; in that case we restore only the NIC QM registers.
10532 	 */
10533 	if (ctx->hdev->reset_upon_device_release)
10534 		gaudi2_restore_nic_qm_registers(ctx->hdev);
10535 	else
10536 		gaudi2_restore_user_registers(ctx->hdev);
10537 
10538 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10539 	if (rc)
10540 		return rc;
10541 
10542 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10543 	if (rc)
10544 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10545 
10546 	return rc;
10547 }
10548 
10549 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10550 {
10551 	if (ctx->asid == HL_KERNEL_ASID_ID)
10552 		return;
10553 
10554 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10555 
10556 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10557 }
10558 
10559 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10560 {
10561 	struct hl_device *hdev = cs->ctx->hdev;
10562 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10563 	u32 mon_payload, sob_id, mon_id;
10564 
10565 	if (!cs_needs_completion(cs))
10566 		return 0;
10567 
10568 	/*
10569 	 * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion
10570 	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
10571 	 * cyclic index. The SOB value is increased when each of the CS jobs is
10572 	 * completed. When the SOB reaches the number of CS jobs, the monitor
10573 	 * generates an MSI-X interrupt.
10574 	 */
10575 
10576 	sob_id = mon_id = index;
10577 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10578 				(1 << CQ_ENTRY_READY_SHIFT) | index;
10579 
10580 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10581 				cs->jobs_cnt);
10582 
10583 	return 0;
10584 }
10585 
10586 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10587 {
10588 	return HL_INVALID_QUEUE;
10589 }
10590 
10591 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10592 {
10593 	struct hl_cb *cb = data;
10594 	struct packet_msg_short *pkt;
10595 	u32 value, ctl, pkt_size = sizeof(*pkt);
10596 
10597 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10598 	memset(pkt, 0, pkt_size);
10599 
10600 	/* Inc by 1, Mode ADD */
10601 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10602 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10603 
10604 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10605 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10606 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10607 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10608 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10609 
10610 	pkt->value = cpu_to_le32(value);
10611 	pkt->ctl = cpu_to_le32(ctl);
10612 
10613 	return size + pkt_size;
10614 }
10615 
10616 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10617 {
10618 	u32 ctl, pkt_size = sizeof(*pkt);
10619 
10620 	memset(pkt, 0, pkt_size);
10621 
10622 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10623 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10624 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10625 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10626 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10627 
10628 	pkt->value = cpu_to_le32(value);
10629 	pkt->ctl = cpu_to_le32(ctl);
10630 
10631 	return pkt_size;
10632 }
10633 
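/*
 * Builds the MSG_SHORT packet that arms a monitor: the value encodes the
 * SOB group (sob_base / 8), the target sync value, the SOB mask and the
 * "greater or equal" compare mode; the address is the monitor ARM register
 * offset relative to the monitor base.
 */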
10634 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10635 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10636 {
10637 	u32 ctl, value, pkt_size = sizeof(*pkt);
10638 	u8 mask;
10639 
10640 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10641 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10642 		return 0;
10643 	}
10644 
10645 	memset(pkt, 0, pkt_size);
10646 
10647 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10648 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10649 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10650 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10651 
10652 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10653 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10654 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10655 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10656 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10657 
10658 	pkt->value = cpu_to_le32(value);
10659 	pkt->ctl = cpu_to_le32(ctl);
10660 
10661 	return pkt_size;
10662 }
10663 
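/*
 * Builds a FENCE packet that waits on fence counter ID 2 of the QMAN
 * (target value 1, decrement by 1), matching the QM_FENCE2 register that
 * gaudi2_gen_wait_cb() programs as the monitor payload address.
 */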
10664 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10665 {
10666 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10667 
10668 	memset(pkt, 0, pkt_size);
10669 
10670 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10671 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10672 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10673 
10674 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10675 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10676 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10677 
10678 	pkt->cfg = cpu_to_le32(cfg);
10679 	pkt->ctl = cpu_to_le32(ctl);
10680 
10681 	return pkt_size;
10682 }
10683 
10684 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10685 {
10686 	struct hl_cb *cb = prop->data;
10687 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10688 
10689 	u64 monitor_base, fence_addr = 0;
10690 	u32 stream_index, size = prop->size;
10691 	u16 msg_addr_offset;
10692 
10693 	stream_index = prop->q_idx % 4;
10694 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10695 			QM_FENCE2_OFFSET + stream_index * 4;
10696 
10697 	/*
10698 	 * monitor_base should be the content of the base0 address registers,
10699 	 * so it will be added to the msg short offsets
10700 	 */
10701 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10702 
10703 	/* First monitor config packet: low address of the sync */
10704 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10705 				monitor_base;
10706 
10707 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10708 
10709 	/* Second monitor config packet: high address of the sync */
10710 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10711 				monitor_base;
10712 
10713 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10714 
10715 	/*
10716 	 * Third monitor config packet: the payload, i.e. what to write when the
10717 	 * sync triggers
10718 	 */
10719 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10720 				monitor_base;
10721 
10722 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10723 
10724 	/* Fourth monitor config packet: bind the monitor to a sync object */
10725 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10726 
10727 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10728 						prop->sob_val, msg_addr_offset);
10729 
10730 	/* Fence packet */
10731 	size += gaudi2_add_fence_pkt(buf + size);
10732 
10733 	return size;
10734 }
10735 
10736 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10737 {
10738 	struct hl_hw_sob *hw_sob = data;
10739 
10740 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10741 
10742 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10743 
10744 	kref_init(&hw_sob->kref);
10745 }
10746 
10747 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10748 {
10749 }
10750 
10751 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10752 {
10753 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10754 
10755 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10756 }
10757 
10758 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10759 {
10760 	return 0;
10761 }
10762 
10763 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10764 					struct hl_cs *cs, u32 wait_queue_id,
10765 					u32 collective_engine_id, u32 encaps_signal_offset)
10766 {
10767 	return -EINVAL;
10768 }
10769 
10770 /*
10771  * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
10772  *                            address to a DMMU page-size address (64MB) before
10773  *                            mapping it in the MMU.
10774  * The operation is performed on both the virtual and physical addresses.
10775  * For a device with 6 HBMs the scramble is:
10776  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10777  *
10778  * Example:
10779  * =============================================================================
10780  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10781  * Phys address                                                     in MMU last
10782  *                                                                    HOP
10783  * =============================================================================
10784  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10785  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10786  * =============================================================================
10787  */
10788 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10789 {
10790 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10791 	u32 divisor, mod_va;
10792 	u64 div_va;
10793 
10794 	/* accept any address in the DRAM address space */
10795 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10796 									VA_HBM_SPACE_END)) {
10797 
10798 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10799 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10800 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10801 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10802 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10803 	}
10804 
10805 	return raw_addr;
10806 }
10807 
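/*
 * Inverse of gaudi2_mmu_scramble_addr(): for a device with 6 HBMs,
 * descrambled = (addr[47:0] / 64M) * 48M + addr[47:0] % 64M, with the upper
 * address bits preserved.
 */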
10808 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10809 {
10810 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10811 	u32 divisor, mod_va;
10812 	u64 div_va;
10813 
10814 	/* accept any address in the DRAM address space */
10815 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10816 									VA_HBM_SPACE_END)) {
10817 
10818 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10819 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10820 					PAGE_SIZE_64MB, &mod_va);
10821 
10822 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10823 					(div_va * divisor + mod_va));
10824 	}
10825 
10826 	return scrambled_addr;
10827 }
10828 
10829 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10830 {
10831 	u32 base = 0, dcore_id, dec_id;
10832 
10833 	if (core_id >= NUMBER_OF_DEC) {
10834 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10835 		goto out;
10836 	}
10837 
10838 	if (core_id < 8) {
10839 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10840 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10841 
10842 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10843 				dec_id * DCORE_VDEC_OFFSET;
10844 	} else {
10845 		/* PCIe Shared Decoder */
10846 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10847 	}
10848 out:
10849 	return base;
10850 }
10851 
10852 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10853 				u32 *block_size, u32 *block_id)
10854 {
10855 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10856 	int i;
10857 
10858 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10859 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10860 			*block_id = i;
10861 			if (block_size)
10862 				*block_size = gaudi2->mapped_blocks[i].size;
10863 			return 0;
10864 		}
10865 	}
10866 
10867 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10868 
10869 	return -EINVAL;
10870 }
10871 
10872 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10873 			u32 block_id, u32 block_size)
10874 {
10875 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10876 	u64 offset_in_bar;
10877 	u64 address;
10878 	int rc;
10879 
10880 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10881 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10882 		return -EINVAL;
10883 	}
10884 
10885 	/* we allow mapping only an entire block */
10886 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10887 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10888 		return -EINVAL;
10889 	}
10890 
10891 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10892 
10893 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10894 
10895 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10896 			VM_DONTCOPY | VM_NORESERVE);
10897 
10898 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10899 			block_size, vma->vm_page_prot);
10900 	if (rc)
10901 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10902 
10903 	return rc;
10904 }
10905 
10906 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10907 {
10908 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10909 
10910 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10911 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10912 
10913 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10914 		WREG32(irq_handler_offset,
10915 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10916 }
10917 
10918 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10919 {
10920 	switch (mmu_id) {
10921 	case HW_CAP_DCORE0_DMMU0:
10922 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10923 		break;
10924 	case HW_CAP_DCORE0_DMMU1:
10925 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10926 		break;
10927 	case HW_CAP_DCORE0_DMMU2:
10928 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10929 		break;
10930 	case HW_CAP_DCORE0_DMMU3:
10931 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10932 		break;
10933 	case HW_CAP_DCORE1_DMMU0:
10934 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10935 		break;
10936 	case HW_CAP_DCORE1_DMMU1:
10937 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10938 		break;
10939 	case HW_CAP_DCORE1_DMMU2:
10940 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10941 		break;
10942 	case HW_CAP_DCORE1_DMMU3:
10943 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10944 		break;
10945 	case HW_CAP_DCORE2_DMMU0:
10946 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10947 		break;
10948 	case HW_CAP_DCORE2_DMMU1:
10949 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10950 		break;
10951 	case HW_CAP_DCORE2_DMMU2:
10952 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10953 		break;
10954 	case HW_CAP_DCORE2_DMMU3:
10955 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10956 		break;
10957 	case HW_CAP_DCORE3_DMMU0:
10958 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10959 		break;
10960 	case HW_CAP_DCORE3_DMMU1:
10961 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10962 		break;
10963 	case HW_CAP_DCORE3_DMMU2:
10964 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10965 		break;
10966 	case HW_CAP_DCORE3_DMMU3:
10967 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10968 		break;
10969 	case HW_CAP_PMMU:
10970 		*mmu_base = mmPMMU_HBW_MMU_BASE;
10971 		break;
10972 	default:
10973 		return -EINVAL;
10974 	}
10975 
10976 	return 0;
10977 }
10978 
10979 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10980 {
10981 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10982 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10983 	u32 mmu_base;
10984 
10985 	if (!(gaudi2->hw_cap_initialized & mmu_id))
10986 		return;
10987 
10988 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10989 		return;
10990 
10991 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10992 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10993 }
10994 
10995 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10996 {
10997 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10998 
10999 	/* check all HMMUs */
11000 	for (i = 0 ; i < num_of_hmmus ; i++) {
11001 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11002 
11003 		if (mmu_cap_mask & mmu_id)
11004 			gaudi2_ack_mmu_error(hdev, mmu_id);
11005 	}
11006 
11007 	/* check PMMU */
11008 	if (mmu_cap_mask & HW_CAP_PMMU)
11009 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11010 
11011 	return 0;
11012 }
11013 
11014 static void gaudi2_get_msi_info(__le32 *table)
11015 {
11016 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11017 }
11018 
11019 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11020 {
11021 	switch (pll_idx) {
11022 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11023 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11024 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11025 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11026 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11027 	case HL_GAUDI2_MME_PLL: return MME_PLL;
11028 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11029 	case HL_GAUDI2_IF_PLL: return IF_PLL;
11030 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11031 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11032 	case HL_GAUDI2_VID_PLL: return VID_PLL;
11033 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11034 	default: return -EINVAL;
11035 	}
11036 }
11037 
11038 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
11039 {
11040 	/* Not implemented */
11041 	return 0;
11042 }
11043 
11044 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
11045 {
11046 	/* Not implemented */
11047 	return 0;
11048 }
11049 
11050 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
11051 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
11052 {
11053 	/* Not implemented */
11054 	return 0;
11055 }
11056 
11057 
11058 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11059 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
11060 				u32 engine_id, char **buf, size_t *size, size_t *offset)
11061 {
11062 	/* Not implemented */
11063 	return 0;
11064 }
11065 
11066 
11067 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11068 	.monitor_valid = gaudi2_monitor_valid,
11069 	.print_single_monitor = gaudi2_print_single_monitor,
11070 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11071 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
11072 };
11073 
11074 static void gaudi2_state_dump_init(struct hl_device *hdev)
11075 {
11076 	/* Not implemented */
11077 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11078 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11079 }
11080 
11081 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11082 {
11083 	return 0;
11084 }
11085 
11086 static u32 *gaudi2_get_stream_master_qid_arr(void)
11087 {
11088 	return NULL;
11089 }
11090 
11091 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
11092 				struct attribute_group *dev_vrm_attr_grp)
11093 {
11094 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
11095 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
11096 }
11097 
11098 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11099 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
11100 {
11101 	struct asic_fixed_properties *prop = &hdev->asic_prop;
11102 
11103 	/* for host pages the page size must be a multiple of the MMU page size */
11104 	if (!is_dram_addr) {
11105 		if (page_size % mmu_prop->page_size)
11106 			goto page_size_err;
11107 
11108 		*real_page_size = mmu_prop->page_size;
11109 		return 0;
11110 	}
11111 
11112 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11113 		goto page_size_err;
11114 
11115 	/*
11116 	 * The MMU page size is different from the DRAM page size (more precisely, the DMMU
11117 	 * page is greater than the DRAM page size).
11118 	 * For this reason, work with the DRAM page size and let the MMU scrambling routine
11119 	 * handle this mismatch when calculating the address to place in the MMU page table.
11120 	 * (In that case also make sure that the dram_page_size is not greater than the
11121 	 * MMU page size.)
11122 	 */
11123 	*real_page_size = prop->dram_page_size;
11124 
11125 	return 0;
11126 
11127 page_size_err:
11128 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11129 							page_size, mmu_prop->page_size >> 10);
11130 	return -EFAULT;
11131 }
11132 
11133 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11134 {
11135 	return -EOPNOTSUPP;
11136 }
11137 
11138 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11139 {
11140 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11141 
11142 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11143 		return 0;
11144 
11145 	return hl_fw_send_device_activity(hdev, open);
11146 }
11147 
static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

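/**
 * gaudi2_set_asic_funcs() - install the Gaudi2 ASIC callbacks on the device structure.
 * @hdev: pointer to the habanalabs device structure.
 *
 * Called by the common driver code once the ASIC type is identified, before any of the
 * callbacks above are used.
 */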
void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}