1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
24
25 /*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 * - Range registers
30 * - MMU
31 *
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 * - Range registers
37 * - Protection bits
38 *
39 * MMU is always enabled.
40 *
41 * QMAN DMA channels 0,1,5 (PCI DMA):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
44 * - CP is secured: The driver needs to parse the CB, but WREG should be
45 * allowed because of TDMA (tensor DMA). Hence, WREG is never
46 * secured.
47 *
48 * When the driver needs to use DMA it checks that Gaudi is idle, sets DMA
49 * channel 0 to secured, executes the DMA transfer and then restores the
50 * channel to non-secured. Currently, the driver doesn't use DMA while
51 * compute jobs are running.
52 *
53 * The current cases in which the driver uses the DMA are:
54 * - Clearing SRAM on context switch (done while the device is
55 * idle)
56 * - Clearing the MMU page tables area (done on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
63
64 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70 #define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
83
84 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85
86 #define GAUDI_MAX_STRING_LEN 20
87
88 #define GAUDI_CB_POOL_CB_CNT 512
89 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98
99 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
100
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
102 BIT(GAUDI_ENGINE_ID_MME_0) |\
103 BIT(GAUDI_ENGINE_ID_MME_2) |\
104 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110 "gaudi cpu eq"
111 };
112
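/*
 * Mapping of the driver's logical DMA channels to physical DMA engines.
 * The three PCI DMA channels use engines 0, 1 and 5 (the channels described
 * as PCI DMA in the security scheme above), while the HBM DMA channels use
 * the remaining engines.
 */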
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122 };
123
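/*
 * Completion queues are attached only to the external (PCI DMA) queues:
 * four streams on each of DMA 0, DMA 1 and DMA 5.
 */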
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125 [0] = GAUDI_QUEUE_ID_DMA_0_0,
126 [1] = GAUDI_QUEUE_ID_DMA_0_1,
127 [2] = GAUDI_QUEUE_ID_DMA_0_2,
128 [3] = GAUDI_QUEUE_ID_DMA_0_3,
129 [4] = GAUDI_QUEUE_ID_DMA_1_0,
130 [5] = GAUDI_QUEUE_ID_DMA_1_1,
131 [6] = GAUDI_QUEUE_ID_DMA_1_2,
132 [7] = GAUDI_QUEUE_ID_DMA_1_3,
133 [8] = GAUDI_QUEUE_ID_DMA_5_0,
134 [9] = GAUDI_QUEUE_ID_DMA_5_1,
135 [10] = GAUDI_QUEUE_ID_DMA_5_2,
136 [11] = GAUDI_QUEUE_ID_DMA_5_3
137 };
138
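/* Size in bytes of each supported QMAN packet, indexed by packet opcode */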
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
141 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
142 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
143 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
144 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
145 [PACKET_REPEAT] = sizeof(struct packet_repeat),
146 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
147 [PACKET_FENCE] = sizeof(struct packet_fence),
148 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
149 [PACKET_NOP] = sizeof(struct packet_nop),
150 [PACKET_STOP] = sizeof(struct packet_stop),
151 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
152 [PACKET_WAIT] = sizeof(struct packet_wait),
153 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
154 };
155
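/* Check that a packet opcode is one of the known Gaudi packet types */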
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158 switch (id) {
159 case PACKET_WREG_32:
160 case PACKET_WREG_BULK:
161 case PACKET_MSG_LONG:
162 case PACKET_MSG_SHORT:
163 case PACKET_CP_DMA:
164 case PACKET_REPEAT:
165 case PACKET_MSG_PROT:
166 case PACKET_FENCE:
167 case PACKET_LIN_DMA:
168 case PACKET_NOP:
169 case PACKET_STOP:
170 case PACKET_ARB_POINT:
171 case PACKET_WAIT:
172 case PACKET_LOAD_AND_EXE:
173 return true;
174 default:
175 return false;
176 }
177 }
178
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 "tpc_address_exceed_slm",
182 "tpc_div_by_0",
183 "tpc_spu_mac_overflow",
184 "tpc_spu_addsub_overflow",
185 "tpc_spu_abs_overflow",
186 "tpc_spu_fp_dst_nan_inf",
187 "tpc_spu_fp_dst_denorm",
188 "tpc_vpu_mac_overflow",
189 "tpc_vpu_addsub_overflow",
190 "tpc_vpu_abs_overflow",
191 "tpc_vpu_fp_dst_nan_inf",
192 "tpc_vpu_fp_dst_denorm",
193 "tpc_assertions",
194 "tpc_illegal_instruction",
195 "tpc_pc_wrap_around",
196 "tpc_qm_sw_err",
197 "tpc_hbw_rresp_err",
198 "tpc_hbw_bresp_err",
199 "tpc_lbw_rresp_err",
200 "tpc_lbw_bresp_err"
201 };
202
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 "PQ AXI HBW error",
206 "CQ AXI HBW error",
207 "CP AXI HBW error",
208 "CP error due to undefined OPCODE",
209 "CP encountered STOP OPCODE",
210 "CP AXI LBW error",
211 "CP WRREG32 or WRBULK returned error",
212 "N/A",
213 "FENCE 0 inc over max value and clipped",
214 "FENCE 1 inc over max value and clipped",
215 "FENCE 2 inc over max value and clipped",
216 "FENCE 3 inc over max value and clipped",
217 "FENCE 0 dec under min value and clipped",
218 "FENCE 1 dec under min value and clipped",
219 "FENCE 2 dec under min value and clipped",
220 "FENCE 3 dec under min value and clipped"
221 };
222
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 "Choice push while full error",
226 "Choice Q watchdog error",
227 "MSG AXI LBW returned with error"
228 };
229
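/*
 * Queue type of every Gaudi queue ID. EXT queues are the host-managed queues
 * that are backed by completion queues, the CPU queue is the driver-only
 * queue towards the device CPU, INT queues get their PQs allocated by the
 * driver (see gaudi_alloc_internal_qmans_pq_mem()), and the NIC queues are
 * not exposed here (NA).
 */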
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
322 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
323 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
324 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
325 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
326 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
327 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
328 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
329 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
330 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
331 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
332 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
333 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
334 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
335 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
336 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
337 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
338 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
339 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
340 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
341 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
342 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
343 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345
346 struct ecc_info_extract_params {
347 u64 block_address;
348 u32 num_memories;
349 bool derr;
350 bool disable_clock_gating;
351 };
352
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354 u64 phys_addr);
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356 struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358 u32 size, u64 val);
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360 u32 tpc_id);
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
367 {
368 struct asic_fixed_properties *prop = &hdev->asic_prop;
369 u32 num_sync_stream_queues = 0;
370 int i;
371
372 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373 prop->hw_queues_props = kcalloc(prop->max_queues,
374 sizeof(struct hw_queue_properties),
375 GFP_KERNEL);
376
377 if (!prop->hw_queues_props)
378 return -ENOMEM;
379
380 for (i = 0 ; i < prop->max_queues ; i++) {
381 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383 prop->hw_queues_props[i].driver_only = 0;
384 prop->hw_queues_props[i].requires_kernel_cb = 1;
385 prop->hw_queues_props[i].supports_sync_stream = 1;
386 num_sync_stream_queues++;
387 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389 prop->hw_queues_props[i].driver_only = 1;
390 prop->hw_queues_props[i].requires_kernel_cb = 0;
391 prop->hw_queues_props[i].supports_sync_stream = 0;
392 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394 prop->hw_queues_props[i].driver_only = 0;
395 prop->hw_queues_props[i].requires_kernel_cb = 0;
396 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398 prop->hw_queues_props[i].driver_only = 0;
399 prop->hw_queues_props[i].requires_kernel_cb = 0;
400 prop->hw_queues_props[i].supports_sync_stream = 0;
401 }
402 }
403
404 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405 prop->sync_stream_first_sob = 0;
406 prop->sync_stream_first_mon = 0;
407 prop->dram_base_address = DRAM_PHYS_BASE;
408 prop->dram_size = GAUDI_HBM_SIZE_32GB;
409 prop->dram_end_address = prop->dram_base_address +
410 prop->dram_size;
411 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412
413 prop->sram_base_address = SRAM_BASE_ADDR;
414 prop->sram_size = SRAM_SIZE;
415 prop->sram_end_address = prop->sram_base_address +
416 prop->sram_size;
417 prop->sram_user_base_address = prop->sram_base_address +
418 SRAM_USER_BASE_OFFSET;
419
420 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421 if (hdev->pldm)
422 prop->mmu_pgt_size = 0x800000; /* 8MB */
423 else
424 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425 prop->mmu_pte_size = HL_PTE_SIZE;
426 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428 prop->dram_page_size = PAGE_SIZE_2MB;
429
430 prop->pmmu.hop0_shift = HOP0_SHIFT;
431 prop->pmmu.hop1_shift = HOP1_SHIFT;
432 prop->pmmu.hop2_shift = HOP2_SHIFT;
433 prop->pmmu.hop3_shift = HOP3_SHIFT;
434 prop->pmmu.hop4_shift = HOP4_SHIFT;
435 prop->pmmu.hop0_mask = HOP0_MASK;
436 prop->pmmu.hop1_mask = HOP1_MASK;
437 prop->pmmu.hop2_mask = HOP2_MASK;
438 prop->pmmu.hop3_mask = HOP3_MASK;
439 prop->pmmu.hop4_mask = HOP4_MASK;
440 prop->pmmu.start_addr = VA_HOST_SPACE_START;
441 prop->pmmu.end_addr =
442 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443 prop->pmmu.page_size = PAGE_SIZE_4KB;
444 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445
446 /* PMMU and HPMMU are the same except for the page size */
447 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449
450 /* shifts and masks are the same in PMMU and DMMU */
451 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453 prop->dmmu.end_addr = VA_HOST_SPACE_END;
454 prop->dmmu.page_size = PAGE_SIZE_2MB;
455
456 prop->cfg_size = CFG_SIZE;
457 prop->max_asid = MAX_ASID;
458 prop->num_of_events = GAUDI_EVENT_SIZE;
459 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460
461 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462
463 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465
466 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468
469 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470 CARD_NAME_MAX_LEN);
471
472 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473
474 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475 num_sync_stream_queues * HL_RSVD_SOBS;
476 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477 num_sync_stream_queues * HL_RSVD_MONS;
478
479 return 0;
480 }
481
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
483 {
484 static const char * const name[] = {"SRAM", "CFG", "HBM"};
485 bool is_wc[3] = {false, false, true};
486 int rc;
487
488 rc = hl_pci_bars_map(hdev, name, is_wc);
489 if (rc)
490 return rc;
491
492 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493 (CFG_BASE - SPI_FLASH_BASE_ADDR);
494
495 return 0;
496 }
497
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499 {
500 struct gaudi_device *gaudi = hdev->asic_specific;
501 struct hl_inbound_pci_region pci_region;
502 u64 old_addr = addr;
503 int rc;
504
505 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506 return old_addr;
507
508 /* Inbound Region 2 - Bar 4 - Point to HBM */
509 pci_region.mode = PCI_BAR_MATCH_MODE;
510 pci_region.bar = HBM_BAR_ID;
511 pci_region.addr = addr;
512 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513 if (rc)
514 return U64_MAX;
515
516 if (gaudi) {
517 old_addr = gaudi->hbm_bar_cur_addr;
518 gaudi->hbm_bar_cur_addr = addr;
519 }
520
521 return old_addr;
522 }
523
524 static int gaudi_init_iatu(struct hl_device *hdev)
525 {
526 struct hl_inbound_pci_region inbound_region;
527 struct hl_outbound_pci_region outbound_region;
528 int rc;
529
530 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531 inbound_region.mode = PCI_BAR_MATCH_MODE;
532 inbound_region.bar = SRAM_BAR_ID;
533 inbound_region.addr = SRAM_BASE_ADDR;
534 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535 if (rc)
536 goto done;
537
538 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539 inbound_region.mode = PCI_BAR_MATCH_MODE;
540 inbound_region.bar = CFG_BAR_ID;
541 inbound_region.addr = SPI_FLASH_BASE_ADDR;
542 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543 if (rc)
544 goto done;
545
546 /* Inbound Region 2 - Bar 4 - Point to HBM */
547 inbound_region.mode = PCI_BAR_MATCH_MODE;
548 inbound_region.bar = HBM_BAR_ID;
549 inbound_region.addr = DRAM_PHYS_BASE;
550 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551 if (rc)
552 goto done;
553
554 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555
556 /* Outbound Region 0 - Point to Host */
557 outbound_region.addr = HOST_PHYS_BASE;
558 outbound_region.size = HOST_PHYS_SIZE;
559 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560
561 done:
562 return rc;
563 }
564
565 static int gaudi_early_init(struct hl_device *hdev)
566 {
567 struct asic_fixed_properties *prop = &hdev->asic_prop;
568 struct pci_dev *pdev = hdev->pdev;
569 int rc;
570
571 rc = gaudi_get_fixed_properties(hdev);
572 if (rc) {
573 dev_err(hdev->dev, "Failed to get fixed properties\n");
574 return rc;
575 }
576
577 /* Check BAR sizes */
578 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579 dev_err(hdev->dev,
580 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581 SRAM_BAR_ID,
582 (unsigned long long) pci_resource_len(pdev,
583 SRAM_BAR_ID),
584 SRAM_BAR_SIZE);
585 rc = -ENODEV;
586 goto free_queue_props;
587 }
588
589 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590 dev_err(hdev->dev,
591 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592 CFG_BAR_ID,
593 (unsigned long long) pci_resource_len(pdev,
594 CFG_BAR_ID),
595 CFG_BAR_SIZE);
596 rc = -ENODEV;
597 goto free_queue_props;
598 }
599
600 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601
602 rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
603 mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
604 if (rc)
605 goto free_queue_props;
606
607 /* GAUDI Firmware does not yet support security */
608 prop->fw_security_disabled = true;
609 dev_info(hdev->dev, "firmware-level security is disabled\n");
610
611 return 0;
612
613 free_queue_props:
614 kfree(hdev->asic_prop.hw_queues_props);
615 return rc;
616 }
617
618 static int gaudi_early_fini(struct hl_device *hdev)
619 {
620 kfree(hdev->asic_prop.hw_queues_props);
621 hl_pci_fini(hdev);
622
623 return 0;
624 }
625
626 /**
627 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
628 *
629 * @hdev: pointer to hl_device structure
630 *
631 */
632 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
633 {
634 struct asic_fixed_properties *prop = &hdev->asic_prop;
635 u32 trace_freq = 0;
636 u32 pll_clk = 0;
637 u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
638 u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
639 u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
640 u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
641 u32 od = RREG32(mmPSOC_CPU_PLL_OD);
642
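/*
 * Derive the trace frequency from the CPU PLL registers. As a worked
 * example (values are illustrative only): with a 50 MHz reference clock,
 * nf = 31, nr = 0 and od = 1, the PLL output is 50 * 32 / (1 * 2) = 800 MHz,
 * optionally divided further by (div_fctr + 1) when a divided source is
 * selected.
 */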
643 if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
644 if (div_sel == DIV_SEL_REF_CLK)
645 trace_freq = PLL_REF_CLK;
646 else
647 trace_freq = PLL_REF_CLK / (div_fctr + 1);
648 } else if (div_sel == DIV_SEL_PLL_CLK ||
649 div_sel == DIV_SEL_DIVIDED_PLL) {
650 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
651 if (div_sel == DIV_SEL_PLL_CLK)
652 trace_freq = pll_clk;
653 else
654 trace_freq = pll_clk / (div_fctr + 1);
655 } else {
656 dev_warn(hdev->dev,
657 "Received invalid div select value: %d", div_sel);
658 }
659
660 prop->psoc_timestamp_frequency = trace_freq;
661 prop->psoc_pci_pll_nr = nr;
662 prop->psoc_pci_pll_nf = nf;
663 prop->psoc_pci_pll_od = od;
664 prop->psoc_pci_pll_div_factor = div_fctr;
665 }
666
667 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
668 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
669 {
670 struct asic_fixed_properties *prop = &hdev->asic_prop;
671 struct packet_lin_dma *init_tpc_mem_pkt;
672 struct hl_cs_job *job;
673 struct hl_cb *cb;
674 u64 dst_addr;
675 u32 cb_size, ctl;
676 u8 tpc_id;
677 int rc;
678
679 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
680 if (!cb)
681 return -EFAULT;
682
683 init_tpc_mem_pkt = cb->kernel_address;
684 cb_size = sizeof(*init_tpc_mem_pkt);
685 memset(init_tpc_mem_pkt, 0, cb_size);
686
687 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
688
689 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
690 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
691 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
692 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
693
694 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
695
696 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
697 dst_addr = (prop->sram_user_base_address &
698 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
699 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
700 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
701
702 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
703 if (!job) {
704 dev_err(hdev->dev, "Failed to allocate a new job\n");
705 rc = -ENOMEM;
706 goto release_cb;
707 }
708
709 job->id = 0;
710 job->user_cb = cb;
711 job->user_cb->cs_cnt++;
712 job->user_cb_size = cb_size;
713 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
714 job->patched_cb = job->user_cb;
715 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
716
717 hl_debugfs_add_job(hdev, job);
718
719 rc = gaudi_send_job_on_qman0(hdev, job);
720
721 if (rc)
722 goto free_job;
723
724 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
725 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
726 if (rc)
727 break;
728 }
729
730 free_job:
731 hl_userptr_delete_list(hdev, &job->userptr_list);
732 hl_debugfs_remove_job(hdev, job);
733 kfree(job);
734 cb->cs_cnt--;
735
736 release_cb:
737 hl_cb_put(cb);
738 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
739
740 return rc;
741 }
742
743 /*
744 * gaudi_init_tpc_mem() - Initialize TPC memories.
745 * @hdev: Pointer to hl_device structure.
746 *
747 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
748 *
749 * Return: 0 for success, negative value for error.
750 */
751 static int gaudi_init_tpc_mem(struct hl_device *hdev)
752 {
753 const struct firmware *fw;
754 size_t fw_size;
755 void *cpu_addr;
756 dma_addr_t dma_handle;
757 int rc;
758
759 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
760 if (rc) {
761 dev_err(hdev->dev, "Firmware file %s is not found!\n",
762 GAUDI_TPC_FW_FILE);
763 goto out;
764 }
765
766 fw_size = fw->size;
767 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
768 &dma_handle, GFP_KERNEL | __GFP_ZERO);
769 if (!cpu_addr) {
770 dev_err(hdev->dev,
771 "Failed to allocate %zu of dma memory for TPC kernel\n",
772 fw_size);
773 rc = -ENOMEM;
774 goto out;
775 }
776
777 memcpy(cpu_addr, fw->data, fw_size);
778
779 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
780
781 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
782 dma_handle);
783
784 out:
785 release_firmware(fw);
786 return rc;
787 }
788
789 static int gaudi_late_init(struct hl_device *hdev)
790 {
791 struct gaudi_device *gaudi = hdev->asic_specific;
792 int rc;
793
794 rc = gaudi->cpucp_info_get(hdev);
795 if (rc) {
796 dev_err(hdev->dev, "Failed to get cpucp info\n");
797 return rc;
798 }
799
800 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
801 if (rc) {
802 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
803 return rc;
804 }
805
806 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
807
808 gaudi_fetch_psoc_frequency(hdev);
809
810 rc = gaudi_mmu_clear_pgt_range(hdev);
811 if (rc) {
812 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
813 goto disable_pci_access;
814 }
815
816 rc = gaudi_init_tpc_mem(hdev);
817 if (rc) {
818 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
819 goto disable_pci_access;
820 }
821
822 return 0;
823
824 disable_pci_access:
825 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
826
827 return rc;
828 }
829
830 static void gaudi_late_fini(struct hl_device *hdev)
831 {
832 const struct hwmon_channel_info **channel_info_arr;
833 int i = 0;
834
835 if (!hdev->hl_chip_info->info)
836 return;
837
838 channel_info_arr = hdev->hl_chip_info->info;
839
840 while (channel_info_arr[i]) {
841 kfree(channel_info_arr[i]->config);
842 kfree(channel_info_arr[i]);
843 i++;
844 }
845
846 kfree(channel_info_arr);
847
848 hdev->hl_chip_info->info = NULL;
849 }
850
851 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
852 {
853 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
854 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
855 int i, j, rc = 0;
856
857 /*
858 * The device CPU works with 40-bit addresses, where bit 39 must be set
859 * to '1' when accessing the host.
860 * Bits 49:39 of the full host address are saved for a later
861 * configuration of the HW to perform extension to 50 bits.
862 * Because there is a single HW register that holds the extension bits,
863 * these bits must be identical across the entire allocated range.
864 */
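/*
 * In other words, the retry loop below only accepts an allocation that does
 * not cross a 2^39 byte boundary, so a single MSB value (bits 49:39) covers
 * the entire region.
 */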
865
866 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
867 virt_addr_arr[i] =
868 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
869 HL_CPU_ACCESSIBLE_MEM_SIZE,
870 &dma_addr_arr[i],
871 GFP_KERNEL | __GFP_ZERO);
872 if (!virt_addr_arr[i]) {
873 rc = -ENOMEM;
874 goto free_dma_mem_arr;
875 }
876
877 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
878 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
879 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
880 break;
881 }
882
883 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
884 dev_err(hdev->dev,
885 "MSB of CPU accessible DMA memory are not identical in all range\n");
886 rc = -EFAULT;
887 goto free_dma_mem_arr;
888 }
889
890 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
891 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
892 hdev->cpu_pci_msb_addr =
893 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
894
895 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
896
897 free_dma_mem_arr:
898 for (j = 0 ; j < i ; j++)
899 hdev->asic_funcs->asic_dma_free_coherent(hdev,
900 HL_CPU_ACCESSIBLE_MEM_SIZE,
901 virt_addr_arr[j],
902 dma_addr_arr[j]);
903
904 return rc;
905 }
906
907 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
908 {
909 struct gaudi_device *gaudi = hdev->asic_specific;
910 struct gaudi_internal_qman_info *q;
911 u32 i;
912
913 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
914 q = &gaudi->internal_qmans[i];
915 if (!q->pq_kernel_addr)
916 continue;
917 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
918 q->pq_kernel_addr,
919 q->pq_dma_addr);
920 }
921 }
922
923 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
924 {
925 struct gaudi_device *gaudi = hdev->asic_specific;
926 struct gaudi_internal_qman_info *q;
927 int rc, i;
928
929 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
930 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
931 continue;
932
933 q = &gaudi->internal_qmans[i];
934
935 switch (i) {
936 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
937 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
938 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
939 break;
940 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
941 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
942 break;
943 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
944 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
945 break;
946 default:
947 dev_err(hdev->dev, "Bad internal queue index %d", i);
948 rc = -EINVAL;
949 goto free_internal_qmans_pq_mem;
950 }
951
952 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
953 hdev, q->pq_size,
954 &q->pq_dma_addr,
955 GFP_KERNEL | __GFP_ZERO);
956 if (!q->pq_kernel_addr) {
957 rc = -ENOMEM;
958 goto free_internal_qmans_pq_mem;
959 }
960 }
961
962 return 0;
963
964 free_internal_qmans_pq_mem:
965 gaudi_free_internal_qmans_pq_mem(hdev);
966 return rc;
967 }
968
969 static int gaudi_sw_init(struct hl_device *hdev)
970 {
971 struct gaudi_device *gaudi;
972 u32 i, event_id = 0;
973 int rc;
974
975 /* Allocate device structure */
976 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
977 if (!gaudi)
978 return -ENOMEM;
979
980 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
981 if (gaudi_irq_map_table[i].valid) {
982 if (event_id == GAUDI_EVENT_SIZE) {
983 dev_err(hdev->dev,
984 "Event array exceeds the limit of %u events\n",
985 GAUDI_EVENT_SIZE);
986 rc = -EINVAL;
987 goto free_gaudi_device;
988 }
989
990 gaudi->events[event_id++] =
991 gaudi_irq_map_table[i].fc_id;
992 }
993 }
994
995 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
996
997 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
998
999 hdev->asic_specific = gaudi;
1000
1001 /* Create DMA pool for small allocations */
1002 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1003 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1004 if (!hdev->dma_pool) {
1005 dev_err(hdev->dev, "failed to create DMA pool\n");
1006 rc = -ENOMEM;
1007 goto free_gaudi_device;
1008 }
1009
1010 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1011 if (rc)
1012 goto free_dma_pool;
1013
1014 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1015 if (!hdev->cpu_accessible_dma_pool) {
1016 dev_err(hdev->dev,
1017 "Failed to create CPU accessible DMA pool\n");
1018 rc = -ENOMEM;
1019 goto free_cpu_dma_mem;
1020 }
1021
1022 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1023 (uintptr_t) hdev->cpu_accessible_dma_mem,
1024 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1025 if (rc) {
1026 dev_err(hdev->dev,
1027 "Failed to add memory to CPU accessible DMA pool\n");
1028 rc = -EFAULT;
1029 goto free_cpu_accessible_dma_pool;
1030 }
1031
1032 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1033 if (rc)
1034 goto free_cpu_accessible_dma_pool;
1035
1036 spin_lock_init(&gaudi->hw_queues_lock);
1037 mutex_init(&gaudi->clk_gate_mutex);
1038
1039 hdev->supports_sync_stream = true;
1040 hdev->supports_coresight = true;
1041
1042 return 0;
1043
1044 free_cpu_accessible_dma_pool:
1045 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1046 free_cpu_dma_mem:
1047 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1048 hdev->cpu_pci_msb_addr);
1049 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1050 HL_CPU_ACCESSIBLE_MEM_SIZE,
1051 hdev->cpu_accessible_dma_mem,
1052 hdev->cpu_accessible_dma_address);
1053 free_dma_pool:
1054 dma_pool_destroy(hdev->dma_pool);
1055 free_gaudi_device:
1056 kfree(gaudi);
1057 return rc;
1058 }
1059
1060 static int gaudi_sw_fini(struct hl_device *hdev)
1061 {
1062 struct gaudi_device *gaudi = hdev->asic_specific;
1063
1064 gaudi_free_internal_qmans_pq_mem(hdev);
1065
1066 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1067
1068 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1069 hdev->cpu_pci_msb_addr);
1070 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1071 HL_CPU_ACCESSIBLE_MEM_SIZE,
1072 hdev->cpu_accessible_dma_mem,
1073 hdev->cpu_accessible_dma_address);
1074
1075 dma_pool_destroy(hdev->dma_pool);
1076
1077 mutex_destroy(&gaudi->clk_gate_mutex);
1078
1079 kfree(gaudi);
1080
1081 return 0;
1082 }
1083
1084 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1085 {
1086 struct hl_device *hdev = arg;
1087 int i;
1088
1089 if (hdev->disabled)
1090 return IRQ_HANDLED;
1091
1092 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1093 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1094
1095 hl_irq_handler_eq(irq, &hdev->event_queue);
1096
1097 return IRQ_HANDLED;
1098 }
1099
1100 /*
1101 * For backward compatibility, new MSI interrupts should be set after the
1102 * existing CPU and NIC interrupts.
1103 */
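/*
 * For example, a completion queue whose index is below
 * GAUDI_EVENT_QUEUE_MSI_IDX keeps its index as the MSI vector, the CPU event
 * queue always uses vector GAUDI_EVENT_QUEUE_MSI_IDX, and any completion
 * queue at or above that index is pushed up by NIC_NUMBER_OF_ENGINES + 1
 * vectors.
 */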
1104 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1105 bool cpu_eq)
1106 {
1107 int msi_vec;
1108
1109 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1110 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1111 GAUDI_EVENT_QUEUE_MSI_IDX);
1112
1113 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1114 (nr + NIC_NUMBER_OF_ENGINES + 1);
1115
1116 return pci_irq_vector(hdev->pdev, msi_vec);
1117 }
1118
1119 static int gaudi_enable_msi_single(struct hl_device *hdev)
1120 {
1121 int rc, irq;
1122
1123 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1124
1125 irq = gaudi_pci_irq_vector(hdev, 0, false);
1126 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1127 "gaudi single msi", hdev);
1128 if (rc)
1129 dev_err(hdev->dev,
1130 "Failed to request single MSI IRQ\n");
1131
1132 return rc;
1133 }
1134
1135 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1136 {
1137 int cq_cnt = hdev->asic_prop.completion_queues_count;
1138 int rc, i, irq_cnt_init, irq;
1139
1140 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1141 irq = gaudi_pci_irq_vector(hdev, i, false);
1142 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1143 &hdev->completion_queue[i]);
1144 if (rc) {
1145 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1146 goto free_irqs;
1147 }
1148 }
1149
1150 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1151 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1152 &hdev->event_queue);
1153 if (rc) {
1154 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1155 goto free_irqs;
1156 }
1157
1158 return 0;
1159
1160 free_irqs:
1161 for (i = 0 ; i < irq_cnt_init ; i++)
1162 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1163 &hdev->completion_queue[i]);
1164 return rc;
1165 }
1166
1167 static int gaudi_enable_msi(struct hl_device *hdev)
1168 {
1169 struct gaudi_device *gaudi = hdev->asic_specific;
1170 int rc;
1171
1172 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1173 return 0;
1174
1175 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1176 PCI_IRQ_MSI);
1177 if (rc < 0) {
1178 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1179 return rc;
1180 }
1181
1182 if (rc < NUMBER_OF_INTERRUPTS) {
1183 gaudi->multi_msi_mode = false;
1184 rc = gaudi_enable_msi_single(hdev);
1185 } else {
1186 gaudi->multi_msi_mode = true;
1187 rc = gaudi_enable_msi_multi(hdev);
1188 }
1189
1190 if (rc)
1191 goto free_pci_irq_vectors;
1192
1193 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1194
1195 return 0;
1196
1197 free_pci_irq_vectors:
1198 pci_free_irq_vectors(hdev->pdev);
1199 return rc;
1200 }
1201
1202 static void gaudi_sync_irqs(struct hl_device *hdev)
1203 {
1204 struct gaudi_device *gaudi = hdev->asic_specific;
1205 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1206
1207 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1208 return;
1209
1210 /* Wait for all pending IRQs to be finished */
1211 if (gaudi->multi_msi_mode) {
1212 for (i = 0 ; i < cq_cnt ; i++)
1213 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1214
1215 synchronize_irq(gaudi_pci_irq_vector(hdev,
1216 GAUDI_EVENT_QUEUE_MSI_IDX,
1217 true));
1218 } else {
1219 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1220 }
1221 }
1222
1223 static void gaudi_disable_msi(struct hl_device *hdev)
1224 {
1225 struct gaudi_device *gaudi = hdev->asic_specific;
1226 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1227
1228 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1229 return;
1230
1231 gaudi_sync_irqs(hdev);
1232
1233 if (gaudi->multi_msi_mode) {
1234 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1235 true);
1236 free_irq(irq, &hdev->event_queue);
1237
1238 for (i = 0 ; i < cq_cnt ; i++) {
1239 irq = gaudi_pci_irq_vector(hdev, i, false);
1240 free_irq(irq, &hdev->completion_queue[i]);
1241 }
1242 } else {
1243 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1244 }
1245
1246 pci_free_irq_vectors(hdev->pdev);
1247
1248 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1249 }
1250
1251 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1252 {
1253 struct gaudi_device *gaudi = hdev->asic_specific;
1254
1255 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1256 return;
1257
1258 if (!hdev->sram_scrambler_enable)
1259 return;
1260
1261 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1262 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1263 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1264 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1265 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1266 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1267 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1268 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1270 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1272 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1274 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1276 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277
1278 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1279 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1280 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1281 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1282 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1283 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1284 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1285 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1287 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1289 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1291 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1293 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294
1295 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1296 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1297 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1298 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1299 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1300 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1301 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1302 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1304 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1306 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1308 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1310 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311
1312 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1313 }
1314
1315 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1316 {
1317 struct gaudi_device *gaudi = hdev->asic_specific;
1318
1319 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1320 return;
1321
1322 if (!hdev->dram_scrambler_enable)
1323 return;
1324
1325 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1326 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1327 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1328 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1329 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1330 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1331 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1332 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1334 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1336 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1338 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1340 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341
1342 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1343 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1344 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1345 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1346 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1347 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1348 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1349 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1351 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1353 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1355 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1357 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358
1359 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1360 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1361 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1362 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1363 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1364 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1365 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1366 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1368 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1370 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1372 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1374 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375
1376 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1377 }
1378
1379 static void gaudi_init_e2e(struct hl_device *hdev)
1380 {
1381 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1382 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1383 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1384 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1385
1386 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1387 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1388 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1389 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1390
1391 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1392 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1393 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1394 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1395
1396 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1397 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1398 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1399 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1400
1401 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1402 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1403 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1404 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1405
1406 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1407 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1408 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1409 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1410
1411 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1412 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1413 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1414 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1415
1416 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1417 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1418 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1419 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1420
1421 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1422 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1423 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1424 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1425
1426 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1427 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1428 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1429 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1430
1431 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1432 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1433 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1434 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1435
1436 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1437 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1438 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1439 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1440
1441 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1442 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1443 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1444 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1445
1446 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1447 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1448 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1449 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1450
1451 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1452 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1453 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1454 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1455
1456 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1457 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1458 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1459 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1460
1461 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1462 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1463 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1464 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1465
1466 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1467 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1468 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1469 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1470
1471 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1472 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1473 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1474 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1475
1476 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1477 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1478 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1479 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1480
1481 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1482 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1483 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1484 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1485
1486 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1487 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1488 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1489 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1490
1491 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1492 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1493 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1494 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1495
1496 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1497 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1498 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1499 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1500
1501 if (!hdev->dram_scrambler_enable) {
1502 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1503 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1504 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1505 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1506
1507 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1508 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1509 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1510 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1511
1512 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1513 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1514 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1515 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1516
1517 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1518 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1519 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1520 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1521
1522 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1523 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1524 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1525 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1526
1527 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1528 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1529 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1530 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1531
1532 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1533 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1534 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1535 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1536
1537 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1538 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1539 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1540 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1541
1542 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1543 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1544 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1545 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1546
1547 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1548 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1549 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1550 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1551
1552 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1553 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1554 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1555 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1556
1557 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1558 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1559 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1560 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1561
1562 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1563 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1564 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1565 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1566
1567 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1568 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1569 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1570 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1571
1572 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1573 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1574 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1575 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1576
1577 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1578 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1579 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1580 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1581
1582 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1583 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1584 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1585 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1586
1587 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1588 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1589 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1590 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1591
1592 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1593 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1594 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1595 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1596
1597 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1598 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1599 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1600 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1601
1602 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1603 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1604 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1605 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1606
1607 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1608 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1609 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1610 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1611
1612 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1613 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1614 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1615 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1616
1617 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1618 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1619 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1620 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1621 }
1622
1623 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1624 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1625 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1626 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1627
1628 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1629 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1630 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1631 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1632
1633 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1634 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1635 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1636 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1637
1638 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1639 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1640 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1641 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1642
1643 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1644 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1645 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1646 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1647
1648 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1649 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1650 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1651 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1652
1653 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1654 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1655 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1656 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1657
1658 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1659 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1660 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1661 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1662
1663 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1664 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1665 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1666 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1667
1668 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1669 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1670 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1671 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1672
1673 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1674 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1675 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1676 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1677
1678 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1679 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1680 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1681 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1682
1683 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1684 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1685 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1686 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1687
1688 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1689 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1690 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1691 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1692
1693 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1694 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1695 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1696 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1697
1698 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1699 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1700 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1701 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1702
1703 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1704 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1705 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1706 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1707
1708 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1709 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1710 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1711 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1712
1713 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1714 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1715 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1716 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1717
1718 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1719 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1720 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1721 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1722
1723 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1724 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1725 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1726 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1727
1728 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1729 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1730 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1731 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1732
1733 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1734 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1735 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1736 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1737
1738 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1739 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1740 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1741 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1742 }
1743
1744 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1745 {
1746 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1747
1748 hbm0_wr = 0x33333333;
1749 hbm0_rd = 0x77777777;
1750 hbm1_wr = 0x55555555;
1751 hbm1_rd = 0xDDDDDDDD;
1752
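/*
 * Program the HBM0/HBM1 write and read credit counts in all four
 * DMA_IF units
 */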
1753 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1754 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1755 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1756 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1757
1758 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1759 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1760 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1761 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1762
1763 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1764 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1765 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1766 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1767
1768 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1769 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1770 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1771 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1772
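/*
 * Enable read and write credit counting in both CRED_EN registers
 * of each DMA_IF
 */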
1773 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1774 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1775 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1776 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1777 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1778 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1779 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1780 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1783 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785
1786 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1787 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1788 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1789 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1790 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1791 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1792 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1793 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1796 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798 }
1799
1800 static void gaudi_init_golden_registers(struct hl_device *hdev)
1801 {
1802 u32 tpc_offset;
1803 int tpc_id, i;
1804
1805 gaudi_init_e2e(hdev);
1806
1807 gaudi_init_hbm_cred(hdev);
1808
1809 hdev->asic_funcs->disable_clock_gating(hdev);
1810
1811 for (tpc_id = 0, tpc_offset = 0;
1812 tpc_id < TPC_NUMBER_OF_ENGINES;
1813 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1814 /* Mask all arithmetic interrupts from TPC */
1815 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1816 /* Set 16 cache lines */
1817 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1818 ICACHE_FETCH_LINE_NUM, 2);
1819 }
1820
1821 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1822 for (i = 0 ; i < 128 ; i += 8)
1823 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1824
1825 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1826 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1827 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1828 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1829 }
1830
1831 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1832 int qman_id, dma_addr_t qman_pq_addr)
1833 {
1834 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1835 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1836 u32 q_off, dma_qm_offset;
1837 u32 dma_qm_err_cfg;
1838
1839 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1840
1841 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1842 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1843 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1844 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1845 so_base_en_lo = lower_32_bits(CFG_BASE +
1846 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1847 so_base_en_hi = upper_32_bits(CFG_BASE +
1848 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1849 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1850 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1852 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1853 so_base_ws_lo = lower_32_bits(CFG_BASE +
1854 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855 so_base_ws_hi = upper_32_bits(CFG_BASE +
1856 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1857
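/* Each stream's QMAN registers are 4 bytes apart */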
1858 q_off = dma_qm_offset + qman_id * 4;
1859
1860 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1861 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1862
1863 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1864 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1865 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1866
1867 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1868 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1869 QMAN_LDMA_SRC_OFFSET);
1870 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1871 QMAN_LDMA_DST_OFFSET);
1872
1873 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1874 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1875 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1876 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1877 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1878 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1879 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1880 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1881
1882 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1883
1884 /* The following configuration is needed only once per QMAN */
1885 if (qman_id == 0) {
1886 /* Configure RAZWI IRQ */
1887 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1888 if (hdev->stop_on_err) {
1889 dma_qm_err_cfg |=
1890 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1891 }
1892
1893 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1894 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1895 lower_32_bits(CFG_BASE +
1896 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1897 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1898 upper_32_bits(CFG_BASE +
1899 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1900 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1901 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1902 dma_id);
1903
1904 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1905 QM_ARB_ERR_MSG_EN_MASK);
1906
1907 /* Increase ARB WDT to support streams architecture */
1908 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1909 GAUDI_ARB_WDT_TIMEOUT);
1910
1911 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1912 QMAN_EXTERNAL_MAKE_TRUSTED);
1913
1914 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1915 }
1916 }
1917
1918 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1919 {
1920 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1921 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1922
1923 /* Set to maximum possible according to physical size */
1924 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1925 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1926
1927 /* WA for H/W bug H3-2116 */
1928 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1929
1930 /* The STOP_ON bit implies no completion for the operation in case of RAZWI */
1931 if (hdev->stop_on_err)
1932 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1933
1934 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1935 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1936 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1937 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1938 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1939 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1940 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1941 WREG32(mmDMA0_CORE_PROT + dma_offset,
1942 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1943 /* If the channel is secured, it should be in MMU bypass mode */
1944 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1945 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1946 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1947 }
1948
1949 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1950 u32 enable_mask)
1951 {
1952 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1953
1954 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1955 }
1956
1957 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1958 {
1959 struct gaudi_device *gaudi = hdev->asic_specific;
1960 struct hl_hw_queue *q;
1961 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1962
1963 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1964 return;
1965
1966 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1967 dma_id = gaudi_dma_assignment[i];
1968 /*
1969 * For queues after the CPU Q, we need to add 1 to get the
1970 * correct queue index. In addition, we need to skip over the
1971 * CPU EQ and NIC IRQ vectors to get the correct MSI register.
1972 */
1973 if (dma_id > 1) {
1974 cpu_skip = 1;
1975 nic_skip = NIC_NUMBER_OF_ENGINES;
1976 } else {
1977 cpu_skip = 0;
1978 nic_skip = 0;
1979 }
1980
1981 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1982 q_idx = 4 * dma_id + j + cpu_skip;
1983 q = &hdev->kernel_queues[q_idx];
1984 q->cq_id = cq_id++;
1985 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1986 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1987 q->bus_address);
1988 }
1989
1990 gaudi_init_dma_core(hdev, dma_id);
1991
1992 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1993 }
1994
1995 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1996 }
1997
1998 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1999 int qman_id, u64 qman_base_addr)
2000 {
2001 u32 mtr_base_lo, mtr_base_hi;
2002 u32 so_base_lo, so_base_hi;
2003 u32 q_off, dma_qm_offset;
2004 u32 dma_qm_err_cfg;
2005
2006 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2007
2008 mtr_base_lo = lower_32_bits(CFG_BASE +
2009 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2010 mtr_base_hi = upper_32_bits(CFG_BASE +
2011 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2012 so_base_lo = lower_32_bits(CFG_BASE +
2013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2014 so_base_hi = upper_32_bits(CFG_BASE +
2015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2016
2017 q_off = dma_qm_offset + qman_id * 4;
2018
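/*
 * Streams 0-3 get a PQ here; qman_id 4 is the lower CP, which is
 * configured for error reporting and arbitration instead
 */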
2019 if (qman_id < 4) {
2020 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2021 lower_32_bits(qman_base_addr));
2022 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2023 upper_32_bits(qman_base_addr));
2024
2025 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2026 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2027 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2028
2029 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2030 QMAN_CPDMA_SIZE_OFFSET);
2031 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2032 QMAN_CPDMA_SRC_OFFSET);
2033 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2034 QMAN_CPDMA_DST_OFFSET);
2035 } else {
2036 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2037 QMAN_LDMA_SIZE_OFFSET);
2038 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2039 QMAN_LDMA_SRC_OFFSET);
2040 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2041 QMAN_LDMA_DST_OFFSET);
2042
2043 /* Configure RAZWI IRQ */
2044 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2045 if (hdev->stop_on_err) {
2046 dma_qm_err_cfg |=
2047 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2048 }
2049 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2050
2051 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2052 lower_32_bits(CFG_BASE +
2053 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2054 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2055 upper_32_bits(CFG_BASE +
2056 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2057 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2058 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2059 dma_id);
2060
2061 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2062 QM_ARB_ERR_MSG_EN_MASK);
2063
2064 /* Increase ARB WDT to support streams architecture */
2065 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2066 GAUDI_ARB_WDT_TIMEOUT);
2067
2068 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2069 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2070 QMAN_INTERNAL_MAKE_TRUSTED);
2071 }
2072
2073 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2074 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2075 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2076 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2077 }
2078
2079 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2080 {
2081 struct gaudi_device *gaudi = hdev->asic_specific;
2082 struct gaudi_internal_qman_info *q;
2083 u64 qman_base_addr;
2084 int i, j, dma_id, internal_q_index;
2085
2086 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2087 return;
2088
2089 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2090 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2091
2092 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2093 /*
2094 * Add the CPU queue in order to get the correct queue
2095 * number, as all internal queues are placed after it
2096 */
2097 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2098
2099 q = &gaudi->internal_qmans[internal_q_index];
2100 qman_base_addr = (u64) q->pq_dma_addr;
2101 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2102 qman_base_addr);
2103 }
2104
2105 /* Initializing lower CP for HBM DMA QMAN */
2106 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2107
2108 gaudi_init_dma_core(hdev, dma_id);
2109
2110 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2111 }
2112
2113 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2114 }
2115
2116 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2117 int qman_id, u64 qman_base_addr)
2118 {
2119 u32 mtr_base_lo, mtr_base_hi;
2120 u32 so_base_lo, so_base_hi;
2121 u32 q_off, mme_id;
2122 u32 mme_qm_err_cfg;
2123
2124 mtr_base_lo = lower_32_bits(CFG_BASE +
2125 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2126 mtr_base_hi = upper_32_bits(CFG_BASE +
2127 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2128 so_base_lo = lower_32_bits(CFG_BASE +
2129 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2130 so_base_hi = upper_32_bits(CFG_BASE +
2131 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2132
2133 q_off = mme_offset + qman_id * 4;
2134
2135 if (qman_id < 4) {
2136 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2137 lower_32_bits(qman_base_addr));
2138 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2139 upper_32_bits(qman_base_addr));
2140
2141 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2142 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2143 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2144
2145 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2146 QMAN_CPDMA_SIZE_OFFSET);
2147 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2148 QMAN_CPDMA_SRC_OFFSET);
2149 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2150 QMAN_CPDMA_DST_OFFSET);
2151 } else {
2152 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2153 QMAN_LDMA_SIZE_OFFSET);
2154 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2155 QMAN_LDMA_SRC_OFFSET);
2156 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2157 QMAN_LDMA_DST_OFFSET);
2158
2159 /* Configure RAZWI IRQ */
2160 mme_id = mme_offset /
2161 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2162
2163 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2164 if (hdev->stop_on_err) {
2165 mme_qm_err_cfg |=
2166 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2167 }
2168 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2169 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2170 lower_32_bits(CFG_BASE +
2171 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2172 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2173 upper_32_bits(CFG_BASE +
2174 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2175 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2176 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2177 mme_id);
2178
2179 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2180 QM_ARB_ERR_MSG_EN_MASK);
2181
2182 /* Increase ARB WDT to support streams architecture */
2183 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2184 GAUDI_ARB_WDT_TIMEOUT);
2185
2186 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2187 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2188 QMAN_INTERNAL_MAKE_TRUSTED);
2189 }
2190
2191 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2192 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2193 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2194 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2195 }
2196
2197 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2198 {
2199 struct gaudi_device *gaudi = hdev->asic_specific;
2200 struct gaudi_internal_qman_info *q;
2201 u64 qman_base_addr;
2202 u32 mme_offset;
2203 int i, internal_q_index;
2204
2205 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2206 return;
2207
2208 /*
2209 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2210 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2211 */
2212
2213 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2214
2215 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2216 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2217 q = &gaudi->internal_qmans[internal_q_index];
2218 qman_base_addr = (u64) q->pq_dma_addr;
2219 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2220 qman_base_addr);
2221 if (i == 3)
2222 mme_offset = 0;
2223 }
2224
2225 /* Initializing lower CP for MME QMANs */
2226 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2227 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2228 gaudi_init_mme_qman(hdev, 0, 4, 0);
2229
2230 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2231 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2232
2233 gaudi->hw_cap_initialized |= HW_CAP_MME;
2234 }
2235
2236 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2237 int qman_id, u64 qman_base_addr)
2238 {
2239 u32 mtr_base_lo, mtr_base_hi;
2240 u32 so_base_lo, so_base_hi;
2241 u32 q_off, tpc_id;
2242 u32 tpc_qm_err_cfg;
2243
2244 mtr_base_lo = lower_32_bits(CFG_BASE +
2245 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2246 mtr_base_hi = upper_32_bits(CFG_BASE +
2247 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2248 so_base_lo = lower_32_bits(CFG_BASE +
2249 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2250 so_base_hi = upper_32_bits(CFG_BASE +
2251 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2252
2253 q_off = tpc_offset + qman_id * 4;
2254
2255 if (qman_id < 4) {
2256 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2257 lower_32_bits(qman_base_addr));
2258 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2259 upper_32_bits(qman_base_addr));
2260
2261 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2262 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2263 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2264
2265 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2266 QMAN_CPDMA_SIZE_OFFSET);
2267 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2268 QMAN_CPDMA_SRC_OFFSET);
2269 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2270 QMAN_CPDMA_DST_OFFSET);
2271 } else {
2272 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2273 QMAN_LDMA_SIZE_OFFSET);
2274 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2275 QMAN_LDMA_SRC_OFFSET);
2276 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2277 QMAN_LDMA_DST_OFFSET);
2278
2279 /* Configure RAZWI IRQ */
2280 tpc_id = tpc_offset /
2281 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2282
2283 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2284 if (hdev->stop_on_err) {
2285 tpc_qm_err_cfg |=
2286 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2287 }
2288
2289 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2290 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2291 lower_32_bits(CFG_BASE +
2292 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2293 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2294 upper_32_bits(CFG_BASE +
2295 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2296 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2297 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2298 tpc_id);
2299
2300 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2301 QM_ARB_ERR_MSG_EN_MASK);
2302
2303 /* Increase ARB WDT to support streams architecture */
2304 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2305 GAUDI_ARB_WDT_TIMEOUT);
2306
2307 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2308 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2309 QMAN_INTERNAL_MAKE_TRUSTED);
2310 }
2311
2312 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2313 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2314 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2315 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2316 }
2317
2318 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2319 {
2320 struct gaudi_device *gaudi = hdev->asic_specific;
2321 struct gaudi_internal_qman_info *q;
2322 u64 qman_base_addr;
2323 u32 so_base_hi, tpc_offset = 0;
2324 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2325 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2326 int i, tpc_id, internal_q_index;
2327
2328 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2329 return;
2330
2331 so_base_hi = upper_32_bits(CFG_BASE +
2332 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2333
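/*
 * Configure the four streams and the lower CP of each TPC QMAN,
 * then enable the QMAN and TPC channel
 */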
2334 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2335 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2336 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2337 tpc_id * QMAN_STREAMS + i;
2338 q = &gaudi->internal_qmans[internal_q_index];
2339 qman_base_addr = (u64) q->pq_dma_addr;
2340 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2341 qman_base_addr);
2342
2343 if (i == 3) {
2344 /* Initializing lower CP for TPC QMAN */
2345 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2346
2347 /* Enable the QMAN and TPC channel */
2348 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2349 QMAN_TPC_ENABLE);
2350 }
2351 }
2352
2353 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2354 so_base_hi);
2355
2356 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2357
2358 gaudi->hw_cap_initialized |=
2359 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2360 }
2361 }
2362
2363 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2364 {
2365 struct gaudi_device *gaudi = hdev->asic_specific;
2366
2367 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2368 return;
2369
2370 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2371 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2372 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2373 }
2374
2375 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2376 {
2377 struct gaudi_device *gaudi = hdev->asic_specific;
2378
2379 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2380 return;
2381
2382 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2383 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2384 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2385 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2386 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2387 }
2388
2389 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2390 {
2391 struct gaudi_device *gaudi = hdev->asic_specific;
2392
2393 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2394 return;
2395
2396 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2397 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2398 }
2399
2400 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2401 {
2402 struct gaudi_device *gaudi = hdev->asic_specific;
2403 u32 tpc_offset = 0;
2404 int tpc_id;
2405
2406 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2407 return;
2408
2409 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2410 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2411 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2412 }
2413 }
2414
2415 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2416 {
2417 struct gaudi_device *gaudi = hdev->asic_specific;
2418
2419 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2420 return;
2421
2422 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2423 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2424 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2425 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426 }
2427
2428 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2429 {
2430 struct gaudi_device *gaudi = hdev->asic_specific;
2431
2432 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2433 return;
2434
2435 /* Stop CPs of HBM DMA QMANs */
2436
2437 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442 }
2443
2444 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2445 {
2446 struct gaudi_device *gaudi = hdev->asic_specific;
2447
2448 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2449 return;
2450
2451 /* Stop CPs of MME QMANs */
2452 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2453 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2454 }
2455
2456 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2457 {
2458 struct gaudi_device *gaudi = hdev->asic_specific;
2459
2460 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2461 return;
2462
2463 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2464 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2465 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2466 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2467 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2468 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2469 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471 }
2472
2473 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2474 {
2475 struct gaudi_device *gaudi = hdev->asic_specific;
2476
2477 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2478 return;
2479
2480 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2481 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2482 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2483 }
2484
2485 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2486 {
2487 struct gaudi_device *gaudi = hdev->asic_specific;
2488
2489 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2490 return;
2491
2492 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2493 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2494 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2495 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2496 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2497 }
2498
2499 static void gaudi_mme_stall(struct hl_device *hdev)
2500 {
2501 struct gaudi_device *gaudi = hdev->asic_specific;
2502
2503 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2504 return;
2505
2506 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2507 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2508 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2509 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2510 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2511 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2512 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2513 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2514 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2515 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2516 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2517 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2518 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2519 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2520 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2521 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2522 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2523 }
2524
2525 static void gaudi_tpc_stall(struct hl_device *hdev)
2526 {
2527 struct gaudi_device *gaudi = hdev->asic_specific;
2528
2529 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2530 return;
2531
2532 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2533 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2534 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2535 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2536 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2537 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2538 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540 }
2541
2542 static void gaudi_set_clock_gating(struct hl_device *hdev)
2543 {
2544 struct gaudi_device *gaudi = hdev->asic_specific;
2545 u32 qman_offset;
2546 bool enable;
2547 int i;
2548
2549 /* If we are in a debug session, don't enable clock gating
2550 * as it may interfere
2551 */
2552 if (hdev->in_debug)
2553 return;
2554
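/* The PCI DMA QMANs use the upper-CP power gating configuration */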
2555 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2556 enable = !!(hdev->clock_gating_mask &
2557 (BIT_ULL(gaudi_dma_assignment[i])));
2558
2559 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2560 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2561 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2562 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2563 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2564 }
2565
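/* The HBM DMA QMANs use the common-CP power gating configuration */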
2566 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2567 enable = !!(hdev->clock_gating_mask &
2568 (BIT_ULL(gaudi_dma_assignment[i])));
2569
2570 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2571 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2572 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2573 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2574 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2575 }
2576
2577 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2578 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2580
2581 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2582 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2583 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2584
2585 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2586 enable = !!(hdev->clock_gating_mask &
2587 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2588
2589 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2590 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2591 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2592 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2593
2594 qman_offset += TPC_QMAN_OFFSET;
2595 }
2596
2597 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2598 }
2599
2600 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2601 {
2602 struct gaudi_device *gaudi = hdev->asic_specific;
2603 u32 qman_offset;
2604 int i;
2605
2606 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2607 return;
2608
2609 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2610 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2611 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2612
2613 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2614 }
2615
2616 WREG32(mmMME0_QM_CGM_CFG, 0);
2617 WREG32(mmMME0_QM_CGM_CFG1, 0);
2618 WREG32(mmMME2_QM_CGM_CFG, 0);
2619 WREG32(mmMME2_QM_CGM_CFG1, 0);
2620
2621 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2622 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2623 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2624
2625 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2626 }
2627
2628 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2629 }
2630
2631 static void gaudi_enable_timestamp(struct hl_device *hdev)
2632 {
2633 /* Disable the timestamp counter */
2634 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2635
2636 /* Zero the lower/upper parts of the 64-bit counter */
2637 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2638 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2639
2640 /* Enable the counter */
2641 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2642 }
2643
2644 static void gaudi_disable_timestamp(struct hl_device *hdev)
2645 {
2646 /* Disable the timestamp counter */
2647 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2648 }
2649
2650 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2651 {
2652 u32 wait_timeout_ms;
2653
2654 dev_info(hdev->dev,
2655 "Halting compute engines and disabling interrupts\n");
2656
2657 if (hdev->pldm)
2658 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2659 else
2660 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2661
2662
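/* First stop the QMAN CPs, then stall the engines and only then
 * disable the QMANs
 */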
2663 gaudi_stop_mme_qmans(hdev);
2664 gaudi_stop_tpc_qmans(hdev);
2665 gaudi_stop_hbm_dma_qmans(hdev);
2666 gaudi_stop_pci_dma_qmans(hdev);
2667
2668 hdev->asic_funcs->disable_clock_gating(hdev);
2669
2670 msleep(wait_timeout_ms);
2671
2672 gaudi_pci_dma_stall(hdev);
2673 gaudi_hbm_dma_stall(hdev);
2674 gaudi_tpc_stall(hdev);
2675 gaudi_mme_stall(hdev);
2676
2677 msleep(wait_timeout_ms);
2678
2679 gaudi_disable_mme_qmans(hdev);
2680 gaudi_disable_tpc_qmans(hdev);
2681 gaudi_disable_hbm_dma_qmans(hdev);
2682 gaudi_disable_pci_dma_qmans(hdev);
2683
2684 gaudi_disable_timestamp(hdev);
2685
2686 gaudi_disable_msi(hdev);
2687 }
2688
2689 static int gaudi_mmu_init(struct hl_device *hdev)
2690 {
2691 struct asic_fixed_properties *prop = &hdev->asic_prop;
2692 struct gaudi_device *gaudi = hdev->asic_specific;
2693 u64 hop0_addr;
2694 int rc, i;
2695
2696 if (!hdev->mmu_enable)
2697 return 0;
2698
2699 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2700 return 0;
2701
2702 hdev->dram_supports_virtual_memory = false;
2703
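/* Set the hop-0 page table address for every possible ASID */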
2704 for (i = 0 ; i < prop->max_asid ; i++) {
2705 hop0_addr = prop->mmu_pgt_addr +
2706 (i * prop->mmu_hop_table_size);
2707
2708 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2709 if (rc) {
2710 dev_err(hdev->dev,
2711 "failed to set hop0 addr for asid %d\n", i);
2712 goto err;
2713 }
2714 }
2715
2716 /* init MMU cache manage page */
2717 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2718 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2719
2720 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2721
2722 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2723 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2724
2725 WREG32(mmSTLB_HOP_CONFIGURATION,
2726 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2727
2728 /*
2729 * The H/W expects the first PI after init to be 1. After wraparound
2730 * we'll write 0.
2731 */
2732 gaudi->mmu_cache_inv_pi = 1;
2733
2734 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2735
2736 return 0;
2737
2738 err:
2739 return rc;
2740 }
2741
2742 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2743 {
2744 void __iomem *dst;
2745
2746 /* HBM scrambler must be initialized before pushing F/W to HBM */
2747 gaudi_init_scrambler_hbm(hdev);
2748
2749 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2750
2751 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2752 }
2753
2754 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2755 {
2756 void __iomem *dst;
2757
2758 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2759
2760 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2761 }
2762
2763 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2764 enum hl_fw_component fwc)
2765 {
2766 const char *name;
2767 u32 ver_off;
2768 char *dest;
2769
2770 switch (fwc) {
2771 case FW_COMP_UBOOT:
2772 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2773 dest = hdev->asic_prop.uboot_ver;
2774 name = "U-Boot";
2775 break;
2776 case FW_COMP_PREBOOT:
2777 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2778 dest = hdev->asic_prop.preboot_ver;
2779 name = "Preboot";
2780 break;
2781 default:
2782 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2783 return;
2784 }
2785
2786 ver_off &= ~((u32)SRAM_BASE_ADDR);
2787
2788 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2789 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2790 VERSION_MAX_LEN);
2791 } else {
2792 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2793 name, ver_off);
2794 strcpy(dest, "unavailable");
2795 }
2796 }
2797
2798 static int gaudi_init_cpu(struct hl_device *hdev)
2799 {
2800 struct gaudi_device *gaudi = hdev->asic_specific;
2801 int rc;
2802
2803 if (!hdev->cpu_enable)
2804 return 0;
2805
2806 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2807 return 0;
2808
2809 /*
2810 * The device CPU works with 40 bits addresses.
2811 * This register sets the extension to 50 bits.
2812 */
2813 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2814
2815 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2816 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2817 mmCPU_CMD_STATUS_TO_HOST,
2818 mmCPU_BOOT_ERR0,
2819 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2820 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2821
2822 if (rc)
2823 return rc;
2824
2825 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2826
2827 return 0;
2828 }
2829
2830 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2831 {
2832 struct gaudi_device *gaudi = hdev->asic_specific;
2833 struct hl_eq *eq;
2834 u32 status;
2835 struct hl_hw_queue *cpu_pq =
2836 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2837 int err;
2838
2839 if (!hdev->cpu_queues_enable)
2840 return 0;
2841
2842 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2843 return 0;
2844
2845 eq = &hdev->event_queue;
2846
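/*
 * Publish the PQ, EQ and CQ base addresses and sizes to the
 * device CPU
 */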
2847 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2848 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2849
2850 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2851 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2852
2853 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2854 lower_32_bits(hdev->cpu_accessible_dma_address));
2855 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2856 upper_32_bits(hdev->cpu_accessible_dma_address));
2857
2858 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2859 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2860 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2861
2862 /* Used for EQ CI */
2863 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2864
2865 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2866
2867 if (gaudi->multi_msi_mode)
2868 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2869 else
2870 WREG32(mmCPU_IF_QUEUE_INIT,
2871 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2872
2873 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2874
2875 err = hl_poll_timeout(
2876 hdev,
2877 mmCPU_IF_QUEUE_INIT,
2878 status,
2879 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2880 1000,
2881 cpu_timeout);
2882
2883 if (err) {
2884 dev_err(hdev->dev,
2885 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2886 return -EIO;
2887 }
2888
2889 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2890 return 0;
2891 }
2892
2893 static void gaudi_pre_hw_init(struct hl_device *hdev)
2894 {
2895 /* Perform read from the device to make sure device is up */
2896 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2897
2898 /* Set the access through PCI bars (Linux driver only) as
2899 * secured
2900 */
2901 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2902 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2903 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2904
2905 /* Perform read to flush the waiting writes to ensure
2906 * configuration was set in the device
2907 */
2908 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2909
2910 /*
2911 * Let's mark in the H/W that we have reached this point. We check
2912 * this value in the reset_before_init function to understand whether
2913 * we need to reset the chip before doing H/W init. This register is
2914 * cleared by the H/W upon H/W reset
2915 */
2916 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2917
2918 /* Configure the reset registers. Must be done as early as possible
2919 * in case we fail during H/W initialization
2920 */
2921 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2922 (CFG_RST_H_DMA_MASK |
2923 CFG_RST_H_MME_MASK |
2924 CFG_RST_H_SM_MASK |
2925 CFG_RST_H_TPC_7_MASK));
2926
2927 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2928
2929 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2930 (CFG_RST_H_HBM_MASK |
2931 CFG_RST_H_TPC_7_MASK |
2932 CFG_RST_H_NIC_MASK |
2933 CFG_RST_H_SM_MASK |
2934 CFG_RST_H_DMA_MASK |
2935 CFG_RST_H_MME_MASK |
2936 CFG_RST_H_CPU_MASK |
2937 CFG_RST_H_MMU_MASK));
2938
2939 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2940 (CFG_RST_L_IF_MASK |
2941 CFG_RST_L_PSOC_MASK |
2942 CFG_RST_L_TPC_MASK));
2943 }
2944
2945 static int gaudi_hw_init(struct hl_device *hdev)
2946 {
2947 int rc;
2948
2949 dev_info(hdev->dev, "Starting initialization of H/W\n");
2950
2951 gaudi_pre_hw_init(hdev);
2952
2953 gaudi_init_pci_dma_qmans(hdev);
2954
2955 gaudi_init_hbm_dma_qmans(hdev);
2956
2957 rc = gaudi_init_cpu(hdev);
2958 if (rc) {
2959 dev_err(hdev->dev, "failed to initialize CPU\n");
2960 return rc;
2961 }
2962
2963 /* SRAM scrambler must be initialized after CPU is running from HBM */
2964 gaudi_init_scrambler_sram(hdev);
2965
2966 /* This is here just in case we are working without CPU */
2967 gaudi_init_scrambler_hbm(hdev);
2968
2969 gaudi_init_golden_registers(hdev);
2970
2971 rc = gaudi_mmu_init(hdev);
2972 if (rc)
2973 return rc;
2974
2975 gaudi_init_security(hdev);
2976
2977 gaudi_init_mme_qmans(hdev);
2978
2979 gaudi_init_tpc_qmans(hdev);
2980
2981 hdev->asic_funcs->set_clock_gating(hdev);
2982
2983 gaudi_enable_timestamp(hdev);
2984
2985 /* MSI must be enabled before CPU queues are initialized */
2986 rc = gaudi_enable_msi(hdev);
2987 if (rc)
2988 goto disable_queues;
2989
2990 /* must be called after MSI was enabled */
2991 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2992 if (rc) {
2993 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2994 rc);
2995 goto disable_msi;
2996 }
2997
2998 /* Perform read from the device to flush all configuration */
2999 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3000
3001 return 0;
3002
3003 disable_msi:
3004 gaudi_disable_msi(hdev);
3005 disable_queues:
3006 gaudi_disable_mme_qmans(hdev);
3007 gaudi_disable_pci_dma_qmans(hdev);
3008
3009 return rc;
3010 }
3011
3012 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3013 {
3014 struct gaudi_device *gaudi = hdev->asic_specific;
3015 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3016
3017 if (!hard_reset) {
3018 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3019 return;
3020 }
3021
3022 if (hdev->pldm) {
3023 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3024 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3025 } else {
3026 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3027 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3028 }
3029
3030 /* Set device to handle FLR by H/W as we will put the device CPU to
3031 * halt mode
3032 */
3033 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3034 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3035
3036 /* We don't know the state of the CPU, so make sure it is
3037 * stopped by any means necessary
3038 */
3039 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3040 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3041
3042 msleep(cpu_timeout_ms);
3043
3044 /* Tell ASIC not to re-initialize PCIe */
3045 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3046
3047 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3048
3049 /* H/W bug WA:
3050 * rdata[31:0] = strap_read_val;
3051 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3052 */
3053 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3054 (boot_strap & 0x001FFFFF));
3055 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3056
3057 /* Restart BTL/BLR upon hard-reset */
3058 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3059
3060 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3061 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3062 dev_info(hdev->dev,
3063 "Issued HARD reset command, going to wait %dms\n",
3064 reset_timeout_ms);
3065
3066 /*
3067 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3068 * itself is in reset. Need to wait until the reset is deasserted
3069 */
3070 msleep(reset_timeout_ms);
3071
3072 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3073 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3074 dev_err(hdev->dev,
3075 "Timeout while waiting for device to reset 0x%x\n",
3076 status);
3077
3078 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3079
3080 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3081 HW_CAP_HBM | HW_CAP_PCI_DMA |
3082 HW_CAP_MME | HW_CAP_TPC_MASK |
3083 HW_CAP_HBM_DMA | HW_CAP_PLL |
3084 HW_CAP_MMU |
3085 HW_CAP_SRAM_SCRAMBLER |
3086 HW_CAP_HBM_SCRAMBLER |
3087 HW_CAP_CLK_GATE);
3088
3089 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3090 }
3091
3092 static int gaudi_suspend(struct hl_device *hdev)
3093 {
3094 int rc;
3095
3096 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3097 if (rc)
3098 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3099
3100 return rc;
3101 }
3102
3103 static int gaudi_resume(struct hl_device *hdev)
3104 {
3105 return gaudi_init_iatu(hdev);
3106 }
3107
3108 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3109 void *cpu_addr, dma_addr_t dma_addr, size_t size)
3110 {
3111 int rc;
3112
3113 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3114 VM_DONTCOPY | VM_NORESERVE;
3115
3116 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
3117 if (rc)
3118 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
3119
3120 return rc;
3121 }
3122
3123 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3124 {
3125 struct gaudi_device *gaudi = hdev->asic_specific;
3126 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3127 int dma_id;
3128 bool invalid_queue = false;
3129
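/* Resolve the doorbell register that matches the requested H/W queue */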
3130 switch (hw_queue_id) {
3131 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3132 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3133 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3135 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136 break;
3137
3138 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3139 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 break;
3144
3145 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3146 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3153 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 break;
3158
3159 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3161 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 break;
3165
3166 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3167 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 break;
3172
3173 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3174 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3175 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3181 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3182 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185 break;
3186
3187 case GAUDI_QUEUE_ID_CPU_PQ:
3188 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3189 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3190 else
3191 invalid_queue = true;
3192 break;
3193
3194 case GAUDI_QUEUE_ID_MME_0_0:
3195 db_reg_offset = mmMME2_QM_PQ_PI_0;
3196 break;
3197
3198 case GAUDI_QUEUE_ID_MME_0_1:
3199 db_reg_offset = mmMME2_QM_PQ_PI_1;
3200 break;
3201
3202 case GAUDI_QUEUE_ID_MME_0_2:
3203 db_reg_offset = mmMME2_QM_PQ_PI_2;
3204 break;
3205
3206 case GAUDI_QUEUE_ID_MME_0_3:
3207 db_reg_offset = mmMME2_QM_PQ_PI_3;
3208 break;
3209
3210 case GAUDI_QUEUE_ID_MME_1_0:
3211 db_reg_offset = mmMME0_QM_PQ_PI_0;
3212 break;
3213
3214 case GAUDI_QUEUE_ID_MME_1_1:
3215 db_reg_offset = mmMME0_QM_PQ_PI_1;
3216 break;
3217
3218 case GAUDI_QUEUE_ID_MME_1_2:
3219 db_reg_offset = mmMME0_QM_PQ_PI_2;
3220 break;
3221
3222 case GAUDI_QUEUE_ID_MME_1_3:
3223 db_reg_offset = mmMME0_QM_PQ_PI_3;
3224 break;
3225
3226 case GAUDI_QUEUE_ID_TPC_0_0:
3227 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3228 break;
3229
3230 case GAUDI_QUEUE_ID_TPC_0_1:
3231 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3232 break;
3233
3234 case GAUDI_QUEUE_ID_TPC_0_2:
3235 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3236 break;
3237
3238 case GAUDI_QUEUE_ID_TPC_0_3:
3239 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3240 break;
3241
3242 case GAUDI_QUEUE_ID_TPC_1_0:
3243 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3244 break;
3245
3246 case GAUDI_QUEUE_ID_TPC_1_1:
3247 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3248 break;
3249
3250 case GAUDI_QUEUE_ID_TPC_1_2:
3251 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3252 break;
3253
3254 case GAUDI_QUEUE_ID_TPC_1_3:
3255 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3256 break;
3257
3258 case GAUDI_QUEUE_ID_TPC_2_0:
3259 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3260 break;
3261
3262 case GAUDI_QUEUE_ID_TPC_2_1:
3263 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3264 break;
3265
3266 case GAUDI_QUEUE_ID_TPC_2_2:
3267 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3268 break;
3269
3270 case GAUDI_QUEUE_ID_TPC_2_3:
3271 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3272 break;
3273
3274 case GAUDI_QUEUE_ID_TPC_3_0:
3275 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3276 break;
3277
3278 case GAUDI_QUEUE_ID_TPC_3_1:
3279 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3280 break;
3281
3282 case GAUDI_QUEUE_ID_TPC_3_2:
3283 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3284 break;
3285
3286 case GAUDI_QUEUE_ID_TPC_3_3:
3287 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3288 break;
3289
3290 case GAUDI_QUEUE_ID_TPC_4_0:
3291 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3292 break;
3293
3294 case GAUDI_QUEUE_ID_TPC_4_1:
3295 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3296 break;
3297
3298 case GAUDI_QUEUE_ID_TPC_4_2:
3299 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3300 break;
3301
3302 case GAUDI_QUEUE_ID_TPC_4_3:
3303 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3304 break;
3305
3306 case GAUDI_QUEUE_ID_TPC_5_0:
3307 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3308 break;
3309
3310 case GAUDI_QUEUE_ID_TPC_5_1:
3311 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3312 break;
3313
3314 case GAUDI_QUEUE_ID_TPC_5_2:
3315 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3316 break;
3317
3318 case GAUDI_QUEUE_ID_TPC_5_3:
3319 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3320 break;
3321
3322 case GAUDI_QUEUE_ID_TPC_6_0:
3323 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3324 break;
3325
3326 case GAUDI_QUEUE_ID_TPC_6_1:
3327 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3328 break;
3329
3330 case GAUDI_QUEUE_ID_TPC_6_2:
3331 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3332 break;
3333
3334 case GAUDI_QUEUE_ID_TPC_6_3:
3335 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3336 break;
3337
3338 case GAUDI_QUEUE_ID_TPC_7_0:
3339 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3340 break;
3341
3342 case GAUDI_QUEUE_ID_TPC_7_1:
3343 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3344 break;
3345
3346 case GAUDI_QUEUE_ID_TPC_7_2:
3347 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3348 break;
3349
3350 case GAUDI_QUEUE_ID_TPC_7_3:
3351 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3352 break;
3353
3354 default:
3355 invalid_queue = true;
3356 }
3357
3358 if (invalid_queue) {
3359 /* Should never get here */
3360 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3361 hw_queue_id);
3362 return;
3363 }
3364
3365 db_value = pi;
3366
3367 /* ring the doorbell */
3368 WREG32(db_reg_offset, db_value);
3369
3370 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3371 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3372 GAUDI_EVENT_PI_UPDATE);
3373 }
3374
3375 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3376 struct hl_bd *bd)
3377 {
3378 __le64 *pbd = (__le64 *) bd;
3379
3380 	/* The QMANs are on the host memory so a simple copy suffices */
3381 pqe[0] = pbd[0];
3382 pqe[1] = pbd[1];
3383 }
3384
3385 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3386 dma_addr_t *dma_handle, gfp_t flags)
3387 {
3388 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3389 dma_handle, flags);
3390
3391 /* Shift to the device's base physical address of host memory */
3392 if (kernel_addr)
3393 *dma_handle += HOST_PHYS_BASE;
3394
3395 return kernel_addr;
3396 }
3397
3398 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3399 void *cpu_addr, dma_addr_t dma_handle)
3400 {
3401 /* Cancel the device's base physical address of host memory */
3402 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3403
3404 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3405 }
3406
3407 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3408 u32 queue_id, dma_addr_t *dma_handle,
3409 u16 *queue_len)
3410 {
3411 struct gaudi_device *gaudi = hdev->asic_specific;
3412 struct gaudi_internal_qman_info *q;
3413
3414 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3415 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3416 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3417 return NULL;
3418 }
3419
3420 q = &gaudi->internal_qmans[queue_id];
3421 *dma_handle = q->pq_dma_addr;
3422 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3423
3424 return q->pq_kernel_addr;
3425 }
3426
3427 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3428 u16 len, u32 timeout, long *result)
3429 {
3430 struct gaudi_device *gaudi = hdev->asic_specific;
3431
3432 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3433 if (result)
3434 *result = 0;
3435 return 0;
3436 }
3437
3438 if (!timeout)
3439 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3440
3441 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3442 timeout, result);
3443 }
3444
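/*
 * Check that an external H/W queue is functional: submit a single MSG_PROT
 * packet that writes a known fence value to host memory and poll until the
 * value shows up (or the test times out).
 */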
3445 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3446 {
3447 struct packet_msg_prot *fence_pkt;
3448 dma_addr_t pkt_dma_addr;
3449 u32 fence_val, tmp, timeout_usec;
3450 dma_addr_t fence_dma_addr;
3451 u32 *fence_ptr;
3452 int rc;
3453
3454 if (hdev->pldm)
3455 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3456 else
3457 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3458
3459 fence_val = GAUDI_QMAN0_FENCE_VAL;
3460
3461 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3462 &fence_dma_addr);
3463 if (!fence_ptr) {
3464 dev_err(hdev->dev,
3465 "Failed to allocate memory for H/W queue %d testing\n",
3466 hw_queue_id);
3467 return -ENOMEM;
3468 }
3469
3470 *fence_ptr = 0;
3471
3472 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3473 sizeof(struct packet_msg_prot),
3474 GFP_KERNEL, &pkt_dma_addr);
3475 if (!fence_pkt) {
3476 dev_err(hdev->dev,
3477 "Failed to allocate packet for H/W queue %d testing\n",
3478 hw_queue_id);
3479 rc = -ENOMEM;
3480 goto free_fence_ptr;
3481 }
3482
3483 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3484 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3485 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3486
3487 fence_pkt->ctl = cpu_to_le32(tmp);
3488 fence_pkt->value = cpu_to_le32(fence_val);
3489 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3490
3491 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3492 sizeof(struct packet_msg_prot),
3493 pkt_dma_addr);
3494 if (rc) {
3495 dev_err(hdev->dev,
3496 "Failed to send fence packet to H/W queue %d\n",
3497 hw_queue_id);
3498 goto free_pkt;
3499 }
3500
3501 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3502 1000, timeout_usec, true);
3503
3504 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3505
3506 if (rc == -ETIMEDOUT) {
3507 dev_err(hdev->dev,
3508 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3509 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3510 rc = -EIO;
3511 }
3512
3513 free_pkt:
3514 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3515 pkt_dma_addr);
3516 free_fence_ptr:
3517 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3518 fence_dma_addr);
3519 return rc;
3520 }
3521
3522 static int gaudi_test_cpu_queue(struct hl_device *hdev)
3523 {
3524 struct gaudi_device *gaudi = hdev->asic_specific;
3525
3526 /*
3527 * check capability here as send_cpu_message() won't update the result
3528 * value if no capability
3529 */
3530 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3531 return 0;
3532
3533 return hl_fw_test_cpu_queue(hdev);
3534 }
3535
3536 static int gaudi_test_queues(struct hl_device *hdev)
3537 {
3538 int i, rc, ret_val = 0;
3539
3540 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3541 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3542 rc = gaudi_test_queue(hdev, i);
3543 if (rc)
3544 ret_val = -EINVAL;
3545 }
3546 }
3547
3548 rc = gaudi_test_cpu_queue(hdev);
3549 if (rc)
3550 ret_val = -EINVAL;
3551
3552 return ret_val;
3553 }
3554
3555 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3556 gfp_t mem_flags, dma_addr_t *dma_handle)
3557 {
3558 void *kernel_addr;
3559
3560 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3561 return NULL;
3562
3563 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3564
3565 /* Shift to the device's base physical address of host memory */
3566 if (kernel_addr)
3567 *dma_handle += HOST_PHYS_BASE;
3568
3569 return kernel_addr;
3570 }
3571
3572 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3573 dma_addr_t dma_addr)
3574 {
3575 /* Cancel the device's base physical address of host memory */
3576 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3577
3578 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3579 }
3580
3581 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3582 size_t size, dma_addr_t *dma_handle)
3583 {
3584 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3585 }
3586
3587 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3588 size_t size, void *vaddr)
3589 {
3590 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3591 }
3592
3593 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3594 int nents, enum dma_data_direction dir)
3595 {
3596 struct scatterlist *sg;
3597 int i;
3598
3599 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3600 return -ENOMEM;
3601
3602 /* Shift to the device's base physical address of host memory */
3603 for_each_sg(sgl, sg, nents, i)
3604 sg->dma_address += HOST_PHYS_BASE;
3605
3606 return 0;
3607 }
3608
3609 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3610 int nents, enum dma_data_direction dir)
3611 {
3612 struct scatterlist *sg;
3613 int i;
3614
3615 /* Cancel the device's base physical address of host memory */
3616 for_each_sg(sgl, sg, nents, i)
3617 sg->dma_address -= HOST_PHYS_BASE;
3618
3619 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3620 }
3621
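/*
 * Return the number of bytes of LIN_DMA packets needed to cover the given
 * SG table, merging physically contiguous entries as long as the combined
 * length does not exceed DMA_MAX_TRANSFER_SIZE.
 */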
3622 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3623 struct sg_table *sgt)
3624 {
3625 struct scatterlist *sg, *sg_next_iter;
3626 u32 count, dma_desc_cnt;
3627 u64 len, len_next;
3628 dma_addr_t addr, addr_next;
3629
3630 dma_desc_cnt = 0;
3631
3632 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3633
3634 len = sg_dma_len(sg);
3635 addr = sg_dma_address(sg);
3636
3637 if (len == 0)
3638 break;
3639
3640 while ((count + 1) < sgt->nents) {
3641 sg_next_iter = sg_next(sg);
3642 len_next = sg_dma_len(sg_next_iter);
3643 addr_next = sg_dma_address(sg_next_iter);
3644
3645 if (len_next == 0)
3646 break;
3647
3648 if ((addr + len == addr_next) &&
3649 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3650 len += len_next;
3651 count++;
3652 sg = sg_next_iter;
3653 } else {
3654 break;
3655 }
3656 }
3657
3658 dma_desc_cnt++;
3659 }
3660
3661 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3662 }
3663
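/*
 * Pin and DMA-map the host buffer referenced by a user LIN_DMA packet,
 * unless it is already pinned for this job, and add the descriptor-list
 * size it will contribute to the patched CB.
 */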
3664 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3665 struct hl_cs_parser *parser,
3666 struct packet_lin_dma *user_dma_pkt,
3667 u64 addr, enum dma_data_direction dir)
3668 {
3669 struct hl_userptr *userptr;
3670 int rc;
3671
3672 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3673 parser->job_userptr_list, &userptr))
3674 goto already_pinned;
3675
3676 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3677 if (!userptr)
3678 return -ENOMEM;
3679
3680 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3681 userptr);
3682 if (rc)
3683 goto free_userptr;
3684
3685 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3686
3687 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3688 userptr->sgt->nents, dir);
3689 if (rc) {
3690 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3691 goto unpin_memory;
3692 }
3693
3694 userptr->dma_mapped = true;
3695 userptr->dir = dir;
3696
3697 already_pinned:
3698 parser->patched_cb_size +=
3699 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3700
3701 return 0;
3702
3703 unpin_memory:
3704 hl_unpin_host_memory(hdev, userptr);
3705 free_userptr:
3706 kfree(userptr);
3707 return rc;
3708 }
3709
3710 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3711 struct hl_cs_parser *parser,
3712 struct packet_lin_dma *user_dma_pkt,
3713 bool src_in_host)
3714 {
3715 enum dma_data_direction dir;
3716 bool skip_host_mem_pin = false, user_memset;
3717 u64 addr;
3718 int rc = 0;
3719
3720 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3721 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3722 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3723
3724 if (src_in_host) {
3725 if (user_memset)
3726 skip_host_mem_pin = true;
3727
3728 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3729 dir = DMA_TO_DEVICE;
3730 addr = le64_to_cpu(user_dma_pkt->src_addr);
3731 } else {
3732 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3733 dir = DMA_FROM_DEVICE;
3734 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3735 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3736 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3737 }
3738
3739 if (skip_host_mem_pin)
3740 parser->patched_cb_size += sizeof(*user_dma_pkt);
3741 else
3742 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3743 addr, dir);
3744
3745 return rc;
3746 }
3747
3748 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3749 struct hl_cs_parser *parser,
3750 struct packet_lin_dma *user_dma_pkt)
3751 {
3752 bool src_in_host = false;
3753 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3754 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3755 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3756
3757 dev_dbg(hdev->dev, "DMA packet details:\n");
3758 dev_dbg(hdev->dev, "source == 0x%llx\n",
3759 le64_to_cpu(user_dma_pkt->src_addr));
3760 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3761 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3762
3763 /*
3764 * Special handling for DMA with size 0. Bypass all validations
3765 * because no transactions will be done except for WR_COMP, which
3766 * is not a security issue
3767 */
3768 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3769 parser->patched_cb_size += sizeof(*user_dma_pkt);
3770 return 0;
3771 }
3772
3773 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3774 src_in_host = true;
3775
3776 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3777 src_in_host);
3778 }
3779
3780 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3781 struct hl_cs_parser *parser,
3782 struct packet_load_and_exe *user_pkt)
3783 {
3784 u32 cfg;
3785
3786 cfg = le32_to_cpu(user_pkt->cfg);
3787
3788 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3789 dev_err(hdev->dev,
3790 "User not allowed to use Load and Execute\n");
3791 return -EPERM;
3792 }
3793
3794 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3795
3796 return 0;
3797 }
3798
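/*
 * Walk the user CB packet by packet, reject packet types the user is not
 * allowed to submit and accumulate the size of the patched CB, including
 * room for the two trailing MSG_PROT packets.
 */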
3799 static int gaudi_validate_cb(struct hl_device *hdev,
3800 struct hl_cs_parser *parser, bool is_mmu)
3801 {
3802 u32 cb_parsed_length = 0;
3803 int rc = 0;
3804
3805 parser->patched_cb_size = 0;
3806
3807 	/* user_cb_size is more than 0 so the loop will always be executed */
3808 while (cb_parsed_length < parser->user_cb_size) {
3809 enum packet_id pkt_id;
3810 u16 pkt_size;
3811 struct gaudi_packet *user_pkt;
3812
3813 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3814
3815 pkt_id = (enum packet_id) (
3816 (le64_to_cpu(user_pkt->header) &
3817 PACKET_HEADER_PACKET_ID_MASK) >>
3818 PACKET_HEADER_PACKET_ID_SHIFT);
3819
3820 if (!validate_packet_id(pkt_id)) {
3821 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3822 rc = -EINVAL;
3823 break;
3824 }
3825
3826 pkt_size = gaudi_packet_sizes[pkt_id];
3827 cb_parsed_length += pkt_size;
3828 if (cb_parsed_length > parser->user_cb_size) {
3829 dev_err(hdev->dev,
3830 "packet 0x%x is out of CB boundary\n", pkt_id);
3831 rc = -EINVAL;
3832 break;
3833 }
3834
3835 switch (pkt_id) {
3836 case PACKET_MSG_PROT:
3837 dev_err(hdev->dev,
3838 "User not allowed to use MSG_PROT\n");
3839 rc = -EPERM;
3840 break;
3841
3842 case PACKET_CP_DMA:
3843 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3844 rc = -EPERM;
3845 break;
3846
3847 case PACKET_STOP:
3848 dev_err(hdev->dev, "User not allowed to use STOP\n");
3849 rc = -EPERM;
3850 break;
3851
3852 case PACKET_WREG_BULK:
3853 dev_err(hdev->dev,
3854 "User not allowed to use WREG_BULK\n");
3855 rc = -EPERM;
3856 break;
3857
3858 case PACKET_LOAD_AND_EXE:
3859 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3860 (struct packet_load_and_exe *) user_pkt);
3861 break;
3862
3863 case PACKET_LIN_DMA:
3864 parser->contains_dma_pkt = true;
3865 if (is_mmu)
3866 parser->patched_cb_size += pkt_size;
3867 else
3868 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3869 (struct packet_lin_dma *) user_pkt);
3870 break;
3871
3872 case PACKET_WREG_32:
3873 case PACKET_MSG_LONG:
3874 case PACKET_MSG_SHORT:
3875 case PACKET_REPEAT:
3876 case PACKET_FENCE:
3877 case PACKET_NOP:
3878 case PACKET_ARB_POINT:
3879 parser->patched_cb_size += pkt_size;
3880 break;
3881
3882 default:
3883 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3884 pkt_id);
3885 rc = -EINVAL;
3886 break;
3887 }
3888
3889 if (rc)
3890 break;
3891 }
3892
3893 /*
3894 * The new CB should have space at the end for two MSG_PROT packets:
3895 * 1. A packet that will act as a completion packet
3896 * 2. A packet that will generate MSI-X interrupt
3897 */
3898 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3899
3900 return rc;
3901 }
3902
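/*
 * Expand a user LIN_DMA packet that touches host memory into one LIN_DMA
 * packet per (merged) SG entry of the pinned buffer. The user's WR_COMP
 * setting is restored only on the last generated packet.
 */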
3903 static int gaudi_patch_dma_packet(struct hl_device *hdev,
3904 struct hl_cs_parser *parser,
3905 struct packet_lin_dma *user_dma_pkt,
3906 struct packet_lin_dma *new_dma_pkt,
3907 u32 *new_dma_pkt_size)
3908 {
3909 struct hl_userptr *userptr;
3910 struct scatterlist *sg, *sg_next_iter;
3911 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3912 u64 len, len_next;
3913 dma_addr_t dma_addr, dma_addr_next;
3914 u64 device_memory_addr, addr;
3915 enum dma_data_direction dir;
3916 struct sg_table *sgt;
3917 bool src_in_host = false;
3918 bool skip_host_mem_pin = false;
3919 bool user_memset;
3920
3921 ctl = le32_to_cpu(user_dma_pkt->ctl);
3922
3923 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3924 src_in_host = true;
3925
3926 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3927 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3928
3929 if (src_in_host) {
3930 addr = le64_to_cpu(user_dma_pkt->src_addr);
3931 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3932 dir = DMA_TO_DEVICE;
3933 if (user_memset)
3934 skip_host_mem_pin = true;
3935 } else {
3936 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3937 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3938 dir = DMA_FROM_DEVICE;
3939 }
3940
3941 if ((!skip_host_mem_pin) &&
3942 (!hl_userptr_is_pinned(hdev, addr,
3943 le32_to_cpu(user_dma_pkt->tsize),
3944 parser->job_userptr_list, &userptr))) {
3945 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3946 				addr, le32_to_cpu(user_dma_pkt->tsize));
3947 return -EFAULT;
3948 }
3949
3950 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3951 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3952 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3953 return 0;
3954 }
3955
3956 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3957
3958 sgt = userptr->sgt;
3959 dma_desc_cnt = 0;
3960
3961 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3962 len = sg_dma_len(sg);
3963 dma_addr = sg_dma_address(sg);
3964
3965 if (len == 0)
3966 break;
3967
3968 while ((count + 1) < sgt->nents) {
3969 sg_next_iter = sg_next(sg);
3970 len_next = sg_dma_len(sg_next_iter);
3971 dma_addr_next = sg_dma_address(sg_next_iter);
3972
3973 if (len_next == 0)
3974 break;
3975
3976 if ((dma_addr + len == dma_addr_next) &&
3977 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3978 len += len_next;
3979 count++;
3980 sg = sg_next_iter;
3981 } else {
3982 break;
3983 }
3984 }
3985
3986 ctl = le32_to_cpu(user_dma_pkt->ctl);
3987 if (likely(dma_desc_cnt))
3988 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3989 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3990 new_dma_pkt->ctl = cpu_to_le32(ctl);
3991 new_dma_pkt->tsize = cpu_to_le32(len);
3992
3993 if (dir == DMA_TO_DEVICE) {
3994 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3995 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3996 } else {
3997 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3998 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3999 }
4000
4001 if (!user_memset)
4002 device_memory_addr += len;
4003 dma_desc_cnt++;
4004 new_dma_pkt++;
4005 }
4006
4007 if (!dma_desc_cnt) {
4008 dev_err(hdev->dev,
4009 "Error of 0 SG entries when patching DMA packet\n");
4010 return -EFAULT;
4011 }
4012
4013 /* Fix the last dma packet - wrcomp must be as user set it */
4014 new_dma_pkt--;
4015 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4016
4017 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4018
4019 return 0;
4020 }
4021
4022 static int gaudi_patch_cb(struct hl_device *hdev,
4023 struct hl_cs_parser *parser)
4024 {
4025 u32 cb_parsed_length = 0;
4026 u32 cb_patched_cur_length = 0;
4027 int rc = 0;
4028
4029 	/* user_cb_size is more than 0 so the loop will always be executed */
4030 while (cb_parsed_length < parser->user_cb_size) {
4031 enum packet_id pkt_id;
4032 u16 pkt_size;
4033 u32 new_pkt_size = 0;
4034 struct gaudi_packet *user_pkt, *kernel_pkt;
4035
4036 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4037 kernel_pkt = parser->patched_cb->kernel_address +
4038 cb_patched_cur_length;
4039
4040 pkt_id = (enum packet_id) (
4041 (le64_to_cpu(user_pkt->header) &
4042 PACKET_HEADER_PACKET_ID_MASK) >>
4043 PACKET_HEADER_PACKET_ID_SHIFT);
4044
4045 if (!validate_packet_id(pkt_id)) {
4046 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4047 rc = -EINVAL;
4048 break;
4049 }
4050
4051 pkt_size = gaudi_packet_sizes[pkt_id];
4052 cb_parsed_length += pkt_size;
4053 if (cb_parsed_length > parser->user_cb_size) {
4054 dev_err(hdev->dev,
4055 "packet 0x%x is out of CB boundary\n", pkt_id);
4056 rc = -EINVAL;
4057 break;
4058 }
4059
4060 switch (pkt_id) {
4061 case PACKET_LIN_DMA:
4062 rc = gaudi_patch_dma_packet(hdev, parser,
4063 (struct packet_lin_dma *) user_pkt,
4064 (struct packet_lin_dma *) kernel_pkt,
4065 &new_pkt_size);
4066 cb_patched_cur_length += new_pkt_size;
4067 break;
4068
4069 case PACKET_MSG_PROT:
4070 dev_err(hdev->dev,
4071 "User not allowed to use MSG_PROT\n");
4072 rc = -EPERM;
4073 break;
4074
4075 case PACKET_CP_DMA:
4076 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4077 rc = -EPERM;
4078 break;
4079
4080 case PACKET_STOP:
4081 dev_err(hdev->dev, "User not allowed to use STOP\n");
4082 rc = -EPERM;
4083 break;
4084
4085 case PACKET_WREG_32:
4086 case PACKET_WREG_BULK:
4087 case PACKET_MSG_LONG:
4088 case PACKET_MSG_SHORT:
4089 case PACKET_REPEAT:
4090 case PACKET_FENCE:
4091 case PACKET_NOP:
4092 case PACKET_ARB_POINT:
4093 case PACKET_LOAD_AND_EXE:
4094 memcpy(kernel_pkt, user_pkt, pkt_size);
4095 cb_patched_cur_length += pkt_size;
4096 break;
4097
4098 default:
4099 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4100 pkt_id);
4101 rc = -EINVAL;
4102 break;
4103 }
4104
4105 if (rc)
4106 break;
4107 }
4108
4109 return rc;
4110 }
4111
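/*
 * With the MMU enabled the user CB is copied as-is into a kernel-owned CB
 * (with room for two trailing MSG_PROT packets) and then only validated;
 * LIN_DMA packets are not patched.
 */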
4112 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4113 struct hl_cs_parser *parser)
4114 {
4115 u64 patched_cb_handle;
4116 u32 patched_cb_size;
4117 struct hl_cb *user_cb;
4118 int rc;
4119
4120 /*
4121 	 * The new CB should have space at the end for two MSG_PROT packets:
4122 * 1. A packet that will act as a completion packet
4123 * 2. A packet that will generate MSI interrupt
4124 */
4125 parser->patched_cb_size = parser->user_cb_size +
4126 sizeof(struct packet_msg_prot) * 2;
4127
4128 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4129 parser->patched_cb_size, false, false,
4130 &patched_cb_handle);
4131
4132 if (rc) {
4133 dev_err(hdev->dev,
4134 "Failed to allocate patched CB for DMA CS %d\n",
4135 rc);
4136 return rc;
4137 }
4138
4139 patched_cb_handle >>= PAGE_SHIFT;
4140 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4141 (u32) patched_cb_handle);
4142 /* hl_cb_get should never fail here so use kernel WARN */
4143 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4144 (u32) patched_cb_handle);
4145 if (!parser->patched_cb) {
4146 rc = -EFAULT;
4147 goto out;
4148 }
4149
4150 /*
4151 * The check that parser->user_cb_size <= parser->user_cb->size was done
4152 * in validate_queue_index().
4153 */
4154 memcpy(parser->patched_cb->kernel_address,
4155 parser->user_cb->kernel_address,
4156 parser->user_cb_size);
4157
4158 patched_cb_size = parser->patched_cb_size;
4159
4160 /* Validate patched CB instead of user CB */
4161 user_cb = parser->user_cb;
4162 parser->user_cb = parser->patched_cb;
4163 rc = gaudi_validate_cb(hdev, parser, true);
4164 parser->user_cb = user_cb;
4165
4166 if (rc) {
4167 hl_cb_put(parser->patched_cb);
4168 goto out;
4169 }
4170
4171 if (patched_cb_size != parser->patched_cb_size) {
4172 dev_err(hdev->dev, "user CB size mismatch\n");
4173 hl_cb_put(parser->patched_cb);
4174 rc = -EINVAL;
4175 goto out;
4176 }
4177
4178 out:
4179 /*
4180 * Always call cb destroy here because we still have 1 reference
4181 	 * to it by calling cb_get earlier. After the job is completed,
4182 * cb_put will release it, but here we want to remove it from the
4183 * idr
4184 */
4185 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4186 patched_cb_handle << PAGE_SHIFT);
4187
4188 return rc;
4189 }
4190
4191 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4192 struct hl_cs_parser *parser)
4193 {
4194 u64 patched_cb_handle;
4195 int rc;
4196
4197 rc = gaudi_validate_cb(hdev, parser, false);
4198
4199 if (rc)
4200 goto free_userptr;
4201
4202 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4203 parser->patched_cb_size, false, false,
4204 &patched_cb_handle);
4205 if (rc) {
4206 dev_err(hdev->dev,
4207 "Failed to allocate patched CB for DMA CS %d\n", rc);
4208 goto free_userptr;
4209 }
4210
4211 patched_cb_handle >>= PAGE_SHIFT;
4212 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4213 (u32) patched_cb_handle);
4214 /* hl_cb_get should never fail here so use kernel WARN */
4215 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4216 (u32) patched_cb_handle);
4217 if (!parser->patched_cb) {
4218 rc = -EFAULT;
4219 goto out;
4220 }
4221
4222 rc = gaudi_patch_cb(hdev, parser);
4223
4224 if (rc)
4225 hl_cb_put(parser->patched_cb);
4226
4227 out:
4228 /*
4229 * Always call cb destroy here because we still have 1 reference
4230 	 * to it by calling cb_get earlier. After the job is completed,
4231 * cb_put will release it, but here we want to remove it from the
4232 * idr
4233 */
4234 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4235 patched_cb_handle << PAGE_SHIFT);
4236
4237 free_userptr:
4238 if (rc)
4239 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4240 return rc;
4241 }
4242
4243 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4244 struct hl_cs_parser *parser)
4245 {
4246 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4247
4248 /* For internal queue jobs just check if CB address is valid */
4249 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4250 parser->user_cb_size,
4251 asic_prop->sram_user_base_address,
4252 asic_prop->sram_end_address))
4253 return 0;
4254
4255 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4256 parser->user_cb_size,
4257 asic_prop->dram_user_base_address,
4258 asic_prop->dram_end_address))
4259 return 0;
4260
4261 /* PMMU and HPMMU addresses are equal, check only one of them */
4262 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4263 parser->user_cb_size,
4264 asic_prop->pmmu.start_addr,
4265 asic_prop->pmmu.end_addr))
4266 return 0;
4267
4268 dev_err(hdev->dev,
4269 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4270 parser->user_cb, parser->user_cb_size);
4271
4272 return -EFAULT;
4273 }
4274
4275 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4276 {
4277 struct gaudi_device *gaudi = hdev->asic_specific;
4278
4279 if (parser->queue_type == QUEUE_TYPE_INT)
4280 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4281
4282 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4283 return gaudi_parse_cb_mmu(hdev, parser);
4284 else
4285 return gaudi_parse_cb_no_mmu(hdev, parser);
4286 }
4287
4288 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4289 void *kernel_address, u32 len,
4290 u64 cq_addr, u32 cq_val, u32 msi_vec,
4291 bool eb)
4292 {
4293 struct gaudi_device *gaudi = hdev->asic_specific;
4294 struct packet_msg_prot *cq_pkt;
4295 u32 tmp;
4296
4297 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4298
4299 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4300 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4301
4302 if (eb)
4303 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4304
4305 cq_pkt->ctl = cpu_to_le32(tmp);
4306 cq_pkt->value = cpu_to_le32(cq_val);
4307 cq_pkt->addr = cpu_to_le64(cq_addr);
4308
4309 cq_pkt++;
4310
4311 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4312 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4313 cq_pkt->ctl = cpu_to_le32(tmp);
4314 cq_pkt->value = cpu_to_le32(1);
4315
4316 if (!gaudi->multi_msi_mode)
4317 msi_vec = 0;
4318
4319 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4320 }
4321
4322 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4323 {
4324 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4325 }
4326
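/*
 * Fill a device memory region with a 64-bit pattern by building a single
 * memset LIN_DMA packet and sending it as a driver job on the DMA 0 queue.
 */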
4327 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4328 u32 size, u64 val)
4329 {
4330 struct packet_lin_dma *lin_dma_pkt;
4331 struct hl_cs_job *job;
4332 u32 cb_size, ctl, err_cause;
4333 struct hl_cb *cb;
4334 int rc;
4335
4336 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4337 if (!cb)
4338 return -EFAULT;
4339
4340 lin_dma_pkt = cb->kernel_address;
4341 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4342 cb_size = sizeof(*lin_dma_pkt);
4343
4344 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4345 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4346 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4347 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4348 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4349
4350 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4351 lin_dma_pkt->src_addr = cpu_to_le64(val);
4352 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4353 lin_dma_pkt->tsize = cpu_to_le32(size);
4354
4355 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4356 if (!job) {
4357 dev_err(hdev->dev, "Failed to allocate a new job\n");
4358 rc = -ENOMEM;
4359 goto release_cb;
4360 }
4361
4362 /* Verify DMA is OK */
4363 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4364 if (err_cause && !hdev->init_done) {
4365 dev_dbg(hdev->dev,
4366 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4367 err_cause);
4368 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4369 }
4370
4371 job->id = 0;
4372 job->user_cb = cb;
4373 job->user_cb->cs_cnt++;
4374 job->user_cb_size = cb_size;
4375 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4376 job->patched_cb = job->user_cb;
4377 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4378
4379 hl_debugfs_add_job(hdev, job);
4380
4381 rc = gaudi_send_job_on_qman0(hdev, job);
4382 hl_debugfs_remove_job(hdev, job);
4383 kfree(job);
4384 cb->cs_cnt--;
4385
4386 /* Verify DMA is OK */
4387 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4388 if (err_cause) {
4389 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4390 rc = -EIO;
4391 if (!hdev->init_done) {
4392 dev_dbg(hdev->dev,
4393 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4394 err_cause);
4395 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4396 }
4397 }
4398
4399 release_cb:
4400 hl_cb_put(cb);
4401 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4402
4403 return rc;
4404 }
4405
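/*
 * Zero the sync manager SOB and monitor-status registers that user jobs may
 * have modified. In the W_S block, only the entries above those reserved
 * for the driver are cleared.
 */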
4406 static void gaudi_restore_sm_registers(struct hl_device *hdev)
4407 {
4408 int i;
4409
4410 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4411 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4412 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4413 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4414 }
4415
4416 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4417 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4418 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4419 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4420 }
4421
4422 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4423
4424 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4425 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4426
4427 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4428
4429 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4430 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4431 }
4432
4433 static void gaudi_restore_dma_registers(struct hl_device *hdev)
4434 {
4435 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4436 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4437 int i;
4438
4439 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4440 u64 sob_addr = CFG_BASE +
4441 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4442 (i * sob_delta);
4443 u32 dma_offset = i * DMA_CORE_OFFSET;
4444
4445 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4446 lower_32_bits(sob_addr));
4447 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4448 upper_32_bits(sob_addr));
4449 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4450
4451 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4452 * modified by the user for SRAM reduction
4453 */
4454 if (i > 1)
4455 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4456 0x00000001);
4457 }
4458 }
4459
4460 static void gaudi_restore_qm_registers(struct hl_device *hdev)
4461 {
4462 u32 qman_offset;
4463 int i;
4464
4465 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4466 qman_offset = i * DMA_QMAN_OFFSET;
4467 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4468 }
4469
4470 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4471 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4472 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4473 }
4474
4475 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4476 qman_offset = i * TPC_QMAN_OFFSET;
4477 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4478 }
4479 }
4480
4481 static void gaudi_restore_user_registers(struct hl_device *hdev)
4482 {
4483 gaudi_restore_sm_registers(hdev);
4484 gaudi_restore_dma_registers(hdev);
4485 gaudi_restore_qm_registers(hdev);
4486 }
4487
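/*
 * On context switch: scrub the user SRAM with a known pattern, program the
 * new ASID in the MMU-related registers and restore the user-visible
 * registers to their default values.
 */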
4488 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4489 {
4490 struct asic_fixed_properties *prop = &hdev->asic_prop;
4491 u64 addr = prop->sram_user_base_address;
4492 u32 size = hdev->pldm ? 0x10000 :
4493 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4494 u64 val = 0x7777777777777777ull;
4495 int rc;
4496
4497 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4498 if (rc) {
4499 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4500 return rc;
4501 }
4502
4503 gaudi_mmu_prepare(hdev, asid);
4504
4505 gaudi_restore_user_registers(hdev);
4506
4507 return 0;
4508 }
4509
4510 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4511 {
4512 struct asic_fixed_properties *prop = &hdev->asic_prop;
4513 struct gaudi_device *gaudi = hdev->asic_specific;
4514 u64 addr = prop->mmu_pgt_addr;
4515 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4516
4517 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4518 return 0;
4519
4520 return gaudi_memset_device_memory(hdev, addr, size, 0);
4521 }
4522
4523 static void gaudi_restore_phase_topology(struct hl_device *hdev)
4524 {
4525
4526 }
4527
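/*
 * The debugfs read/write helpers below translate a device address into an
 * access through the CFG space, the SRAM BAR, the HBM BAR (moving the BAR
 * window if needed) or host physical memory.
 */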
4528 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4529 {
4530 struct asic_fixed_properties *prop = &hdev->asic_prop;
4531 struct gaudi_device *gaudi = hdev->asic_specific;
4532 u64 hbm_bar_addr;
4533 int rc = 0;
4534
4535 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4536
4537 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4538 (hdev->clock_gating_mask &
4539 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4540
4541 dev_err_ratelimited(hdev->dev,
4542 "Can't read register - clock gating is enabled!\n");
4543 rc = -EFAULT;
4544 } else {
4545 *val = RREG32(addr - CFG_BASE);
4546 }
4547
4548 } else if ((addr >= SRAM_BASE_ADDR) &&
4549 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4550 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4551 (addr - SRAM_BASE_ADDR));
4552 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4553 u64 bar_base_addr = DRAM_PHYS_BASE +
4554 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4555
4556 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4557 if (hbm_bar_addr != U64_MAX) {
4558 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4559 (addr - bar_base_addr));
4560
4561 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4562 hbm_bar_addr);
4563 }
4564 if (hbm_bar_addr == U64_MAX)
4565 rc = -EIO;
4566 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4567 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4568 } else {
4569 rc = -EFAULT;
4570 }
4571
4572 return rc;
4573 }
4574
4575 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4576 {
4577 struct asic_fixed_properties *prop = &hdev->asic_prop;
4578 struct gaudi_device *gaudi = hdev->asic_specific;
4579 u64 hbm_bar_addr;
4580 int rc = 0;
4581
4582 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4583
4584 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4585 (hdev->clock_gating_mask &
4586 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4587
4588 dev_err_ratelimited(hdev->dev,
4589 "Can't write register - clock gating is enabled!\n");
4590 rc = -EFAULT;
4591 } else {
4592 WREG32(addr - CFG_BASE, val);
4593 }
4594
4595 } else if ((addr >= SRAM_BASE_ADDR) &&
4596 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4597 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4598 (addr - SRAM_BASE_ADDR));
4599 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4600 u64 bar_base_addr = DRAM_PHYS_BASE +
4601 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4602
4603 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4604 if (hbm_bar_addr != U64_MAX) {
4605 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4606 (addr - bar_base_addr));
4607
4608 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4609 hbm_bar_addr);
4610 }
4611 if (hbm_bar_addr == U64_MAX)
4612 rc = -EIO;
4613 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4614 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4615 } else {
4616 rc = -EFAULT;
4617 }
4618
4619 return rc;
4620 }
4621
4622 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4623 {
4624 struct asic_fixed_properties *prop = &hdev->asic_prop;
4625 struct gaudi_device *gaudi = hdev->asic_specific;
4626 u64 hbm_bar_addr;
4627 int rc = 0;
4628
4629 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4630
4631 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4632 (hdev->clock_gating_mask &
4633 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4634
4635 dev_err_ratelimited(hdev->dev,
4636 "Can't read register - clock gating is enabled!\n");
4637 rc = -EFAULT;
4638 } else {
4639 u32 val_l = RREG32(addr - CFG_BASE);
4640 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4641
4642 *val = (((u64) val_h) << 32) | val_l;
4643 }
4644
4645 } else if ((addr >= SRAM_BASE_ADDR) &&
4646 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4647 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4648 (addr - SRAM_BASE_ADDR));
4649 } else if (addr <=
4650 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4651 u64 bar_base_addr = DRAM_PHYS_BASE +
4652 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4653
4654 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4655 if (hbm_bar_addr != U64_MAX) {
4656 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4657 (addr - bar_base_addr));
4658
4659 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4660 hbm_bar_addr);
4661 }
4662 if (hbm_bar_addr == U64_MAX)
4663 rc = -EIO;
4664 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4665 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4666 } else {
4667 rc = -EFAULT;
4668 }
4669
4670 return rc;
4671 }
4672
4673 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4674 {
4675 struct asic_fixed_properties *prop = &hdev->asic_prop;
4676 struct gaudi_device *gaudi = hdev->asic_specific;
4677 u64 hbm_bar_addr;
4678 int rc = 0;
4679
4680 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4681
4682 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4683 (hdev->clock_gating_mask &
4684 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4685
4686 dev_err_ratelimited(hdev->dev,
4687 "Can't write register - clock gating is enabled!\n");
4688 rc = -EFAULT;
4689 } else {
4690 WREG32(addr - CFG_BASE, lower_32_bits(val));
4691 WREG32(addr + sizeof(u32) - CFG_BASE,
4692 upper_32_bits(val));
4693 }
4694
4695 } else if ((addr >= SRAM_BASE_ADDR) &&
4696 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4697 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4698 (addr - SRAM_BASE_ADDR));
4699 } else if (addr <=
4700 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4701 u64 bar_base_addr = DRAM_PHYS_BASE +
4702 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4703
4704 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4705 if (hbm_bar_addr != U64_MAX) {
4706 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4707 (addr - bar_base_addr));
4708
4709 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4710 hbm_bar_addr);
4711 }
4712 if (hbm_bar_addr == U64_MAX)
4713 rc = -EIO;
4714 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4715 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4716 } else {
4717 rc = -EFAULT;
4718 }
4719
4720 return rc;
4721 }
4722
4723 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4724 {
4725 struct gaudi_device *gaudi = hdev->asic_specific;
4726
4727 if (hdev->hard_reset_pending)
4728 return U64_MAX;
4729
4730 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4731 (addr - gaudi->hbm_bar_cur_addr));
4732 }
4733
4734 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4735 {
4736 struct gaudi_device *gaudi = hdev->asic_specific;
4737
4738 if (hdev->hard_reset_pending)
4739 return;
4740
4741 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4742 (addr - gaudi->hbm_bar_cur_addr));
4743 }
4744
4745 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4746 {
4747 /* mask to zero the MMBP and ASID bits */
4748 WREG32_AND(reg, ~0x7FF);
4749 WREG32_OR(reg, asid);
4750 }
4751
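/*
 * Program the context's ASID (and clear the MMU-bypass bit) in the QMAN and
 * engine user registers of all DMA, TPC and MME blocks. Clock gating is
 * disabled for the duration of the register writes.
 */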
4752 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4753 {
4754 struct gaudi_device *gaudi = hdev->asic_specific;
4755
4756 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4757 return;
4758
4759 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4760 WARN(1, "asid %u is too big\n", asid);
4761 return;
4762 }
4763
4764 mutex_lock(&gaudi->clk_gate_mutex);
4765
4766 hdev->asic_funcs->disable_clock_gating(hdev);
4767
4768 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4769 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4770 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4771 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4773
4774 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4775 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4776 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4777 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4779
4780 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4781 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4782 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4785
4786 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4787 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4788 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4791
4792 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4793 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4794 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4797
4798 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4799 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4800 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4803
4804 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4805 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4806 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4809
4810 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4814 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4815
4816 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4817 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4818 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4822 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4823 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4824
4825 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4830 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4832
4833 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4840
4841 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4848
4849 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4855 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4856
4857 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4864
4865 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4872
4873 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4879 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4880
4881 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4887 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4888
4889 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4896 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4897 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4898 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4899
4900 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4901 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4902 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4903 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4904 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4905 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4906 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4907 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4908 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4909 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4910 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4911 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4912
4913 hdev->asic_funcs->set_clock_gating(hdev);
4914
4915 mutex_unlock(&gaudi->clk_gate_mutex);
4916 }
4917
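/*
 * Send a driver-internal job on the DMA 0 queue and wait for a fence value
 * written by its trailing MSG_PROT packet. The device must be idle, and the
 * DMA core protection bit is set for the duration of the job.
 */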
4918 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4919 struct hl_cs_job *job)
4920 {
4921 struct packet_msg_prot *fence_pkt;
4922 u32 *fence_ptr;
4923 dma_addr_t fence_dma_addr;
4924 struct hl_cb *cb;
4925 u32 tmp, timeout, dma_offset;
4926 int rc;
4927
4928 if (hdev->pldm)
4929 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4930 else
4931 timeout = HL_DEVICE_TIMEOUT_USEC;
4932
4933 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4934 dev_err_ratelimited(hdev->dev,
4935 "Can't send driver job on QMAN0 because the device is not idle\n");
4936 return -EBUSY;
4937 }
4938
4939 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4940 &fence_dma_addr);
4941 if (!fence_ptr) {
4942 dev_err(hdev->dev,
4943 "Failed to allocate fence memory for QMAN0\n");
4944 return -ENOMEM;
4945 }
4946
4947 cb = job->patched_cb;
4948
4949 fence_pkt = cb->kernel_address +
4950 job->job_cb_size - sizeof(struct packet_msg_prot);
4951
4952 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4953 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4954 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4955
4956 fence_pkt->ctl = cpu_to_le32(tmp);
4957 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4958 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4959
4960 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4961
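	/*
	 * Set the protection bit of the PCI DMA core used for QMAN0 before
	 * sending the driver job; the same bit is cleared again below once
	 * the job completes or times out.
	 */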
4962 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4963
4964 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4965 job->job_cb_size, cb->bus_address);
4966 if (rc) {
4967 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4968 goto free_fence_ptr;
4969 }
4970
4971 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4972 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4973 timeout, true);
4974
4975 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4976
4977 if (rc == -ETIMEDOUT) {
4978 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4979 goto free_fence_ptr;
4980 }
4981
4982 free_fence_ptr:
4983 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4984 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4985
4986 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4987 fence_dma_addr);
4988 return rc;
4989 }
4990
4991 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4992 {
4993 if (event_type >= GAUDI_EVENT_SIZE)
4994 goto event_not_supported;
4995
4996 if (!gaudi_irq_map_table[event_type].valid)
4997 goto event_not_supported;
4998
4999 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5000
5001 return;
5002
5003 event_not_supported:
5004 snprintf(desc, size, "N/A");
5005 }
5006
5007 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5008 u32 x_y, bool is_write)
5009 {
5010 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5011
5012 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5013 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5014
5015 switch (x_y) {
5016 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5017 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5018 dma_id[0] = 0;
5019 dma_id[1] = 2;
5020 break;
5021 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5022 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5023 dma_id[0] = 1;
5024 dma_id[1] = 3;
5025 break;
5026 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5027 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5028 dma_id[0] = 4;
5029 dma_id[1] = 6;
5030 break;
5031 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5032 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5033 dma_id[0] = 5;
5034 dma_id[1] = 7;
5035 break;
5036 default:
5037 goto unknown_initiator;
5038 }
5039
5040 for (i = 0 ; i < 2 ; i++) {
5041 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5042 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5043 }
5044
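	/*
	 * Each DMA_IF initiator ID covers two DMA cores, so the per-core
	 * ERR_CAUSE registers read above are used to decide which of the two
	 * actually raised the access; if neither (or both) has the relevant
	 * HBW read/write error bit set, both candidates are reported.
	 */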
5045 switch (x_y) {
5046 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5047 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5048 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5049 return "DMA0";
5050 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5051 return "DMA2";
5052 else
5053 return "DMA0 or DMA2";
5054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5056 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057 return "DMA1";
5058 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059 return "DMA3";
5060 else
5061 return "DMA1 or DMA3";
5062 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5063 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5064 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065 return "DMA4";
5066 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067 return "DMA6";
5068 else
5069 return "DMA4 or DMA6";
5070 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5071 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5072 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073 return "DMA5";
5074 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075 return "DMA7";
5076 else
5077 return "DMA5 or DMA7";
5078 }
5079
5080 unknown_initiator:
5081 return "unknown initiator";
5082 }
5083
5084 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5085 bool is_write)
5086 {
5087 u32 val, x_y, axi_id;
5088
5089 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5090 RREG32(mmMMU_UP_RAZWI_READ_ID);
5091 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5092 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5093 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5094 RAZWI_INITIATOR_AXI_ID_SHIFT);
5095
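	/*
	 * The Y/X fields identify the location of the initiator on the chip
	 * grid; where several engines share one location (e.g. a TPC and a
	 * NIC), the AXI ID field is used below to tell them apart.
	 */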
5096 switch (x_y) {
5097 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5098 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5099 return "TPC0";
5100 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5101 return "NIC0";
5102 break;
5103 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5104 return "TPC1";
5105 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5106 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5107 return "MME0";
5108 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5109 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5110 return "MME1";
5111 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5112 return "TPC2";
5113 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5114 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5115 return "TPC3";
5116 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5117 return "PCI";
5118 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5119 return "CPU";
5120 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5121 return "PSOC";
5122 break;
5123 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5124 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5125 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5126 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5127 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5128 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5129 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5130 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5131 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5132 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5133 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5134 return "TPC4";
5135 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5136 return "NIC1";
5137 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5138 return "NIC2";
5139 break;
5140 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5141 return "TPC5";
5142 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5143 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5144 return "MME2";
5145 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5146 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5147 return "MME3";
5148 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5149 return "TPC6";
5150 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5151 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5152 return "TPC7";
5153 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5154 return "NIC4";
5155 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5156 return "NIC5";
5157 break;
5158 default:
5159 break;
5160 }
5161
5162 dev_err(hdev->dev,
5163 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5164 val,
5165 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5166 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5167 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5168 RAZWI_INITIATOR_AXI_ID_MASK);
5169
5170 return "unknown initiator";
5171 }
5172
5173 static void gaudi_print_razwi_info(struct hl_device *hdev)
5174 {
5175 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5176 dev_err_ratelimited(hdev->dev,
5177 "RAZWI event caused by illegal write of %s\n",
5178 gaudi_get_razwi_initiator_name(hdev, true));
5179 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5180 }
5181
5182 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5183 dev_err_ratelimited(hdev->dev,
5184 "RAZWI event caused by illegal read of %s\n",
5185 gaudi_get_razwi_initiator_name(hdev, false));
5186 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5187 }
5188 }
5189
5190 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5191 {
5192 struct gaudi_device *gaudi = hdev->asic_specific;
5193 u64 addr;
5194 u32 val;
5195
5196 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5197 return;
5198
5199 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5200 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5201 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5202 addr <<= 32;
5203 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5204
5205 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5206 addr);
5207
5208 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5209 }
5210
5211 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5212 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5213 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5214 addr <<= 32;
5215 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5216
5217 dev_err_ratelimited(hdev->dev,
5218 "MMU access error on va 0x%llx\n", addr);
5219
5220 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5221 }
5222 }
5223
5224 /*
5225 * +-------------------+------------------------------------------------------+
5226 * | Configuration Reg | Description |
5227 * | Address | |
5228 * +-------------------+------------------------------------------------------+
5229 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5230 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5231 * | |0xF34 memory wrappers 63:32 |
5232 * | |0xF38 memory wrappers 95:64 |
5233 * | |0xF3C memory wrappers 127:96 |
5234 * +-------------------+------------------------------------------------------+
5235 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5236 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5237 * | |0xF44 memory wrappers 63:32 |
5238 * | |0xF48 memory wrappers 95:64 |
5239 * | |0xF4C memory wrappers 127:96 |
5240 * +-------------------+------------------------------------------------------+
5241 */
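/*
 * Example: a single-bit error in memory wrapper 70 sets bit 70 % 32 = 6 in
 * the third indication register (offset 0xF38), so the scan below recovers
 * the wrapper index as err_bit + 32 * i with i = 2.
 */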
5242 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5243 struct ecc_info_extract_params *params, u64 *ecc_address,
5244 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5245 {
5246 struct gaudi_device *gaudi = hdev->asic_specific;
5247 u32 i, num_mem_regs, reg, err_bit;
5248 u64 err_addr, err_word = 0;
5249 int rc = 0;
5250
5251 num_mem_regs = params->num_memories / 32 +
5252 ((params->num_memories % 32) ? 1 : 0);
5253
5254 if (params->block_address >= CFG_BASE)
5255 params->block_address -= CFG_BASE;
5256
5257 if (params->derr)
5258 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5259 else
5260 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5261
5262 if (params->disable_clock_gating) {
5263 mutex_lock(&gaudi->clk_gate_mutex);
5264 hdev->asic_funcs->disable_clock_gating(hdev);
5265 }
5266
5267 /* Set invalid wrapper index */
5268 *memory_wrapper_idx = 0xFF;
5269
5270 /* Iterate through memory wrappers, a single bit must be set */
5271 for (i = 0 ; i < num_mem_regs ; i++) {
5272 		err_word = RREG32(err_addr + i * 4);
5274 if (err_word) {
5275 err_bit = __ffs(err_word);
5276 *memory_wrapper_idx = err_bit + (32 * i);
5277 break;
5278 }
5279 }
5280
5281 if (*memory_wrapper_idx == 0xFF) {
5282 dev_err(hdev->dev, "ECC error information cannot be found\n");
5283 rc = -EINVAL;
5284 goto enable_clk_gate;
5285 }
5286
5287 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5288 *memory_wrapper_idx);
5289
5290 *ecc_address =
5291 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5292 *ecc_syndrom =
5293 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5294
5295 /* Clear error indication */
5296 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5297 if (params->derr)
5298 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5299 else
5300 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5301
5302 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5303
5304 enable_clk_gate:
5305 if (params->disable_clock_gating) {
5306 hdev->asic_funcs->set_clock_gating(hdev);
5307
5308 mutex_unlock(&gaudi->clk_gate_mutex);
5309 }
5310
5311 return rc;
5312 }
5313
5314 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5315 const char *qm_name,
5316 u64 glbl_sts_addr,
5317 u64 arb_err_addr)
5318 {
5319 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5320 char reg_desc[32];
5321
5322 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5323 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5324 glbl_sts_clr_val = 0;
5325 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5326
5327 if (!glbl_sts_val)
5328 continue;
5329
5330 if (i == QMAN_STREAMS)
5331 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5332 else
5333 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5334
5335 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5336 if (glbl_sts_val & BIT(j)) {
5337 dev_err_ratelimited(hdev->dev,
5338 "%s %s. err cause: %s\n",
5339 qm_name, reg_desc,
5340 gaudi_qman_error_cause[j]);
5341 glbl_sts_clr_val |= BIT(j);
5342 }
5343 }
5344
5345 		/* Write 1 to clear errors */
5346 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5347 }
5348
5349 arb_err_val = RREG32(arb_err_addr);
5350
5351 if (!arb_err_val)
5352 return;
5353
5354 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5355 if (arb_err_val & BIT(j)) {
5356 dev_err_ratelimited(hdev->dev,
5357 "%s ARB_ERR. err cause: %s\n",
5358 qm_name,
5359 gaudi_qman_arb_error_cause[j]);
5360 }
5361 }
5362 }
5363
5364 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5365 struct hl_eq_ecc_data *ecc_data)
5366 {
5367 struct ecc_info_extract_params params;
5368 u64 ecc_address = 0, ecc_syndrom = 0;
5369 u8 index, memory_wrapper_idx = 0;
5370 bool extract_info_from_fw;
5371 int rc;
5372
5373 switch (event_type) {
5374 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5375 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5376 extract_info_from_fw = true;
5377 break;
5378 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5379 index = event_type - GAUDI_EVENT_TPC0_SERR;
5380 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5381 params.num_memories = 90;
5382 params.derr = false;
5383 params.disable_clock_gating = true;
5384 extract_info_from_fw = false;
5385 break;
5386 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5387 index = event_type - GAUDI_EVENT_TPC0_DERR;
5388 params.block_address =
5389 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5390 params.num_memories = 90;
5391 params.derr = true;
5392 params.disable_clock_gating = true;
5393 extract_info_from_fw = false;
5394 break;
5395 case GAUDI_EVENT_MME0_ACC_SERR:
5396 case GAUDI_EVENT_MME1_ACC_SERR:
5397 case GAUDI_EVENT_MME2_ACC_SERR:
5398 case GAUDI_EVENT_MME3_ACC_SERR:
5399 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5400 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5401 params.num_memories = 128;
5402 params.derr = false;
5403 params.disable_clock_gating = true;
5404 extract_info_from_fw = false;
5405 break;
5406 case GAUDI_EVENT_MME0_ACC_DERR:
5407 case GAUDI_EVENT_MME1_ACC_DERR:
5408 case GAUDI_EVENT_MME2_ACC_DERR:
5409 case GAUDI_EVENT_MME3_ACC_DERR:
5410 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5411 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5412 params.num_memories = 128;
5413 params.derr = true;
5414 params.disable_clock_gating = true;
5415 extract_info_from_fw = false;
5416 break;
5417 case GAUDI_EVENT_MME0_SBAB_SERR:
5418 case GAUDI_EVENT_MME1_SBAB_SERR:
5419 case GAUDI_EVENT_MME2_SBAB_SERR:
5420 case GAUDI_EVENT_MME3_SBAB_SERR:
5421 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5422 params.block_address =
5423 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5424 params.num_memories = 33;
5425 params.derr = false;
5426 params.disable_clock_gating = true;
5427 extract_info_from_fw = false;
5428 break;
5429 case GAUDI_EVENT_MME0_SBAB_DERR:
5430 case GAUDI_EVENT_MME1_SBAB_DERR:
5431 case GAUDI_EVENT_MME2_SBAB_DERR:
5432 case GAUDI_EVENT_MME3_SBAB_DERR:
5433 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5434 params.block_address =
5435 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5436 params.num_memories = 33;
5437 params.derr = true;
5438 params.disable_clock_gating = true;
5439 extract_info_from_fw = false;
5440 break;
5441 default:
5442 return;
5443 }
5444
5445 if (extract_info_from_fw) {
5446 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5447 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5448 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5449 } else {
5450 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5451 &ecc_syndrom, &memory_wrapper_idx);
5452 if (rc)
5453 return;
5454 }
5455
5456 dev_err(hdev->dev,
5457 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
5458 ecc_address, ecc_syndrom, memory_wrapper_idx);
5459 }
5460
5461 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5462 {
5463 u64 glbl_sts_addr, arb_err_addr;
5464 u8 index;
5465 char desc[32];
5466
5467 switch (event_type) {
5468 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5469 index = event_type - GAUDI_EVENT_TPC0_QM;
5470 glbl_sts_addr =
5471 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5472 arb_err_addr =
5473 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5474 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5475 break;
5476 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5477 index = event_type - GAUDI_EVENT_MME0_QM;
5478 glbl_sts_addr =
5479 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5480 arb_err_addr =
5481 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5482 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5483 break;
5484 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5485 index = event_type - GAUDI_EVENT_DMA0_QM;
5486 glbl_sts_addr =
5487 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5488 arb_err_addr =
5489 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5490 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5491 break;
5492 default:
5493 return;
5494 }
5495
5496 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5497 }
5498
5499 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5500 bool razwi)
5501 {
5502 char desc[64] = "";
5503
5504 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5505 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5506 event_type, desc);
5507
5508 if (razwi) {
5509 gaudi_print_razwi_info(hdev);
5510 gaudi_print_mmu_error_info(hdev);
5511 }
5512 }
5513
5514 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5515 {
5516 struct gaudi_device *gaudi = hdev->asic_specific;
5517
5518 /* Unmask all IRQs since some could have been received
5519 * during the soft reset
5520 */
5521 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5522 }
5523
5524 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5525 {
5526 int ch, err = 0;
5527 u32 base, val, val2;
5528
5529 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
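	/*
	 * Each HBM device exposes a 0x1000-byte register bank per channel;
	 * every bank reports two sub-channels (printed as "pc" below, likely
	 * the pseudo-channel index), with parity/ECC status at offsets 0x6C
	 * and 0x7C and the ECC counters/addresses around 0x60 and 0x70.
	 */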
5530 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5531 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5532 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5533 if (val) {
5534 err = 1;
5535 dev_err(hdev->dev,
5536 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5537 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5538 (val >> 2) & 0x1, (val >> 3) & 0x1,
5539 (val >> 4) & 0x1);
5540
5541 val2 = RREG32(base + ch * 0x1000 + 0x060);
5542 dev_err(hdev->dev,
5543 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5544 device, ch * 2,
5545 RREG32(base + ch * 0x1000 + 0x064),
5546 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5547 (val2 & 0xFF0000) >> 16,
5548 (val2 & 0xFF000000) >> 24);
5549 }
5550
5551 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5552 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5553 if (val) {
5554 err = 1;
5555 dev_err(hdev->dev,
5556 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5557 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5558 (val >> 2) & 0x1, (val >> 3) & 0x1,
5559 (val >> 4) & 0x1);
5560
5561 val2 = RREG32(base + ch * 0x1000 + 0x070);
5562 dev_err(hdev->dev,
5563 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5564 device, ch * 2 + 1,
5565 RREG32(base + ch * 0x1000 + 0x074),
5566 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5567 (val2 & 0xFF0000) >> 16,
5568 (val2 & 0xFF000000) >> 24);
5569 }
5570
5571 /* Clear interrupts */
5572 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5573 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5574 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5575 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5576 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5577 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5578 }
5579
5580 val = RREG32(base + 0x8F30);
5581 val2 = RREG32(base + 0x8F34);
5582 if (val | val2) {
5583 err = 1;
5584 dev_err(hdev->dev,
5585 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5586 device, val, val2);
5587 }
5588 val = RREG32(base + 0x8F40);
5589 val2 = RREG32(base + 0x8F44);
5590 if (val | val2) {
5591 err = 1;
5592 dev_err(hdev->dev,
5593 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5594 device, val, val2);
5595 }
5596
5597 return err;
5598 }
5599
5600 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5601 {
5602 switch (hbm_event_type) {
5603 case GAUDI_EVENT_HBM0_SPI_0:
5604 case GAUDI_EVENT_HBM0_SPI_1:
5605 return 0;
5606 case GAUDI_EVENT_HBM1_SPI_0:
5607 case GAUDI_EVENT_HBM1_SPI_1:
5608 return 1;
5609 case GAUDI_EVENT_HBM2_SPI_0:
5610 case GAUDI_EVENT_HBM2_SPI_1:
5611 return 2;
5612 case GAUDI_EVENT_HBM3_SPI_0:
5613 case GAUDI_EVENT_HBM3_SPI_1:
5614 return 3;
5615 default:
5616 break;
5617 }
5618
5619 /* Should never happen */
5620 return 0;
5621 }
5622
5623 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5624 char *interrupt_name)
5625 {
5626 struct gaudi_device *gaudi = hdev->asic_specific;
5627 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5628 bool soft_reset_required = false;
5629
5630 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5631 * gating, and thus cannot be done in CPU-CP and should be done instead
5632 * by the driver.
5633 */
5634
5635 mutex_lock(&gaudi->clk_gate_mutex);
5636
5637 hdev->asic_funcs->disable_clock_gating(hdev);
5638
5639 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5640 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5641
5642 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5643 if (tpc_interrupts_cause & BIT(i)) {
5644 dev_err_ratelimited(hdev->dev,
5645 "TPC%d_%s interrupt cause: %s\n",
5646 tpc_id, interrupt_name,
5647 gaudi_tpc_interrupts_cause[i]);
5648 /* If this is QM error, we need to soft-reset */
5649 			/* If this is a QM error, we need to soft-reset */
5650 soft_reset_required = true;
5651 }
5652
5653 /* Clear interrupts */
5654 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5655
5656 hdev->asic_funcs->set_clock_gating(hdev);
5657
5658 mutex_unlock(&gaudi->clk_gate_mutex);
5659
5660 return soft_reset_required;
5661 }
5662
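/*
 * The TPC index is recovered from the distance to the TPC0 event: DEC events
 * are laid out two entries apart per TPC in the event map, and KRN_ERR events
 * six entries apart, which is what the divisions below rely on.
 */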
5663 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5664 {
5665 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5666 }
5667
5668 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5669 {
5670 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5671 }
5672
5673 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5674 u16 event_type)
5675 {
5676 switch (event_type) {
5677 case GAUDI_EVENT_FIX_POWER_ENV_S:
5678 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5679 dev_info_ratelimited(hdev->dev,
5680 "Clock throttling due to power consumption\n");
5681 break;
5682
5683 case GAUDI_EVENT_FIX_POWER_ENV_E:
5684 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5685 dev_info_ratelimited(hdev->dev,
5686 			"Power envelope is safe, back to optimal clock\n");
5687 break;
5688
5689 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5690 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5691 dev_info_ratelimited(hdev->dev,
5692 "Clock throttling due to overheating\n");
5693 break;
5694
5695 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5696 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5697 dev_info_ratelimited(hdev->dev,
5698 			"Thermal envelope is safe, back to optimal clock\n");
5699 break;
5700
5701 default:
5702 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5703 event_type);
5704 break;
5705 }
5706 }
5707
5708 static void gaudi_handle_eqe(struct hl_device *hdev,
5709 struct hl_eq_entry *eq_entry)
5710 {
5711 struct gaudi_device *gaudi = hdev->asic_specific;
5712 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5713 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5714 >> EQ_CTL_EVENT_TYPE_SHIFT);
5715 u8 cause;
5716 bool reset_required;
5717
5718 gaudi->events_stat[event_type]++;
5719 gaudi->events_stat_aggregate[event_type]++;
5720
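	/*
	 * Rough policy of the switch below: double-bit (DERR) ECC errors and
	 * other fatal causes escalate to a hard reset when
	 * hard_reset_on_fw_events is set, single-bit (SERR) errors and benign
	 * events are logged and have their IRQ unmasked again, and TPC
	 * DEC/KRN errors reset only if the per-TPC cause requires it.
	 */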
5721 switch (event_type) {
5722 case GAUDI_EVENT_PCIE_CORE_DERR:
5723 case GAUDI_EVENT_PCIE_IF_DERR:
5724 case GAUDI_EVENT_PCIE_PHY_DERR:
5725 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5726 case GAUDI_EVENT_MME0_ACC_DERR:
5727 case GAUDI_EVENT_MME0_SBAB_DERR:
5728 case GAUDI_EVENT_MME1_ACC_DERR:
5729 case GAUDI_EVENT_MME1_SBAB_DERR:
5730 case GAUDI_EVENT_MME2_ACC_DERR:
5731 case GAUDI_EVENT_MME2_SBAB_DERR:
5732 case GAUDI_EVENT_MME3_ACC_DERR:
5733 case GAUDI_EVENT_MME3_SBAB_DERR:
5734 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5735 fallthrough;
5736 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5737 case GAUDI_EVENT_PSOC_MEM_DERR:
5738 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5739 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5740 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5741 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5742 case GAUDI_EVENT_MMU_DERR:
5743 gaudi_print_irq_info(hdev, event_type, true);
5744 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5745 if (hdev->hard_reset_on_fw_events)
5746 hl_device_reset(hdev, true, false);
5747 break;
5748
5749 case GAUDI_EVENT_GIC500:
5750 case GAUDI_EVENT_AXI_ECC:
5751 case GAUDI_EVENT_L2_RAM_ECC:
5752 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5753 gaudi_print_irq_info(hdev, event_type, false);
5754 if (hdev->hard_reset_on_fw_events)
5755 hl_device_reset(hdev, true, false);
5756 break;
5757
5758 case GAUDI_EVENT_HBM0_SPI_0:
5759 case GAUDI_EVENT_HBM1_SPI_0:
5760 case GAUDI_EVENT_HBM2_SPI_0:
5761 case GAUDI_EVENT_HBM3_SPI_0:
5762 gaudi_print_irq_info(hdev, event_type, false);
5763 gaudi_hbm_read_interrupts(hdev,
5764 gaudi_hbm_event_to_dev(event_type));
5765 if (hdev->hard_reset_on_fw_events)
5766 hl_device_reset(hdev, true, false);
5767 break;
5768
5769 case GAUDI_EVENT_HBM0_SPI_1:
5770 case GAUDI_EVENT_HBM1_SPI_1:
5771 case GAUDI_EVENT_HBM2_SPI_1:
5772 case GAUDI_EVENT_HBM3_SPI_1:
5773 gaudi_print_irq_info(hdev, event_type, false);
5774 gaudi_hbm_read_interrupts(hdev,
5775 gaudi_hbm_event_to_dev(event_type));
5776 break;
5777
5778 case GAUDI_EVENT_TPC0_DEC:
5779 case GAUDI_EVENT_TPC1_DEC:
5780 case GAUDI_EVENT_TPC2_DEC:
5781 case GAUDI_EVENT_TPC3_DEC:
5782 case GAUDI_EVENT_TPC4_DEC:
5783 case GAUDI_EVENT_TPC5_DEC:
5784 case GAUDI_EVENT_TPC6_DEC:
5785 case GAUDI_EVENT_TPC7_DEC:
5786 gaudi_print_irq_info(hdev, event_type, true);
5787 reset_required = gaudi_tpc_read_interrupts(hdev,
5788 tpc_dec_event_to_tpc_id(event_type),
5789 "AXI_SLV_DEC_Error");
5790 if (reset_required) {
5791 dev_err(hdev->dev, "hard reset required due to %s\n",
5792 gaudi_irq_map_table[event_type].name);
5793
5794 if (hdev->hard_reset_on_fw_events)
5795 hl_device_reset(hdev, true, false);
5796 } else {
5797 hl_fw_unmask_irq(hdev, event_type);
5798 }
5799 break;
5800
5801 case GAUDI_EVENT_TPC0_KRN_ERR:
5802 case GAUDI_EVENT_TPC1_KRN_ERR:
5803 case GAUDI_EVENT_TPC2_KRN_ERR:
5804 case GAUDI_EVENT_TPC3_KRN_ERR:
5805 case GAUDI_EVENT_TPC4_KRN_ERR:
5806 case GAUDI_EVENT_TPC5_KRN_ERR:
5807 case GAUDI_EVENT_TPC6_KRN_ERR:
5808 case GAUDI_EVENT_TPC7_KRN_ERR:
5809 gaudi_print_irq_info(hdev, event_type, true);
5810 reset_required = gaudi_tpc_read_interrupts(hdev,
5811 tpc_krn_event_to_tpc_id(event_type),
5812 "KRN_ERR");
5813 if (reset_required) {
5814 dev_err(hdev->dev, "hard reset required due to %s\n",
5815 gaudi_irq_map_table[event_type].name);
5816
5817 if (hdev->hard_reset_on_fw_events)
5818 hl_device_reset(hdev, true, false);
5819 } else {
5820 hl_fw_unmask_irq(hdev, event_type);
5821 }
5822 break;
5823
5824 case GAUDI_EVENT_PCIE_CORE_SERR:
5825 case GAUDI_EVENT_PCIE_IF_SERR:
5826 case GAUDI_EVENT_PCIE_PHY_SERR:
5827 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5828 case GAUDI_EVENT_MME0_ACC_SERR:
5829 case GAUDI_EVENT_MME0_SBAB_SERR:
5830 case GAUDI_EVENT_MME1_ACC_SERR:
5831 case GAUDI_EVENT_MME1_SBAB_SERR:
5832 case GAUDI_EVENT_MME2_ACC_SERR:
5833 case GAUDI_EVENT_MME2_SBAB_SERR:
5834 case GAUDI_EVENT_MME3_ACC_SERR:
5835 case GAUDI_EVENT_MME3_SBAB_SERR:
5836 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5837 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5838 case GAUDI_EVENT_PSOC_MEM_SERR:
5839 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5840 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5841 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5842 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5843 fallthrough;
5844 case GAUDI_EVENT_MMU_SERR:
5845 gaudi_print_irq_info(hdev, event_type, true);
5846 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5847 hl_fw_unmask_irq(hdev, event_type);
5848 break;
5849
5850 case GAUDI_EVENT_PCIE_DEC:
5851 case GAUDI_EVENT_MME0_WBC_RSP:
5852 case GAUDI_EVENT_MME0_SBAB0_RSP:
5853 case GAUDI_EVENT_MME1_WBC_RSP:
5854 case GAUDI_EVENT_MME1_SBAB0_RSP:
5855 case GAUDI_EVENT_MME2_WBC_RSP:
5856 case GAUDI_EVENT_MME2_SBAB0_RSP:
5857 case GAUDI_EVENT_MME3_WBC_RSP:
5858 case GAUDI_EVENT_MME3_SBAB0_RSP:
5859 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5860 case GAUDI_EVENT_PSOC_AXI_DEC:
5861 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5862 case GAUDI_EVENT_MMU_PAGE_FAULT:
5863 case GAUDI_EVENT_MMU_WR_PERM:
5864 case GAUDI_EVENT_RAZWI_OR_ADC:
5865 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5866 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5867 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5868 fallthrough;
5869 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5870 gaudi_print_irq_info(hdev, event_type, true);
5871 gaudi_handle_qman_err(hdev, event_type);
5872 hl_fw_unmask_irq(hdev, event_type);
5873 break;
5874
5875 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5876 gaudi_print_irq_info(hdev, event_type, true);
5877 if (hdev->hard_reset_on_fw_events)
5878 hl_device_reset(hdev, true, false);
5879 break;
5880
5881 case GAUDI_EVENT_TPC0_BMON_SPMU:
5882 case GAUDI_EVENT_TPC1_BMON_SPMU:
5883 case GAUDI_EVENT_TPC2_BMON_SPMU:
5884 case GAUDI_EVENT_TPC3_BMON_SPMU:
5885 case GAUDI_EVENT_TPC4_BMON_SPMU:
5886 case GAUDI_EVENT_TPC5_BMON_SPMU:
5887 case GAUDI_EVENT_TPC6_BMON_SPMU:
5888 case GAUDI_EVENT_TPC7_BMON_SPMU:
5889 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5890 gaudi_print_irq_info(hdev, event_type, false);
5891 hl_fw_unmask_irq(hdev, event_type);
5892 break;
5893
5894 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5895 gaudi_print_clk_change_info(hdev, event_type);
5896 hl_fw_unmask_irq(hdev, event_type);
5897 break;
5898
5899 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5900 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5901 dev_err(hdev->dev,
5902 "Received high temp H/W interrupt %d (cause %d)\n",
5903 event_type, cause);
5904 break;
5905
5906 default:
5907 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5908 event_type);
5909 break;
5910 }
5911 }
5912
5913 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5914 u32 *size)
5915 {
5916 struct gaudi_device *gaudi = hdev->asic_specific;
5917
5918 if (aggregate) {
5919 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5920 return gaudi->events_stat_aggregate;
5921 }
5922
5923 *size = (u32) sizeof(gaudi->events_stat);
5924 return gaudi->events_stat;
5925 }
5926
5927 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5928 u32 flags)
5929 {
5930 struct gaudi_device *gaudi = hdev->asic_specific;
5931 u32 status, timeout_usec;
5932 int rc;
5933
5934 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5935 hdev->hard_reset_pending)
5936 return 0;
5937
5938 if (hdev->pldm)
5939 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5940 else
5941 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5942
5943 mutex_lock(&hdev->mmu_cache_lock);
5944
5945 /* L0 & L1 invalidation */
5946 WREG32(mmSTLB_INV_PS, 3);
5947 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5948 WREG32(mmSTLB_INV_PS, 2);
5949
5950 rc = hl_poll_timeout(
5951 hdev,
5952 mmSTLB_INV_PS,
5953 status,
5954 !status,
5955 1000,
5956 timeout_usec);
5957
5958 WREG32(mmSTLB_INV_SET, 0);
5959
5960 mutex_unlock(&hdev->mmu_cache_lock);
5961
5962 if (rc) {
5963 dev_err_ratelimited(hdev->dev,
5964 "MMU cache invalidation timeout\n");
5965 hl_device_reset(hdev, true, false);
5966 }
5967
5968 return rc;
5969 }
5970
5971 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5972 bool is_hard, u32 asid, u64 va, u64 size)
5973 {
5974 struct gaudi_device *gaudi = hdev->asic_specific;
5975 u32 status, timeout_usec;
5976 u32 inv_data;
5977 u32 pi;
5978 int rc;
5979
5980 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5981 hdev->hard_reset_pending)
5982 return 0;
5983
5984 mutex_lock(&hdev->mmu_cache_lock);
5985
5986 if (hdev->pldm)
5987 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5988 else
5989 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5990
5991 /*
5992 * TODO: currently invalidate entire L0 & L1 as in regular hard
5993 * invalidation. Need to apply invalidation of specific cache
5994 * lines with mask of ASID & VA & size.
5995 	 * Note that L1 will be flushed entirely in any case.
5996 */
5997
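	/*
	 * The invalidation request is issued by advancing the 8-bit producer
	 * index in STLB_CACHE_INV; the MMU acknowledges by advancing
	 * STLB_INV_CONSUMER_INDEX, which is what the poll below waits on.
	 */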
5998 /* L0 & L1 invalidation */
5999 inv_data = RREG32(mmSTLB_CACHE_INV);
6000 /* PI is 8 bit */
6001 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6002 WREG32(mmSTLB_CACHE_INV,
6003 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6004
6005 rc = hl_poll_timeout(
6006 hdev,
6007 mmSTLB_INV_CONSUMER_INDEX,
6008 status,
6009 status == pi,
6010 1000,
6011 timeout_usec);
6012
6013 mutex_unlock(&hdev->mmu_cache_lock);
6014
6015 if (rc) {
6016 dev_err_ratelimited(hdev->dev,
6017 "MMU cache invalidation timeout\n");
6018 hl_device_reset(hdev, true, false);
6019 }
6020
6021 return rc;
6022 }
6023
6024 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6025 u32 asid, u64 phys_addr)
6026 {
6027 u32 status, timeout_usec;
6028 int rc;
6029
6030 if (hdev->pldm)
6031 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6032 else
6033 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6034
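	/*
	 * Program the page-table root (hop 0) for this ASID: the physical
	 * address is split across two registers (bits 43:12 and 49:44), and
	 * writing the busy bit kicks the update, which completes once the
	 * hardware clears that bit.
	 */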
6035 WREG32(MMU_ASID, asid);
6036 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6037 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6038 WREG32(MMU_BUSY, 0x80000000);
6039
6040 rc = hl_poll_timeout(
6041 hdev,
6042 MMU_BUSY,
6043 status,
6044 !(status & 0x80000000),
6045 1000,
6046 timeout_usec);
6047
6048 if (rc) {
6049 dev_err(hdev->dev,
6050 "Timeout during MMU hop0 config of asid %d\n", asid);
6051 return rc;
6052 }
6053
6054 return 0;
6055 }
6056
6057 static int gaudi_send_heartbeat(struct hl_device *hdev)
6058 {
6059 struct gaudi_device *gaudi = hdev->asic_specific;
6060
6061 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6062 return 0;
6063
6064 return hl_fw_send_heartbeat(hdev);
6065 }
6066
6067 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6068 {
6069 struct gaudi_device *gaudi = hdev->asic_specific;
6070 struct asic_fixed_properties *prop = &hdev->asic_prop;
6071 int rc;
6072
6073 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6074 return 0;
6075
6076 rc = hl_fw_cpucp_info_get(hdev);
6077 if (rc)
6078 return rc;
6079
6080 if (!strlen(prop->cpucp_info.card_name))
6081 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6082 CARD_NAME_MAX_LEN);
6083
6084 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6085
6086 if (hdev->card_type == cpucp_card_type_pci)
6087 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6088 else if (hdev->card_type == cpucp_card_type_pmc)
6089 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6090
6091 hdev->max_power = prop->max_power_default;
6092
6093 return 0;
6094 }
6095
6096 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6097 struct seq_file *s)
6098 {
6099 struct gaudi_device *gaudi = hdev->asic_specific;
6100 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6101 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6102 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6103 bool is_idle = true, is_eng_idle, is_slave;
6104 u64 offset;
6105 int i, dma_id;
6106
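	/*
	 * The device is reported idle only if every DMA, TPC and MME engine
	 * checked below is idle; when a mask is supplied, one bit per busy
	 * engine is set at its GAUDI_ENGINE_ID_* position so callers can tell
	 * which engine is still active.
	 */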
6107 mutex_lock(&gaudi->clk_gate_mutex);
6108
6109 hdev->asic_funcs->disable_clock_gating(hdev);
6110
6111 if (s)
6112 seq_puts(s,
6113 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6114 "--- ------- ------------ ---------- -------------\n");
6115
6116 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6117 dma_id = gaudi_dma_assignment[i];
6118 offset = dma_id * DMA_QMAN_OFFSET;
6119
6120 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6121 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6122 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6123 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6124 IS_DMA_IDLE(dma_core_sts0);
6125 is_idle &= is_eng_idle;
6126
6127 if (mask)
6128 *mask |= ((u64) !is_eng_idle) <<
6129 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6130 if (s)
6131 seq_printf(s, fmt, dma_id,
6132 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6133 qm_cgm_sts, dma_core_sts0);
6134 }
6135
6136 if (s)
6137 seq_puts(s,
6138 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6139 "--- ------- ------------ ---------- ----------\n");
6140
6141 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6142 offset = i * TPC_QMAN_OFFSET;
6143 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6144 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6145 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6146 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6147 IS_TPC_IDLE(tpc_cfg_sts);
6148 is_idle &= is_eng_idle;
6149
6150 if (mask)
6151 *mask |= ((u64) !is_eng_idle) <<
6152 (GAUDI_ENGINE_ID_TPC_0 + i);
6153 if (s)
6154 seq_printf(s, fmt, i,
6155 is_eng_idle ? "Y" : "N",
6156 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6157 }
6158
6159 if (s)
6160 seq_puts(s,
6161 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6162 "--- ------- ------------ ---------- -----------\n");
6163
6164 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6165 offset = i * MME_QMAN_OFFSET;
6166 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6167 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6168
6169 /* MME 1 & 3 are slaves, no need to check their QMANs */
6170 is_slave = i % 2;
6171 if (!is_slave) {
6172 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6173 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6174 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6175 }
6176
6177 is_idle &= is_eng_idle;
6178
6179 if (mask)
6180 *mask |= ((u64) !is_eng_idle) <<
6181 (GAUDI_ENGINE_ID_MME_0 + i);
6182 if (s) {
6183 if (!is_slave)
6184 seq_printf(s, fmt, i,
6185 is_eng_idle ? "Y" : "N",
6186 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6187 else
6188 seq_printf(s, mme_slave_fmt, i,
6189 is_eng_idle ? "Y" : "N", "-",
6190 "-", mme_arch_sts);
6191 }
6192 }
6193
6194 if (s)
6195 seq_puts(s, "\n");
6196
6197 hdev->asic_funcs->set_clock_gating(hdev);
6198
6199 mutex_unlock(&gaudi->clk_gate_mutex);
6200
6201 return is_idle;
6202 }
6203
6204 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6205 __acquires(&gaudi->hw_queues_lock)
6206 {
6207 struct gaudi_device *gaudi = hdev->asic_specific;
6208
6209 spin_lock(&gaudi->hw_queues_lock);
6210 }
6211
6212 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6213 __releases(&gaudi->hw_queues_lock)
6214 {
6215 struct gaudi_device *gaudi = hdev->asic_specific;
6216
6217 spin_unlock(&gaudi->hw_queues_lock);
6218 }
6219
6220 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6221 {
6222 return hdev->pdev->device;
6223 }
6224
6225 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6226 size_t max_size)
6227 {
6228 struct gaudi_device *gaudi = hdev->asic_specific;
6229
6230 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6231 return 0;
6232
6233 return hl_fw_get_eeprom_data(hdev, data, max_size);
6234 }
6235
6236 /*
6237 * this function should be used only during initialization and/or after reset,
6238 * when there are no active users.
6239 */
6240 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6241 u32 tpc_id)
6242 {
6243 struct gaudi_device *gaudi = hdev->asic_specific;
6244 u64 kernel_timeout;
6245 u32 status, offset;
6246 int rc;
6247
6248 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6249
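	/*
	 * TPC config blocks are equally spaced, so the distance between the
	 * TPC1 and TPC0 STATUS registers is the per-engine stride used for
	 * all register accesses below.
	 */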
6250 if (hdev->pldm)
6251 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6252 else
6253 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6254
6255 mutex_lock(&gaudi->clk_gate_mutex);
6256
6257 hdev->asic_funcs->disable_clock_gating(hdev);
6258
6259 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6260 lower_32_bits(tpc_kernel));
6261 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6262 upper_32_bits(tpc_kernel));
6263
6264 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6265 lower_32_bits(tpc_kernel));
6266 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6267 upper_32_bits(tpc_kernel));
6268 /* set a valid LUT pointer, content is of no significance */
6269 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6270 lower_32_bits(tpc_kernel));
6271 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6272 upper_32_bits(tpc_kernel));
6273
6274 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6275 lower_32_bits(CFG_BASE +
6276 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6277
6278 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6279 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6280 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6281 /* wait a bit for the engine to start executing */
6282 usleep_range(1000, 1500);
6283
6284 /* wait until engine has finished executing */
6285 rc = hl_poll_timeout(
6286 hdev,
6287 mmTPC0_CFG_STATUS + offset,
6288 status,
6289 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6290 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6291 1000,
6292 kernel_timeout);
6293
6294 if (rc) {
6295 dev_err(hdev->dev,
6296 "Timeout while waiting for TPC%d icache prefetch\n",
6297 tpc_id);
6298 hdev->asic_funcs->set_clock_gating(hdev);
6299 mutex_unlock(&gaudi->clk_gate_mutex);
6300 return -EIO;
6301 }
6302
6303 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6304 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6305
6306 /* wait a bit for the engine to start executing */
6307 usleep_range(1000, 1500);
6308
6309 /* wait until engine has finished executing */
6310 rc = hl_poll_timeout(
6311 hdev,
6312 mmTPC0_CFG_STATUS + offset,
6313 status,
6314 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6315 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6316 1000,
6317 kernel_timeout);
6318
6319 if (rc) {
6320 dev_err(hdev->dev,
6321 "Timeout while waiting for TPC%d vector pipe\n",
6322 tpc_id);
6323 hdev->asic_funcs->set_clock_gating(hdev);
6324 mutex_unlock(&gaudi->clk_gate_mutex);
6325 return -EIO;
6326 }
6327
6328 rc = hl_poll_timeout(
6329 hdev,
6330 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6331 status,
6332 (status == 0),
6333 1000,
6334 kernel_timeout);
6335
6336 hdev->asic_funcs->set_clock_gating(hdev);
6337 mutex_unlock(&gaudi->clk_gate_mutex);
6338
6339 if (rc) {
6340 dev_err(hdev->dev,
6341 "Timeout while waiting for TPC%d kernel to execute\n",
6342 tpc_id);
6343 return -EIO;
6344 }
6345
6346 return 0;
6347 }
6348
6349 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6350 {
6351 return RREG32(mmHW_STATE);
6352 }
6353
6354 static int gaudi_ctx_init(struct hl_ctx *ctx)
6355 {
6356 return 0;
6357 }
6358
6359 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6360 {
6361 return gaudi_cq_assignment[cq_idx];
6362 }
6363
6364 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6365 {
6366 return sizeof(struct packet_msg_short) +
6367 sizeof(struct packet_msg_prot) * 2;
6368 }
6369
6370 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6371 {
6372 return sizeof(struct packet_msg_short) * 4 +
6373 sizeof(struct packet_fence) +
6374 sizeof(struct packet_msg_prot) * 2;
6375 }
6376
6377 static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6378 {
6379 struct hl_cb *cb = (struct hl_cb *) data;
6380 struct packet_msg_short *pkt;
6381 u32 value, ctl;
6382
6383 pkt = cb->kernel_address;
6384 memset(pkt, 0, sizeof(*pkt));
6385
6386 /* Inc by 1, Mode ADD */
6387 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6388 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6389
6390 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6391 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6392 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6393 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6394 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6395 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6396 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6397
6398 pkt->value = cpu_to_le32(value);
6399 pkt->ctl = cpu_to_le32(ctl);
6400 }
6401
6402 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6403 u16 addr)
6404 {
6405 u32 ctl, pkt_size = sizeof(*pkt);
6406
6407 memset(pkt, 0, pkt_size);
6408
6409 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6410 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6411 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6412 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6413 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6414 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6415
6416 pkt->value = cpu_to_le32(value);
6417 pkt->ctl = cpu_to_le32(ctl);
6418
6419 return pkt_size;
6420 }
6421
6422 static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6423 u16 sob_val, u16 addr)
6424 {
6425 u32 ctl, value, pkt_size = sizeof(*pkt);
6426 u8 mask = ~(1 << (sob_id & 0x7));
6427
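	/*
	 * SOBs are monitored in groups of eight: sob_id / 8 selects the sync
	 * group and the mask clears only the bit of sob_id within that group
	 * (e.g. sob_id 11 -> group 1, mask 0xF7), which presumably marks that
	 * single SOB as the one the monitor observes.
	 */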
6428 memset(pkt, 0, pkt_size);
6429
6430 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6431 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6432 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6433 			0); /* GREATER OR EQUAL */
6434 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6435
6436 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6437 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6438 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6439 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6440 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6441 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6442 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6443
6444 pkt->value = cpu_to_le32(value);
6445 pkt->ctl = cpu_to_le32(ctl);
6446
6447 return pkt_size;
6448 }
6449
6450 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6451 {
6452 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6453
6454 memset(pkt, 0, pkt_size);
6455
6456 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6457 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6458 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6459
6460 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6461 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6462 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6463 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6464
6465 pkt->cfg = cpu_to_le32(cfg);
6466 pkt->ctl = cpu_to_le32(ctl);
6467
6468 return pkt_size;
6469 }
6470
6471 static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6472 u16 sob_val, u16 mon_id, u32 q_idx)
6473 {
6474 struct hl_cb *cb = (struct hl_cb *) data;
6475 void *buf = cb->kernel_address;
6476 u64 monitor_base, fence_addr = 0;
6477 u32 size = 0;
6478 u16 msg_addr_offset;
6479
6480 switch (q_idx) {
6481 case GAUDI_QUEUE_ID_DMA_0_0:
6482 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6483 break;
6484 case GAUDI_QUEUE_ID_DMA_0_1:
6485 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6486 break;
6487 case GAUDI_QUEUE_ID_DMA_0_2:
6488 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6489 break;
6490 case GAUDI_QUEUE_ID_DMA_0_3:
6491 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6492 break;
6493 case GAUDI_QUEUE_ID_DMA_1_0:
6494 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6495 break;
6496 case GAUDI_QUEUE_ID_DMA_1_1:
6497 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6498 break;
6499 case GAUDI_QUEUE_ID_DMA_1_2:
6500 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6501 break;
6502 case GAUDI_QUEUE_ID_DMA_1_3:
6503 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6504 break;
6505 case GAUDI_QUEUE_ID_DMA_5_0:
6506 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6507 break;
6508 case GAUDI_QUEUE_ID_DMA_5_1:
6509 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6510 break;
6511 case GAUDI_QUEUE_ID_DMA_5_2:
6512 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6513 break;
6514 case GAUDI_QUEUE_ID_DMA_5_3:
6515 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6516 break;
6517 default:
6518 /* queue index should be valid here */
6519 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6520 q_idx);
6521 return;
6522 }
6523
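	/*
	 * The monitor payload (value 1, configured below) is written to the
	 * selected CP FENCE2 read-data register once the SOB condition is
	 * met, and the FENCE packet appended at the end of this CB waits on
	 * fence id 2 with a target of 1, so the queue stalls until then.
	 */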
6524 fence_addr += CFG_BASE;
6525
6526 /*
6527 * monitor_base should be the content of the base0 address registers,
6528 * so it will be added to the msg short offsets
6529 */
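	/*
	 * Example: for monitor mon_id the ADDRL register sits at
	 * ADDRL_0 + mon_id * 4, so the MSG_SHORT address offset below is
	 * simply mon_id * 4 relative to monitor_base.
	 */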
6530 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6531
6532 /* First monitor config packet: low address of the sync */
6533 msg_addr_offset =
6534 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6535 monitor_base;
6536
6537 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6538 msg_addr_offset);
6539
6540 /* Second monitor config packet: high address of the sync */
6541 msg_addr_offset =
6542 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6543 monitor_base;
6544
6545 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6546 msg_addr_offset);
6547
6548 /*
6549 * Third monitor config packet: the payload, i.e. what to write when the
6550 * sync triggers
6551 */
6552 msg_addr_offset =
6553 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6554 monitor_base;
6555
6556 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6557
6558 /* Fourth monitor config packet: bind the monitor to a sync object */
6559 msg_addr_offset =
6560 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6561 monitor_base;
6562 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6563 msg_addr_offset);
6564
6565 /* Fence packet */
6566 size += gaudi_add_fence_pkt(buf + size);
6567 }
6568
6569 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6570 {
6571 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6572
6573 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6574 hw_sob->sob_id);
6575
6576 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6577 0);
6578
6579 kref_init(&hw_sob->kref);
6580 }
6581
6582 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6583 {
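	/*
	 * NON_RST_FLOPS_0 is a scratch register that survives reset; when it
	 * holds the POWER9 magic value (presumably left there by the boot
	 * firmware), the host supports a full 64-bit DMA mask, otherwise the
	 * default 48-bit mask is used.
	 */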
6584 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6585 HL_POWER9_HOST_MAGIC) {
6586 hdev->power9_64bit_dma_enable = 1;
6587 hdev->dma_mask = 64;
6588 } else {
6589 hdev->power9_64bit_dma_enable = 0;
6590 hdev->dma_mask = 48;
6591 }
6592 }
6593
6594 static u64 gaudi_get_device_time(struct hl_device *hdev)
6595 {
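	/*
	 * Compose a 64-bit timestamp from the upper and lower halves of the
	 * PSOC timestamp counter. The two reads are not atomic, so a rare
	 * low-word rollover between them could skew a single sample.
	 */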
6596 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6597
6598 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6599 }
6600
6601 static const struct hl_asic_funcs gaudi_funcs = {
6602 .early_init = gaudi_early_init,
6603 .early_fini = gaudi_early_fini,
6604 .late_init = gaudi_late_init,
6605 .late_fini = gaudi_late_fini,
6606 .sw_init = gaudi_sw_init,
6607 .sw_fini = gaudi_sw_fini,
6608 .hw_init = gaudi_hw_init,
6609 .hw_fini = gaudi_hw_fini,
6610 .halt_engines = gaudi_halt_engines,
6611 .suspend = gaudi_suspend,
6612 .resume = gaudi_resume,
6613 .cb_mmap = gaudi_cb_mmap,
6614 .ring_doorbell = gaudi_ring_doorbell,
6615 .pqe_write = gaudi_pqe_write,
6616 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6617 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6618 .get_int_queue_base = gaudi_get_int_queue_base,
6619 .test_queues = gaudi_test_queues,
6620 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6621 .asic_dma_pool_free = gaudi_dma_pool_free,
6622 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6623 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6624 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6625 .cs_parser = gaudi_cs_parser,
6626 .asic_dma_map_sg = gaudi_dma_map_sg,
6627 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6628 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6629 .update_eq_ci = gaudi_update_eq_ci,
6630 .context_switch = gaudi_context_switch,
6631 .restore_phase_topology = gaudi_restore_phase_topology,
6632 .debugfs_read32 = gaudi_debugfs_read32,
6633 .debugfs_write32 = gaudi_debugfs_write32,
6634 .debugfs_read64 = gaudi_debugfs_read64,
6635 .debugfs_write64 = gaudi_debugfs_write64,
6636 .add_device_attr = gaudi_add_device_attr,
6637 .handle_eqe = gaudi_handle_eqe,
6638 .set_pll_profile = gaudi_set_pll_profile,
6639 .get_events_stat = gaudi_get_events_stat,
6640 .read_pte = gaudi_read_pte,
6641 .write_pte = gaudi_write_pte,
6642 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6643 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6644 .send_heartbeat = gaudi_send_heartbeat,
6645 .set_clock_gating = gaudi_set_clock_gating,
6646 .disable_clock_gating = gaudi_disable_clock_gating,
6647 .debug_coresight = gaudi_debug_coresight,
6648 .is_device_idle = gaudi_is_device_idle,
6649 .soft_reset_late_init = gaudi_soft_reset_late_init,
6650 .hw_queues_lock = gaudi_hw_queues_lock,
6651 .hw_queues_unlock = gaudi_hw_queues_unlock,
6652 .get_pci_id = gaudi_get_pci_id,
6653 .get_eeprom_data = gaudi_get_eeprom_data,
6654 .send_cpu_message = gaudi_send_cpu_message,
6655 .get_hw_state = gaudi_get_hw_state,
6656 .pci_bars_map = gaudi_pci_bars_map,
6657 .init_iatu = gaudi_init_iatu,
6658 .rreg = hl_rreg,
6659 .wreg = hl_wreg,
6660 .halt_coresight = gaudi_halt_coresight,
6661 .ctx_init = gaudi_ctx_init,
6662 .get_clk_rate = gaudi_get_clk_rate,
6663 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6664 .read_device_fw_version = gaudi_read_device_fw_version,
6665 .load_firmware_to_device = gaudi_load_firmware_to_device,
6666 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6667 .get_signal_cb_size = gaudi_get_signal_cb_size,
6668 .get_wait_cb_size = gaudi_get_wait_cb_size,
6669 .gen_signal_cb = gaudi_gen_signal_cb,
6670 .gen_wait_cb = gaudi_gen_wait_cb,
6671 .reset_sob = gaudi_reset_sob,
6672 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6673 .get_device_time = gaudi_get_device_time
6674 };
6675
6676 /**
6677 * gaudi_set_asic_funcs - set GAUDI function pointers
6678 *
6679 * @hdev: pointer to hl_device structure
6680 *
6681 */
6682 void gaudi_set_asic_funcs(struct hl_device *hdev)
6683 {
6684 hdev->asic_funcs = &gaudi_funcs;
6685 }
6686