1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
31 
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
35 #include "kfd_priv.h"
36 #include "kfd_pm4_opcodes.h"
37 #include "cik_regs.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41 
dbgdev_address_watch_disable_nodiq(struct kfd_dev * dev)42 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
43 {
44 	dev->kfd2kgd->address_watch_disable(dev->kgd);
45 }
46 
/*
 * dbgdev_diq_submit_ib - submit an indirect buffer through the DIQ and
 * wait until the command processor signals completion.
 *
 * @dbgdev:        debug device whose kernel queue (dbgdev->kq) is used
 * @pasid:         PASID written into the INDIRECT_BUFFER_PASID packet
 * @vmid0_address: GPU address of the indirect buffer
 * @packet_buff:   CPU pointer supplied by the caller (not used here)
 * @size_in_bytes: size of the indirect buffer in bytes, must be non-zero
 *
 * Two packets are placed on the queue: an INDIRECT_BUFFER_PASID packet
 * pointing at the caller's buffer, followed by a RELEASE_MEM packet that
 * writes QUEUESTATE__ACTIVE to a small GTT sync variable. The function
 * then waits on that variable with amdkfd_fence_wait_timeout().
 *
 * Return: -EINVAL if @size_in_bytes is zero, the error reported by
 * acquire_packet_buffer() or kfd_gtt_sa_allocate() if either fails,
 * otherwise the result of amdkfd_fence_wait_timeout().
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER ib_addr;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/*
	 * Split the 64-bit IB address through the union by value instead
	 * of reinterpreting the parameter's storage through a pointer cast.
	 */
	ib_addr.quad_part = vmid0_address;

	ib_packet->bitfields2.ib_base_lo = ib_addr.u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = ib_addr.u.high_part;

	/*
	 * Unsigned constants: shifting a signed 1 into bit 31 is undefined
	 * behaviour in C. The low 20 bits carry the IB size in dwords.
	 */
	ib_packet->control = (1U << 23) | (1U << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		/* Give back the queue space acquired above. */
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Initial value; the RELEASE_MEM packet overwrites it when done. */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
165 
dbgdev_register_nodiq(struct kfd_dbgdev * dbgdev)166 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
167 {
168 	/*
169 	 * no action is needed in this case,
170 	 * just make sure diq will not be used
171 	 */
172 
173 	dbgdev->kq = NULL;
174 
175 	return 0;
176 }
177 
dbgdev_register_diq(struct kfd_dbgdev * dbgdev)178 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
179 {
180 	struct queue_properties properties;
181 	unsigned int qid;
182 	struct kernel_queue *kq = NULL;
183 	int status;
184 
185 	properties.type = KFD_QUEUE_TYPE_DIQ;
186 
187 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188 				&properties, &qid);
189 
190 	if (status) {
191 		pr_err("Failed to create DIQ\n");
192 		return status;
193 	}
194 
195 	pr_debug("DIQ Created with queue id: %d\n", qid);
196 
197 	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
198 
199 	if (!kq) {
200 		pr_err("Error getting DIQ\n");
201 		pqm_destroy_queue(dbgdev->pqm, qid);
202 		return -EFAULT;
203 	}
204 
205 	dbgdev->kq = kq;
206 
207 	return status;
208 }
209 
dbgdev_unregister_nodiq(struct kfd_dbgdev * dbgdev)210 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
211 {
212 	/* disable watch address */
213 	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
214 	return 0;
215 }
216 
dbgdev_unregister_diq(struct kfd_dbgdev * dbgdev)217 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
218 {
219 	/* todo - disable address watch */
220 	int status;
221 
222 	status = pqm_destroy_queue(dbgdev->pqm,
223 			dbgdev->kq->queue->properties.queue_id);
224 	dbgdev->kq = NULL;
225 
226 	return status;
227 }
228 
dbgdev_address_watch_set_registers(const struct dbg_address_watch_info * adw_info,union TCP_WATCH_ADDR_H_BITS * addrHi,union TCP_WATCH_ADDR_L_BITS * addrLo,union TCP_WATCH_CNTL_BITS * cntl,unsigned int index,unsigned int vmid)229 static void dbgdev_address_watch_set_registers(
230 			const struct dbg_address_watch_info *adw_info,
231 			union TCP_WATCH_ADDR_H_BITS *addrHi,
232 			union TCP_WATCH_ADDR_L_BITS *addrLo,
233 			union TCP_WATCH_CNTL_BITS *cntl,
234 			unsigned int index, unsigned int vmid)
235 {
236 	union ULARGE_INTEGER addr;
237 
238 	addr.quad_part = 0;
239 	addrHi->u32All = 0;
240 	addrLo->u32All = 0;
241 	cntl->u32All = 0;
242 
243 	if (adw_info->watch_mask)
244 		cntl->bitfields.mask =
245 			(uint32_t) (adw_info->watch_mask[index] &
246 					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
247 	else
248 		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
249 
250 	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
251 
252 	addrHi->bitfields.addr = addr.u.high_part &
253 					ADDRESS_WATCH_REG_ADDHIGH_MASK;
254 	addrLo->bitfields.addr =
255 			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
256 
257 	cntl->bitfields.mode = adw_info->watch_mode[index];
258 	cntl->bitfields.vmid = (uint32_t) vmid;
259 	/* for now assume it is an ATC address */
260 	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
261 
262 	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
263 	pr_debug("\t\t%20s %08x\n", "set reg add high :",
264 			addrHi->bitfields.addr);
265 	pr_debug("\t\t%20s %08x\n", "set reg add low :",
266 			addrLo->bitfields.addr);
267 }
268 
dbgdev_address_watch_nodiq(struct kfd_dbgdev * dbgdev,struct dbg_address_watch_info * adw_info)269 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
270 				      struct dbg_address_watch_info *adw_info)
271 {
272 	union TCP_WATCH_ADDR_H_BITS addrHi;
273 	union TCP_WATCH_ADDR_L_BITS addrLo;
274 	union TCP_WATCH_CNTL_BITS cntl;
275 	struct kfd_process_device *pdd;
276 	unsigned int i;
277 
278 	/* taking the vmid for that process on the safe way using pdd */
279 	pdd = kfd_get_process_device_data(dbgdev->dev,
280 					adw_info->process);
281 	if (!pdd) {
282 		pr_err("Failed to get pdd for wave control no DIQ\n");
283 		return -EFAULT;
284 	}
285 
286 	addrHi.u32All = 0;
287 	addrLo.u32All = 0;
288 	cntl.u32All = 0;
289 
290 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
291 			(adw_info->num_watch_points == 0)) {
292 		pr_err("num_watch_points is invalid\n");
293 		return -EINVAL;
294 	}
295 
296 	if (!adw_info->watch_mode || !adw_info->watch_address) {
297 		pr_err("adw_info fields are not valid\n");
298 		return -EINVAL;
299 	}
300 
301 	for (i = 0; i < adw_info->num_watch_points; i++) {
302 		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
303 						&cntl, i, pdd->qpd.vmid);
304 
305 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
306 		pr_debug("\t\t%20s %08x\n", "register index :", i);
307 		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
308 		pr_debug("\t\t%20s %08x\n", "Address Low is :",
309 				addrLo.bitfields.addr);
310 		pr_debug("\t\t%20s %08x\n", "Address high is :",
311 				addrHi.bitfields.addr);
312 		pr_debug("\t\t%20s %08x\n", "Address high is :",
313 				addrHi.bitfields.addr);
314 		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
315 				cntl.bitfields.mask);
316 		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
317 				cntl.bitfields.mode);
318 		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
319 				cntl.bitfields.vmid);
320 		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
321 				cntl.bitfields.atc);
322 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
323 
324 		pdd->dev->kfd2kgd->address_watch_execute(
325 						dbgdev->dev->kgd,
326 						i,
327 						cntl.u32All,
328 						addrHi.u32All,
329 						addrLo.u32All);
330 	}
331 
332 	return 0;
333 }
334 
dbgdev_address_watch_diq(struct kfd_dbgdev * dbgdev,struct dbg_address_watch_info * adw_info)335 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
336 				    struct dbg_address_watch_info *adw_info)
337 {
338 	struct pm4__set_config_reg *packets_vec;
339 	union TCP_WATCH_ADDR_H_BITS addrHi;
340 	union TCP_WATCH_ADDR_L_BITS addrLo;
341 	union TCP_WATCH_CNTL_BITS cntl;
342 	struct kfd_mem_obj *mem_obj;
343 	unsigned int aw_reg_add_dword;
344 	uint32_t *packet_buff_uint;
345 	unsigned int i;
346 	int status;
347 	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
348 	/* we do not control the vmid in DIQ mode, just a place holder */
349 	unsigned int vmid = 0;
350 
351 	addrHi.u32All = 0;
352 	addrLo.u32All = 0;
353 	cntl.u32All = 0;
354 
355 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
356 			(adw_info->num_watch_points == 0)) {
357 		pr_err("num_watch_points is invalid\n");
358 		return -EINVAL;
359 	}
360 
361 	if (!adw_info->watch_mode || !adw_info->watch_address) {
362 		pr_err("adw_info fields are not valid\n");
363 		return -EINVAL;
364 	}
365 
366 	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
367 
368 	if (status) {
369 		pr_err("Failed to allocate GART memory\n");
370 		return status;
371 	}
372 
373 	packet_buff_uint = mem_obj->cpu_ptr;
374 
375 	memset(packet_buff_uint, 0, ib_size);
376 
377 	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
378 
379 	packets_vec[0].header.count = 1;
380 	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
381 	packets_vec[0].header.type = PM4_TYPE_3;
382 	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
383 	packets_vec[0].bitfields2.insert_vmid = 1;
384 	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
385 	packets_vec[1].bitfields2.insert_vmid = 0;
386 	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
387 	packets_vec[2].bitfields2.insert_vmid = 0;
388 	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
389 	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
390 	packets_vec[3].bitfields2.insert_vmid = 1;
391 
392 	for (i = 0; i < adw_info->num_watch_points; i++) {
393 		dbgdev_address_watch_set_registers(adw_info,
394 						&addrHi,
395 						&addrLo,
396 						&cntl,
397 						i,
398 						vmid);
399 
400 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
401 		pr_debug("\t\t%20s %08x\n", "register index :", i);
402 		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
403 		pr_debug("\t\t%20s %p\n", "Add ptr is :",
404 				adw_info->watch_address);
405 		pr_debug("\t\t%20s %08llx\n", "Add     is :",
406 				adw_info->watch_address[i]);
407 		pr_debug("\t\t%20s %08x\n", "Address Low is :",
408 				addrLo.bitfields.addr);
409 		pr_debug("\t\t%20s %08x\n", "Address high is :",
410 				addrHi.bitfields.addr);
411 		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
412 				cntl.bitfields.mask);
413 		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
414 				cntl.bitfields.mode);
415 		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
416 				cntl.bitfields.vmid);
417 		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
418 				cntl.bitfields.atc);
419 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
420 
421 		aw_reg_add_dword =
422 				dbgdev->dev->kfd2kgd->address_watch_get_offset(
423 					dbgdev->dev->kgd,
424 					i,
425 					ADDRESS_WATCH_REG_CNTL);
426 
427 		packets_vec[0].bitfields2.reg_offset =
428 					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
429 
430 		packets_vec[0].reg_data[0] = cntl.u32All;
431 
432 		aw_reg_add_dword =
433 				dbgdev->dev->kfd2kgd->address_watch_get_offset(
434 					dbgdev->dev->kgd,
435 					i,
436 					ADDRESS_WATCH_REG_ADDR_HI);
437 
438 		packets_vec[1].bitfields2.reg_offset =
439 					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
440 		packets_vec[1].reg_data[0] = addrHi.u32All;
441 
442 		aw_reg_add_dword =
443 				dbgdev->dev->kfd2kgd->address_watch_get_offset(
444 					dbgdev->dev->kgd,
445 					i,
446 					ADDRESS_WATCH_REG_ADDR_LO);
447 
448 		packets_vec[2].bitfields2.reg_offset =
449 				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
450 		packets_vec[2].reg_data[0] = addrLo.u32All;
451 
452 		/* enable watch flag if address is not zero*/
453 		if (adw_info->watch_address[i] > 0)
454 			cntl.bitfields.valid = 1;
455 		else
456 			cntl.bitfields.valid = 0;
457 
458 		aw_reg_add_dword =
459 				dbgdev->dev->kfd2kgd->address_watch_get_offset(
460 					dbgdev->dev->kgd,
461 					i,
462 					ADDRESS_WATCH_REG_CNTL);
463 
464 		packets_vec[3].bitfields2.reg_offset =
465 					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
466 		packets_vec[3].reg_data[0] = cntl.u32All;
467 
468 		status = dbgdev_diq_submit_ib(
469 					dbgdev,
470 					adw_info->process->pasid,
471 					mem_obj->gpu_addr,
472 					packet_buff_uint,
473 					ib_size);
474 
475 		if (status) {
476 			pr_err("Failed to submit IB to DIQ\n");
477 			break;
478 		}
479 	}
480 
481 	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
482 	return status;
483 }
484 
dbgdev_wave_control_set_registers(struct dbg_wave_control_info * wac_info,union SQ_CMD_BITS * in_reg_sq_cmd,union GRBM_GFX_INDEX_BITS * in_reg_gfx_index)485 static int dbgdev_wave_control_set_registers(
486 				struct dbg_wave_control_info *wac_info,
487 				union SQ_CMD_BITS *in_reg_sq_cmd,
488 				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
489 {
490 	int status = 0;
491 	union SQ_CMD_BITS reg_sq_cmd;
492 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
493 	struct HsaDbgWaveMsgAMDGen2 *pMsg;
494 
495 	reg_sq_cmd.u32All = 0;
496 	reg_gfx_index.u32All = 0;
497 	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
498 
499 	switch (wac_info->mode) {
500 	/* Send command to single wave */
501 	case HSA_DBG_WAVEMODE_SINGLE:
502 		/*
503 		 * Limit access to the process waves only,
504 		 * by setting vmid check
505 		 */
506 		reg_sq_cmd.bits.check_vmid = 1;
507 		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
508 		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
509 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
510 
511 		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
512 		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
513 		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
514 
515 		break;
516 
517 	/* Send command to all waves with matching VMID */
518 	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
519 
520 		reg_gfx_index.bits.sh_broadcast_writes = 1;
521 		reg_gfx_index.bits.se_broadcast_writes = 1;
522 		reg_gfx_index.bits.instance_broadcast_writes = 1;
523 
524 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
525 
526 		break;
527 
528 	/* Send command to all CU waves with matching VMID */
529 	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
530 
531 		reg_sq_cmd.bits.check_vmid = 1;
532 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
533 
534 		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
535 		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
536 		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
537 
538 		break;
539 
540 	default:
541 		return -EINVAL;
542 	}
543 
544 	switch (wac_info->operand) {
545 	case HSA_DBG_WAVEOP_HALT:
546 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
547 		break;
548 
549 	case HSA_DBG_WAVEOP_RESUME:
550 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
551 		break;
552 
553 	case HSA_DBG_WAVEOP_KILL:
554 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
555 		break;
556 
557 	case HSA_DBG_WAVEOP_DEBUG:
558 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
559 		break;
560 
561 	case HSA_DBG_WAVEOP_TRAP:
562 		if (wac_info->trapId < MAX_TRAPID) {
563 			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
564 			reg_sq_cmd.bits.trap_id = wac_info->trapId;
565 		} else {
566 			status = -EINVAL;
567 		}
568 		break;
569 
570 	default:
571 		status = -EINVAL;
572 		break;
573 	}
574 
575 	if (status == 0) {
576 		*in_reg_sq_cmd = reg_sq_cmd;
577 		*in_reg_gfx_index = reg_gfx_index;
578 	}
579 
580 	return status;
581 }
582 
dbgdev_wave_control_diq(struct kfd_dbgdev * dbgdev,struct dbg_wave_control_info * wac_info)583 static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
584 					struct dbg_wave_control_info *wac_info)
585 {
586 
587 	int status;
588 	union SQ_CMD_BITS reg_sq_cmd;
589 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
590 	struct kfd_mem_obj *mem_obj;
591 	uint32_t *packet_buff_uint;
592 	struct pm4__set_config_reg *packets_vec;
593 	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
594 
595 	reg_sq_cmd.u32All = 0;
596 
597 	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
598 							&reg_gfx_index);
599 	if (status) {
600 		pr_err("Failed to set wave control registers\n");
601 		return status;
602 	}
603 
604 	/* we do not control the VMID in DIQ, so reset it to a known value */
605 	reg_sq_cmd.bits.vm_id = 0;
606 
607 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
608 
609 	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
610 	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
611 	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
612 	pr_debug("\t\t msg value is: %u\n",
613 			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
614 	pr_debug("\t\t vmid      is: N/A\n");
615 
616 	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
617 	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
618 	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
619 	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
620 	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
621 	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
622 	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
623 
624 	pr_debug("\t\t ibw       is : %u\n",
625 			reg_gfx_index.bitfields.instance_broadcast_writes);
626 	pr_debug("\t\t ii        is : %u\n",
627 			reg_gfx_index.bitfields.instance_index);
628 	pr_debug("\t\t sebw      is : %u\n",
629 			reg_gfx_index.bitfields.se_broadcast_writes);
630 	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
631 	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
632 	pr_debug("\t\t sbw       is : %u\n",
633 			reg_gfx_index.bitfields.sh_broadcast_writes);
634 
635 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
636 
637 	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
638 
639 	if (status != 0) {
640 		pr_err("Failed to allocate GART memory\n");
641 		return status;
642 	}
643 
644 	packet_buff_uint = mem_obj->cpu_ptr;
645 
646 	memset(packet_buff_uint, 0, ib_size);
647 
648 	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
649 	packets_vec[0].header.count = 1;
650 	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
651 	packets_vec[0].header.type = PM4_TYPE_3;
652 	packets_vec[0].bitfields2.reg_offset =
653 			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
654 
655 	packets_vec[0].bitfields2.insert_vmid = 0;
656 	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
657 
658 	packets_vec[1].header.count = 1;
659 	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
660 	packets_vec[1].header.type = PM4_TYPE_3;
661 	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
662 
663 	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
664 	packets_vec[1].bitfields2.insert_vmid = 1;
665 	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
666 
667 	/* Restore the GRBM_GFX_INDEX register */
668 
669 	reg_gfx_index.u32All = 0;
670 	reg_gfx_index.bits.sh_broadcast_writes = 1;
671 	reg_gfx_index.bits.instance_broadcast_writes = 1;
672 	reg_gfx_index.bits.se_broadcast_writes = 1;
673 
674 
675 	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
676 	packets_vec[2].bitfields2.reg_offset =
677 				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
678 
679 	packets_vec[2].bitfields2.insert_vmid = 0;
680 	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
681 
682 	status = dbgdev_diq_submit_ib(
683 			dbgdev,
684 			wac_info->process->pasid,
685 			mem_obj->gpu_addr,
686 			packet_buff_uint,
687 			ib_size);
688 
689 	if (status)
690 		pr_err("Failed to submit IB to DIQ\n");
691 
692 	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
693 
694 	return status;
695 }
696 
dbgdev_wave_control_nodiq(struct kfd_dbgdev * dbgdev,struct dbg_wave_control_info * wac_info)697 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
698 					struct dbg_wave_control_info *wac_info)
699 {
700 	int status;
701 	union SQ_CMD_BITS reg_sq_cmd;
702 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
703 	struct kfd_process_device *pdd;
704 
705 	reg_sq_cmd.u32All = 0;
706 
707 	/* taking the VMID for that process on the safe way using PDD */
708 	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
709 
710 	if (!pdd) {
711 		pr_err("Failed to get pdd for wave control no DIQ\n");
712 		return -EFAULT;
713 	}
714 	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
715 							&reg_gfx_index);
716 	if (status) {
717 		pr_err("Failed to set wave control registers\n");
718 		return status;
719 	}
720 
721 	/* for non DIQ we need to patch the VMID: */
722 
723 	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
724 
725 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
726 
727 	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
728 	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
729 	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
730 	pr_debug("\t\t msg value is: %u\n",
731 			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
732 	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
733 
734 	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
735 	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
736 	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
737 	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
738 	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
739 	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
740 	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
741 
742 	pr_debug("\t\t ibw       is : %u\n",
743 			reg_gfx_index.bitfields.instance_broadcast_writes);
744 	pr_debug("\t\t ii        is : %u\n",
745 			reg_gfx_index.bitfields.instance_index);
746 	pr_debug("\t\t sebw      is : %u\n",
747 			reg_gfx_index.bitfields.se_broadcast_writes);
748 	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
749 	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
750 	pr_debug("\t\t sbw       is : %u\n",
751 			reg_gfx_index.bitfields.sh_broadcast_writes);
752 
753 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
754 
755 	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
756 							reg_gfx_index.u32All,
757 							reg_sq_cmd.u32All);
758 }
759 
dbgdev_wave_reset_wavefronts(struct kfd_dev * dev,struct kfd_process * p)760 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
761 {
762 	int status = 0;
763 	unsigned int vmid;
764 	union SQ_CMD_BITS reg_sq_cmd;
765 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
766 	struct kfd_process_device *pdd;
767 	struct dbg_wave_control_info wac_info;
768 	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
769 	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
770 
771 	reg_sq_cmd.u32All = 0;
772 	status = 0;
773 
774 	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
775 	wac_info.operand = HSA_DBG_WAVEOP_KILL;
776 
777 	pr_debug("Killing all process wavefronts\n");
778 
779 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
780 	 * ATC_VMID15_PASID_MAPPING
781 	 * to check which VMID the current process is mapped to.
782 	 */
783 
784 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
785 		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
786 				(dev->kgd, vmid)) {
787 			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
788 					(dev->kgd, vmid) == p->pasid) {
789 				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
790 						vmid, p->pasid);
791 				break;
792 			}
793 		}
794 	}
795 
796 	if (vmid > last_vmid_to_scan) {
797 		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
798 		return -EFAULT;
799 	}
800 
801 	/* taking the VMID for that process on the safe way using PDD */
802 	pdd = kfd_get_process_device_data(dev, p);
803 	if (!pdd)
804 		return -EFAULT;
805 
806 	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
807 			&reg_gfx_index);
808 	if (status != 0)
809 		return -EINVAL;
810 
811 	/* for non DIQ we need to patch the VMID: */
812 	reg_sq_cmd.bits.vm_id = vmid;
813 
814 	dev->kfd2kgd->wave_control_execute(dev->kgd,
815 					reg_gfx_index.u32All,
816 					reg_sq_cmd.u32All);
817 
818 	return 0;
819 }
820 
kfd_dbgdev_init(struct kfd_dbgdev * pdbgdev,struct kfd_dev * pdev,enum DBGDEV_TYPE type)821 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
822 			enum DBGDEV_TYPE type)
823 {
824 	pdbgdev->dev = pdev;
825 	pdbgdev->kq = NULL;
826 	pdbgdev->type = type;
827 	pdbgdev->pqm = NULL;
828 
829 	switch (type) {
830 	case DBGDEV_TYPE_NODIQ:
831 		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
832 		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
833 		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
834 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
835 		break;
836 	case DBGDEV_TYPE_DIQ:
837 	default:
838 		pdbgdev->dbgdev_register = dbgdev_register_diq;
839 		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
840 		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
841 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
842 		break;
843 	}
844 
845 }
846