// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/**
 * struct hl_eqe_work - used to schedule work of EQ entry and armcp_reset event
 * @eq_work: workqueue object to run when EQ entry is received
 * @hdev: pointer to device structure
 * @eq_entry: copy of the EQ entry
 */
struct hl_eqe_work {
	struct work_struct	eq_work;
	struct hl_device	*hdev;
	struct hl_eq_entry	eq_entry;
};

/*
 * hl_cq_inc_ptr - increment ci or pi of cq
 *
 * @ptr: the current ci or pi value of the completion queue
 *
 * Increment ptr by 1. If it reaches the number of completion queue
 * entries, set it to 0
 */
inline u32 hl_cq_inc_ptr(u32 ptr)
{
	ptr++;
	if (unlikely(ptr == HL_CQ_LENGTH))
		ptr = 0;
	return ptr;
}

/*
 * hl_eq_inc_ptr - increment ci of eq
 *
 * @ptr: the current ci value of the event queue
 *
 * Increment ptr by 1. If it reaches the number of event queue
 * entries, set it to 0
 */
inline u32 hl_eq_inc_ptr(u32 ptr)
{
	ptr++;
	if (unlikely(ptr == HL_EQ_LENGTH))
		ptr = 0;
	return ptr;
}

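/*
 * irq_handle_eqe - handle an event queue entry in process context
 *
 * @work: pointer to the EQ work object
 *
 * Pass the copied EQ entry to the ASIC-specific handler and free the work
 * object that was allocated by the IRQ handler
 */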
static void irq_handle_eqe(struct work_struct *work)
{
	struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
							eq_work);
	struct hl_device *hdev = eqe_work->hdev;

	hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);

	kfree(eqe_work);
}

/*
 * hl_irq_handler_cq - irq handler for completion queue
 *
 * @irq: irq number
 * @arg: pointer to completion queue structure
 *
 */
irqreturn_t hl_irq_handler_cq(int irq, void *arg)
{
	struct hl_cq *cq = arg;
	struct hl_device *hdev = cq->hdev;
	struct hl_hw_queue *queue;
	struct hl_cs_job *job;
	bool shadow_index_valid;
	u16 shadow_index;
	struct hl_cq_entry *cq_entry, *cq_base;

	if (hdev->disabled) {
		dev_dbg(hdev->dev,
			"Device disabled but received IRQ %d for CQ %d\n",
			irq, cq->hw_queue_id);
		return IRQ_HANDLED;
	}

	cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;

	while (1) {
		bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
					CQ_ENTRY_READY_MASK)
						>> CQ_ENTRY_READY_SHIFT);

		if (!entry_ready)
			break;

		cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];

		/* Make sure we read CQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
					CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);

		shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
					CQ_ENTRY_SHADOW_INDEX_MASK)
					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);

		queue = &hdev->kernel_queues[cq->hw_queue_id];

		if ((shadow_index_valid) && (!hdev->disabled)) {
			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
			queue_work(hdev->cq_wq, &job->finish_work);
		}

		/* Update ci of the context's queue. There is no
		 * need to protect it with spinlock because this update is
		 * done only inside IRQ and there is a different IRQ per
		 * queue
		 */
		queue->ci = hl_queue_inc_ptr(queue->ci);

		/* Clear CQ entry ready bit */
		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
						~CQ_ENTRY_READY_MASK);

		cq->ci = hl_cq_inc_ptr(cq->ci);

		/* Increment free slots */
		atomic_inc(&cq->free_slots_cnt);
	}

	return IRQ_HANDLED;
}

/*
 * hl_irq_handler_eq - irq handler for event queue
 *
 * @irq: irq number
 * @arg: pointer to event queue structure
 *
 */
irqreturn_t hl_irq_handler_eq(int irq, void *arg)
{
	struct hl_eq *eq = arg;
	struct hl_device *hdev = eq->hdev;
	struct hl_eq_entry *eq_entry;
	struct hl_eq_entry *eq_base;
	struct hl_eqe_work *handle_eqe_work;

	eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address;

	while (1) {
		bool entry_ready =
			((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
				EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);

		if (!entry_ready)
			break;

		eq_entry = &eq_base[eq->ci];

		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		if (hdev->disabled) {
			dev_warn(hdev->dev,
				"Device disabled but received IRQ %d for EQ\n",
					irq);
			goto skip_irq;
		}

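		/* We are in hard IRQ context, so the allocation must be atomic */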
		handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
		if (handle_eqe_work) {
			INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
			handle_eqe_work->hdev = hdev;

			memcpy(&handle_eqe_work->eq_entry, eq_entry,
					sizeof(*eq_entry));

			queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
		}
skip_irq:
		/* Clear EQ entry ready bit */
		eq_entry->hdr.ctl =
			cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
							~EQ_CTL_READY_MASK);

		eq->ci = hl_eq_inc_ptr(eq->ci);

		hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
	}

	return IRQ_HANDLED;
}

/*
 * hl_cq_init - main initialization function for a cq object
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure
 * @hw_queue_id: The H/W queue ID this completion queue belongs to
 *
 * Allocate dma-able memory for the completion queue and initialize fields
 * Returns 0 on success
 */
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
{
	void *p;

	BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->hdev = hdev;
	q->kernel_address = (u64) (uintptr_t) p;
	q->hw_queue_id = hw_queue_id;
	q->ci = 0;
	q->pi = 0;

	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);

	return 0;
}

/*
 * hl_cq_fini - destroy completion queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure
 *
 * Free the completion queue memory
 */
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
{
	hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
			(void *) (uintptr_t) q->kernel_address, q->bus_address);
}

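/*
 * hl_cq_reset - reset a completion queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure
 *
 * Reset the CI/PI counters and clear the queue memory so stale entries are
 * not processed after the device becomes operational again
 */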
void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
{
	q->ci = 0;
	q->pi = 0;

	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);

	/*
	 * It's not enough to just reset the PI/CI because the H/W may have
	 * written valid completion entries before it was halted and therefore
	 * we need to clean the actual queues so we won't process old entries
	 * when the device is operational again
	 */

	memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
}

/*
 * hl_eq_init - main initialization function for an event queue object
 *
 * @hdev: pointer to device structure
 * @q: pointer to eq structure
 *
 * Allocate dma-able memory for the event queue and initialize fields
 * Returns 0 on success
 */
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
{
	void *p;

	BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);

	p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_EQ_SIZE_IN_BYTES,
							&q->bus_address);
	if (!p)
		return -ENOMEM;

	q->hdev = hdev;
	q->kernel_address = (u64) (uintptr_t) p;
	q->ci = 0;

	return 0;
}

/*
 * hl_eq_fini - destroy event queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to eq structure
 *
 * Free the event queue memory
 */
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
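	/* Make sure any pending EQ work has completed before freeing the queue */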
	flush_workqueue(hdev->eq_wq);

	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_EQ_SIZE_IN_BYTES,
					(void *) (uintptr_t) q->kernel_address);
}

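/*
 * hl_eq_reset - reset an event queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to eq structure
 *
 * Reset the CI counter and clear the queue memory so stale entries are not
 * processed after the device becomes operational again
 */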
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
{
	q->ci = 0;

	/*
	 * It's not enough to just reset the PI/CI because the H/W may have
	 * written valid completion entries before it was halted and therefore
	 * we need to clean the actual queues so we won't process old entries
	 * when the device is operational again
	 */

	memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
}