// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Intel Virtio Over PCIe (VOP) driver.
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
	return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
	if (!vdev)
		return -EINVAL;
	/* Device has not been created yet */
	if (!vdev->dd || !vdev->dd->type) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	/* Device has been removed/deleted */
	if (vdev->dd->type == -1) {
		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}
	return 0;
}

static void _vop_notify(struct vringh *vrh)
{
	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
	struct vop_vdev *vdev = vvrh->vdev;
	struct vop_device *vpdev = vdev->vpdev;
	s8 db = vdev->dc->h2c_vdev_db;

	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
}

static void vop_virtio_init_post(struct vop_vdev *vdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
	struct vop_device *vpdev = vdev->vpdev;
	int i, used_size;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
				sizeof(struct vring_used_elem) *
				le16_to_cpu(vqconfig[i].num));
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(vop_dev(vdev), "used_address zero??\n");
			continue;
		}
		vdev->vvr[i].vrh.vring.used =
			(void __force *)vpdev->hw_ops->remap(
			vpdev,
			le64_to_cpu(vqconfig[i].used_address),
			used_size);
	}

	vdev->dc->used_address_updated = 0;

	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
		 __func__, vdev->virtio_id);
}

static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
	int i;

	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
		__func__, vdev->dd->status, vdev->virtio_id);

	for (i = 0; i < vdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the vop
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	vdev->dd->status = 0;
	vdev->dc->vdev_reset = 0;
	vdev->dc->host_ack = 1;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vringh *vrh = &vdev->vvr[i].vrh;

		vdev->vvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
}

static void vop_virtio_reset_devices(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		vop_virtio_device_reset(vdev);
		vdev->poll_wake = 1;
		wake_up(&vdev->waitq);
	}
}

static void vop_bh_handler(struct work_struct *work)
{
	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
			virtio_bh_work);

	if (vdev->dc->used_address_updated)
		vop_virtio_init_post(vdev);

	if (vdev->dc->vdev_reset)
		vop_virtio_device_reset(vdev);

	vdev->poll_wake = 1;
	wake_up(&vdev->waitq);
}

static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
	struct vop_vdev *vdev = data;
	struct vop_device *vpdev = vdev->vpdev;

	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
	schedule_work(&vdev->virtio_bh_work);
	return IRQ_HANDLED;
}

static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct vop_device *vpdev = vdev->vpdev;
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&vi->vop_mutex);
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	if (db == -1 || vdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	vpdev->hw_ops->send_intr(vpdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
					 msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(vop_dev(vdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	vdev->dc->config_change = 0;
	vdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
	mutex_unlock(&vi->vop_mutex);
	return ret;
}

static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_device_desc *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	vqconfig = mic_vq_config(argp);
	for (i = 0; i < argp->num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
		i < MIC_DP_SIZE - mic_total_desc_size(argp);
		i += mic_total_desc_size(devp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
exit:
	return ret;
}
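
/*
 * Device page layout assumed by the scan above: a mic_bootparam header
 * followed by back-to-back entries, each a device descriptor plus its
 * mic_device_ctrl block (which is why the scan steps by
 * mic_total_desc_size(); see vop_init_device_ctrl() below). A free slot
 * has type 0 (never used) or -1 (device removed):
 *
 *	[mic_bootparam][desc 0 | ctrl 0][desc 1 | ctrl 1] ... [free]
 */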

static void vop_init_device_ctrl(struct vop_vdev *vdev,
				 struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	vdev->dc = dc;
}

static int vop_virtio_add_device(struct vop_vdev *vdev,
				 struct mic_device_desc *argp)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vi->vpdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db = -1;
	char irqname[16];
	struct mic_bootparam *bootparam;
	u16 num;
	dma_addr_t vr_addr;

	bootparam = vpdev->hw_ops->get_dp(vpdev);
	init_waitqueue_head(&vdev->waitq);
	INIT_LIST_HEAD(&vdev->list);
	vdev->vpdev = vpdev;

	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
	if (ret) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	vop_init_device_ctrl(vdev, dd);

	vdev->dd = dd;
	vdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];
		struct mic_vring *vr = &vdev->vvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&vvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&vvr->vrh,
				       *(u32 *)mic_vq_features(vdev->dd),
				       num, false, vr->vr.desc, vr->vr.avail,
				       vr->vr.used);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&vvr->riov, NULL, 0);
		vringh_kiov_init(&vvr->wiov, NULL, 0);
		vvr->head = USHRT_MAX;
		vvr->vdev = vdev;
		vvr->vrh.notify = _vop_notify;
		dev_dbg(&vpdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(VOP_INT_DMA_BUF_SIZE));
		vvr->buf_da = dma_map_single(&vpdev->dev,
					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
					  DMA_BIDIRECTIONAL);
	}

	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
		 vdev->virtio_id);
	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
			_vop_virtio_intr_handler, irqname, vdev,
			vdev->virtio_db);
	if (IS_ERR(vdev->virtio_cookie)) {
		ret = PTR_ERR(vdev->virtio_cookie);
		dev_dbg(&vpdev->dev, "request irq failed\n");
		goto err;
	}

	vdev->dc->c2h_vdev_db = vdev->virtio_db;

	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;
	argp->type = type;

	if (bootparam) {
		db = bootparam->h2c_config_db;
		if (db != -1)
			vpdev->hw_ops->send_intr(vpdev, db);
	}
	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct vop_vringh *vvr = &vdev->vvr[j];

		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	return ret;
}

static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
			   struct vop_device *vpdev)
{
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db;
	int ret, retry;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	db = bootparam->h2c_config_db;
	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
	else
		goto done;
	for (retry = 15; retry--;) {
		ret = wait_event_timeout(wake, devp->guest_ack,
					 msecs_to_jiffies(1000));
		if (ret)
			break;
	}
done:
	devp->config_change = 0;
	devp->guest_ack = 0;
}

static void vop_virtio_del_device(struct vop_vdev *vdev)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vdev->vpdev;
	int i;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

	if (!bootparam)
		goto skip_hot_remove;
	vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
	flush_work(&vdev->virtio_bh_work);
	vqconfig = mic_vq_config(vdev->dd);
	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		dma_unmap_single(&vpdev->dev,
				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->buf,
			   get_order(VOP_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&vvr->riov);
		vringh_kiov_cleanup(&vvr->wiov);
		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	vdev->dd->type = -1;
}
/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @vdev - VOP virtio device which provides access to the DMA channel.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Return 0 on success, a negative error code on failure.
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
			size_t len)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx;
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	struct dma_chan *vop_ch = vi->dma_ch;

	if (!vop_ch) {
		err = -EBUSY;
		goto error;
	}
	ddev = vop_ch->device;
	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
		DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie;

		cookie = tx->tx_submit(tx);
		if (dma_submit_error(cookie)) {
			err = -ENOMEM;
			goto error;
		}
		dma_async_issue_pending(vop_ch);
		err = dma_sync_wait(vop_ch, cookie);
	}
error:
	if (err)
		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}
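
/*
 * Illustrative sketch, not driver code: the copy helpers below pair
 * vop_sync_dma() with the per-vring bounce buffer that was DMA mapped
 * in vop_virtio_add_device(), e.g. to pull one chunk of card memory
 * into the kernel before handing it to user space:
 *
 *	err = vop_sync_dma(vdev, vvr->buf_da, card_daddr, partlen);
 *	if (!err)
 *		err = copy_to_user(ubuf, vvr->buf, partlen);
 */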

#define VOP_USE_DMA true

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t dma_offset, partlen;
	int err;

	if (!VOP_USE_DMA || !vi->dma_ch) {
		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		vdev->in_bytes += len;
		err = 0;
		goto err;
	}

	dma_alignment = 1 << vi->dma_ch->device->copy_align;
	x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;
	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		if (copy_to_user(ubuf, vvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->in_bytes_dma += partlen;
		vdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t partlen;
	bool dma = VOP_USE_DMA && vi->dma_ch;
	int err = 0;

	if (dma) {
		dma_alignment = 1 << vi->dma_ch->device->copy_align;
		x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

		if (daddr & (dma_alignment - 1)) {
			vdev->tx_dst_unaligned += len;
			dma = false;
		} else if (ALIGN(len, dma_alignment) > dlen) {
			vdev->tx_len_unaligned += len;
			dma = false;
		}
	}

	if (!dma)
		goto memcpy;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->out_bytes_dma += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	vdev->out_bytes += len;
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
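/*
 * Worked example: with iov->i == 2, two fully processed elements of 100
 * bytes each (their lengths restored by vop_vringh_copy() below) and 40
 * bytes consumed of the current element, the total is 40 + 100 + 100 = 240.
 */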
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
			   void __user *ubuf, size_t len, bool read, int vr_idx,
			   size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		struct kvec *kiov = &iov->iov[iov->i];
		unsigned long daddr = (unsigned long)kiov->iov_base;

		partlen = min(kiov->iov_len, len);
		if (read)
			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
						      daddr,
						      kiov->iov_len,
						      vr_idx);
		else
			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
							daddr,
							kiov->iov_len,
							vr_idx);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		kiov->iov_len -= partlen;
		kiov->iov_base += partlen;
		if (!kiov->iov_len) {
			/* Fix up old iov element then increment. */
			kiov->iov_len = iov->consumed;
			kiov->iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
	struct vringh_kiov *riov = &vvr->riov;
	struct vringh_kiov *wiov = &vvr->wiov;
	struct vringh *vrh = &vvr->vrh;
	u16 *head = &vvr->head;
	struct mic_vring *vr = &vvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
					  head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(vop_dev(vdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = vop_vringh_copy(vdev, riov, ubuf, len,
				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += vop_vringh_iov_consumed(riov);
		total += vop_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
				       struct mic_copy_desc *copy)
{
	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
		return -EINVAL;
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
				struct mic_copy_desc *copy)
{
	int err;
	struct vop_vringh *vvr;

	err = vop_verify_copy_args(vdev, copy);
	if (err)
		return err;

	vvr = &vdev->vvr[copy->vr_idx];
	mutex_lock(&vvr->vr_mutex);
	if (!vop_vdevup(vdev)) {
		err = -ENODEV;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _vop_virtio_copy(vdev, copy);
	if (err) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&vvr->vr_mutex);
	return err;
}

static int vop_open(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev;
	struct vop_info *vi = container_of(f->private_data,
		struct vop_info, miscdev);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;
	vdev->vi = vi;
	mutex_init(&vdev->vdev_mutex);
	f->private_data = vdev;
	init_completion(&vdev->destroy);
	complete(&vdev->destroy);
	return 0;
}

static int vop_release(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
	struct vop_info *vi = vdev->vi;
	struct list_head *pos, *tmp;
	bool found = false;

	mutex_lock(&vdev->vdev_mutex);
	if (vdev->deleted)
		goto unlock;
	mutex_lock(&vi->vop_mutex);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev_tmp = list_entry(pos, struct vop_vdev, list);
		if (vdev == vdev_tmp) {
			vop_virtio_del_device(vdev);
			list_del(pos);
			found = true;
			break;
		}
	}
	mutex_unlock(&vi->vop_mutex);
unlock:
	mutex_unlock(&vdev->vdev_mutex);
	if (!found)
		wait_for_completion(&vdev->destroy);
	f->private_data = NULL;
	kfree(vdev);
	return 0;
}

static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct vop_vdev *vdev = f->private_data;
	struct vop_info *vi = vdev->vi;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case MIC_VIRTIO_ADD_DEVICE:
	{
		struct mic_device_desc dd, *dd_config;

		if (copy_from_user(&dd, argp, sizeof(dd)))
			return -EFAULT;

		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
		    dd.num_vq > MIC_MAX_VRINGS)
			return -EINVAL;

		dd_config = memdup_user(argp, mic_desc_size(&dd));
		if (IS_ERR(dd_config))
			return PTR_ERR(dd_config);

		/* Ensure desc has not changed between the two reads */
		if (memcmp(&dd, dd_config, sizeof(dd))) {
			ret = -EINVAL;
			goto free_ret;
		}
		mutex_lock(&vdev->vdev_mutex);
		mutex_lock(&vi->vop_mutex);
		ret = vop_virtio_add_device(vdev, dd_config);
		if (ret)
			goto unlock_ret;
		list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
		mutex_unlock(&vi->vop_mutex);
		mutex_unlock(&vdev->vdev_mutex);
free_ret:
		kfree(dd_config);
		return ret;
	}
	case MIC_VIRTIO_COPY_DESC:
	{
		struct mic_copy_desc copy;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto _unlock_ret;

		if (copy_from_user(&copy, argp, sizeof(copy))) {
			ret = -EFAULT;
			goto _unlock_ret;
		}

		ret = vop_virtio_copy_desc(vdev, &copy);
		if (ret < 0)
			goto _unlock_ret;
		if (copy_to_user(
			&((struct mic_copy_desc __user *)argp)->out_len,
			&copy.out_len, sizeof(copy.out_len)))
			ret = -EFAULT;
_unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	case MIC_VIRTIO_CONFIG_CHANGE:
	{
		void *buf;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto __unlock_ret;
		buf = memdup_user(argp, vdev->dd->config_len);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			goto __unlock_ret;
		}
		ret = vop_virtio_config_change(vdev, buf);
		kfree(buf);
__unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
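
/*
 * Illustrative user space sketch of the ioctl interface above (not
 * driver code; assumes "desc" points to a populated mic_device_desc
 * followed by its vring configs, and fd is the misc device node
 * registered by vop_host_init() below, e.g. /dev/vop_virtio0):
 *
 *	ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc);
 *	ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *	ioctl(fd, MIC_VIRTIO_CONFIG_CHANGE, config);
 */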

/*
 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static __poll_t vop_poll(struct file *f, poll_table *wait)
{
	struct vop_vdev *vdev = f->private_data;
	__poll_t mask = 0;

	mutex_lock(&vdev->vdev_mutex);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
		goto done;
	}
	poll_wait(f, &vdev->waitq, wait);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
	} else if (vdev->poll_wake) {
		vdev->poll_wake = 0;
		mask = EPOLLIN | EPOLLOUT;
	}
done:
	mutex_unlock(&vdev->vdev_mutex);
	return mask;
}
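
/*
 * Illustrative user space sketch of the drain requirement described
 * above vop_poll() (not driver code; error handling omitted, and the
 * exit condition assumes MIC_VIRTIO_COPY_DESC reports out_len == 0
 * once no descriptors remain):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		do {
 *			rc = ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *		} while (rc >= 0 && copy.out_len);
 *	}
 */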

static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
		 unsigned long *size, unsigned long *pa)
{
	struct vop_device *vpdev = vdev->vpdev;
	unsigned long start = MIC_DP_SIZE;
	int i;

	/*
	 * MMAP interface is as follows:
	 * offset				region
	 * 0x0					virtio device_page
	 * 0x1000				first vring
	 * 0x1000 + size of 1st vring		second vring
	 * ....
	 */
	if (!offset) {
		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
		*size = MIC_DP_SIZE;
		return 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		if (offset == start) {
			*pa = virt_to_phys(vvr->vring.va);
			*size = vvr->vring.len;
			return 0;
		}
		start += vvr->vring.len;
	}
	return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
	struct vop_vdev *vdev = f->private_data;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
	int i, err;

	err = vop_vdev_inited(vdev);
	if (err)
		goto ret;
	if (vma->vm_flags & VM_WRITE) {
		err = -EACCES;
		goto ret;
	}
	while (size_rem) {
		i = vop_query_offset(vdev, offset, &size, &pa);
		if (i < 0) {
			err = -EINVAL;
			goto ret;
		}
		err = remap_pfn_range(vma, vma->vm_start + offset,
				      pa >> PAGE_SHIFT, size,
				      vma->vm_page_prot);
		if (err)
			goto ret;
		size_rem -= size;
		offset += size;
	}
ret:
	return err;
}
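
/*
 * Illustrative user space sketch of the layout documented in
 * vop_query_offset() (not driver code; assumes MIC_DP_SIZE and the
 * first vring's length are known to user space, e.g. via the
 * mic_common.h UAPI header and the mapped device page):
 *
 *	void *dp  = mmap(NULL, MIC_DP_SIZE, PROT_READ, MAP_SHARED, fd, 0);
 *	void *vr0 = mmap(NULL, vr0_len, PROT_READ, MAP_SHARED, fd,
 *			 MIC_DP_SIZE);
 */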

static const struct file_operations vop_fops = {
	.open = vop_open,
	.release = vop_release,
	.unlocked_ioctl = vop_ioctl,
	.poll = vop_poll,
	.mmap = vop_mmap,
	.owner = THIS_MODULE,
};

int vop_host_init(struct vop_info *vi)
{
	int rc;
	struct miscdevice *mdev;
	struct vop_device *vpdev = vi->vpdev;

	INIT_LIST_HEAD(&vi->vdev_list);
	vi->dma_ch = vpdev->dma_ch;
	mdev = &vi->miscdev;
	mdev->minor = MISC_DYNAMIC_MINOR;
	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
	mdev->name = vi->name;
	mdev->fops = &vop_fops;
	mdev->parent = &vpdev->dev;

	rc = misc_register(mdev);
	if (rc)
		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
	return rc;
}

void vop_host_uninit(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	mutex_lock(&vi->vop_mutex);
	vop_virtio_reset_devices(vi);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		list_del(pos);
		reinit_completion(&vdev->destroy);
		mutex_unlock(&vi->vop_mutex);
		mutex_lock(&vdev->vdev_mutex);
		vop_virtio_del_device(vdev);
		vdev->deleted = true;
		mutex_unlock(&vdev->vdev_mutex);
		complete(&vdev->destroy);
		mutex_lock(&vi->vop_mutex);
	}
	mutex_unlock(&vi->vop_mutex);
	misc_deregister(&vi->miscdev);
}