// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

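/*
 * Per-virtqueue state. desc_addr/driver_addr/device_addr and num are
 * cached from the vDPA bus side and exported to the userspace daemon
 * via VDUSE_VQ_GET_INFO; kick_lock serializes kickfd updates against
 * kicks, and irq_lock protects the interrupt callback.
 */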
struct vduse_virtqueue {
        u16 index;
        u16 num_max;
        u32 num;
        u64 desc_addr;
        u64 driver_addr;
        u64 device_addr;
        struct vdpa_vq_state state;
        bool ready;
        bool kicked;
        spinlock_t kick_lock;
        spinlock_t irq_lock;
        struct eventfd_ctx *kickfd;
        struct vdpa_callback cb;
        struct work_struct inject;
        struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
        struct vdpa_device vdpa;
        struct vduse_dev *dev;
};

struct vduse_umem {
        unsigned long iova;
        unsigned long npages;
        struct page **pages;
        struct mm_struct *mm;
};

struct vduse_dev {
        struct vduse_vdpa *vdev;
        struct device *dev;
        struct vduse_virtqueue *vqs;
        struct vduse_iova_domain *domain;
        char *name;
        struct mutex lock;
        spinlock_t msg_lock;
        u64 msg_unique;
        u32 msg_timeout;
        wait_queue_head_t waitq;
        struct list_head send_list;
        struct list_head recv_list;
        struct vdpa_callback config_cb;
        struct work_struct inject;
        spinlock_t irq_lock;
        struct rw_semaphore rwsem;
        int minor;
        bool broken;
        bool connected;
        u64 api_version;
        u64 device_features;
        u64 driver_features;
        u32 device_id;
        u32 vendor_id;
        u32 generation;
        u32 config_size;
        void *config;
        u8 status;
        u32 vq_num;
        u32 vq_align;
        struct vduse_umem *umem;
        struct mutex mem_lock;
};

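/*
 * A synchronous control message: the kernel fills @req, queues it on
 * send_list, and sleeps on @waitq until userspace writes back a
 * matching @resp (paired by request_id) or the wait times out.
 */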
struct vduse_dev_msg {
        struct vduse_dev_request req;
        struct vduse_dev_response resp;
        struct list_head list;
        wait_queue_head_t waitq;
        bool completed;
};

struct vduse_control {
        u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
        VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
        struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

        return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
        struct vdpa_device *vdpa = dev_to_vdpa(dev);

        return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
                                            uint32_t request_id)
{
        struct vduse_dev_msg *msg;

        list_for_each_entry(msg, head, list) {
                if (msg->req.request_id == request_id) {
                        list_del(&msg->list);
                        return msg;
                }
        }

        return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
        struct vduse_dev_msg *msg = NULL;

        if (!list_empty(head)) {
                msg = list_first_entry(head, struct vduse_dev_msg, list);
                list_del(&msg->list);
        }

        return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
                              struct vduse_dev_msg *msg)
{
        list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
        struct vduse_dev_msg *msg, *tmp;

        if (unlikely(dev->broken))
                return;

        list_splice_init(&dev->recv_list, &dev->send_list);
        list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
                list_del(&msg->list);
                msg->completed = 1;
                msg->resp.result = VDUSE_REQ_RESULT_FAILED;
                wake_up(&msg->waitq);
        }
        dev->broken = true;
        wake_up(&dev->waitq);
}

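/*
 * Send a request to the userspace daemon and wait for its reply.
 * Unanswered messages past msg_timeout mark the whole device broken,
 * so subsequent requests fail fast with -EIO.
 */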
static int vduse_dev_msg_sync(struct vduse_dev *dev,
                              struct vduse_dev_msg *msg)
{
        int ret;

        if (unlikely(dev->broken))
                return -EIO;

        init_waitqueue_head(&msg->waitq);
        spin_lock(&dev->msg_lock);
        if (unlikely(dev->broken)) {
                spin_unlock(&dev->msg_lock);
                return -EIO;
        }
        msg->req.request_id = dev->msg_unique++;
        vduse_enqueue_msg(&dev->send_list, msg);
        wake_up(&dev->waitq);
        spin_unlock(&dev->msg_lock);
        if (dev->msg_timeout)
                ret = wait_event_killable_timeout(msg->waitq, msg->completed,
                                                  (long)dev->msg_timeout * HZ);
        else
                ret = wait_event_killable(msg->waitq, msg->completed);

        spin_lock(&dev->msg_lock);
        if (!msg->completed) {
                list_del(&msg->list);
                msg->resp.result = VDUSE_REQ_RESULT_FAILED;
                /* Mark the device as malfunctioning when a timeout occurs */
                if (!ret)
                        vduse_dev_broken(dev);
        }
        ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
        spin_unlock(&dev->msg_lock);

        return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
                                         struct vduse_virtqueue *vq,
                                         struct vdpa_vq_state_packed *packed)
{
        struct vduse_dev_msg msg = { 0 };
        int ret;

        msg.req.type = VDUSE_GET_VQ_STATE;
        msg.req.vq_state.index = vq->index;

        ret = vduse_dev_msg_sync(dev, &msg);
        if (ret)
                return ret;

        packed->last_avail_counter =
                msg.resp.vq_state.packed.last_avail_counter & 0x0001;
        packed->last_avail_idx =
                msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
        packed->last_used_counter =
                msg.resp.vq_state.packed.last_used_counter & 0x0001;
        packed->last_used_idx =
                msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

        return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
                                        struct vduse_virtqueue *vq,
                                        struct vdpa_vq_state_split *split)
{
        struct vduse_dev_msg msg = { 0 };
        int ret;

        msg.req.type = VDUSE_GET_VQ_STATE;
        msg.req.vq_state.index = vq->index;

        ret = vduse_dev_msg_sync(dev, &msg);
        if (ret)
                return ret;

        split->avail_index = msg.resp.vq_state.split.avail_index;

        return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
        struct vduse_dev_msg msg = { 0 };

        msg.req.type = VDUSE_SET_STATUS;
        msg.req.s.status = status;

        return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
                                  u64 start, u64 last)
{
        struct vduse_dev_msg msg = { 0 };

        if (last < start)
                return -EINVAL;

        msg.req.type = VDUSE_UPDATE_IOTLB;
        msg.req.iova.start = start;
        msg.req.iova.last = last;

        return vduse_dev_msg_sync(dev, &msg);
}

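/*
 * read() on /dev/vduse/$NAME: hand the next pending request to the
 * daemon. The message moves to recv_list until the daemon answers it
 * via write().
 */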
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        struct file *file = iocb->ki_filp;
        struct vduse_dev *dev = file->private_data;
        struct vduse_dev_msg *msg;
        int size = sizeof(struct vduse_dev_request);
        ssize_t ret;

        if (iov_iter_count(to) < size)
                return -EINVAL;

        spin_lock(&dev->msg_lock);
        while (1) {
                msg = vduse_dequeue_msg(&dev->send_list);
                if (msg)
                        break;

                ret = -EAGAIN;
                if (file->f_flags & O_NONBLOCK)
                        goto unlock;

                spin_unlock(&dev->msg_lock);
                ret = wait_event_interruptible_exclusive(dev->waitq,
                                        !list_empty(&dev->send_list));
                if (ret)
                        return ret;

                spin_lock(&dev->msg_lock);
        }
        spin_unlock(&dev->msg_lock);
        ret = copy_to_iter(&msg->req, size, to);
        spin_lock(&dev->msg_lock);
        if (ret != size) {
                ret = -EFAULT;
                vduse_enqueue_msg(&dev->send_list, msg);
                goto unlock;
        }
        vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
        spin_unlock(&dev->msg_lock);

        return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
        int i;

        for (i = 0; i < size; i++) {
                if (ptr[i])
                        return false;
        }
        return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct file *file = iocb->ki_filp;
        struct vduse_dev *dev = file->private_data;
        struct vduse_dev_response resp;
        struct vduse_dev_msg *msg;
        size_t ret;

        ret = copy_from_iter(&resp, sizeof(resp), from);
        if (ret != sizeof(resp))
                return -EINVAL;

        if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
                return -EINVAL;

        spin_lock(&dev->msg_lock);
        msg = vduse_find_msg(&dev->recv_list, resp.request_id);
        if (!msg) {
                ret = -ENOENT;
                goto unlock;
        }

        memcpy(&msg->resp, &resp, sizeof(resp));
        msg->completed = 1;
        wake_up(&msg->waitq);
unlock:
        spin_unlock(&dev->msg_lock);

        return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
        struct vduse_dev *dev = file->private_data;
        __poll_t mask = 0;

        poll_wait(file, &dev->waitq, wait);

        spin_lock(&dev->msg_lock);

        if (unlikely(dev->broken))
                mask |= EPOLLERR;
        if (!list_empty(&dev->send_list))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (!list_empty(&dev->recv_list))
                mask |= EPOLLOUT | EPOLLWRNORM;

        spin_unlock(&dev->msg_lock);

        return mask;
}

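/*
 * Bring the device back to its initial state on virtio reset: clear
 * status and per-vq state, drop callbacks and kickfds, and flush any
 * in-flight injection/kick work under the write side of dev->rwsem.
 */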
static void vduse_dev_reset(struct vduse_dev *dev)
{
        int i;
        struct vduse_iova_domain *domain = dev->domain;

        /* The coherent mappings are handled in vduse_dev_free_coherent() */
        if (domain->bounce_map)
                vduse_domain_reset_bounce_map(domain);

        down_write(&dev->rwsem);

        dev->status = 0;
        dev->driver_features = 0;
        dev->generation++;
        spin_lock(&dev->irq_lock);
        dev->config_cb.callback = NULL;
        dev->config_cb.private = NULL;
        spin_unlock(&dev->irq_lock);
        flush_work(&dev->inject);

        for (i = 0; i < dev->vq_num; i++) {
                struct vduse_virtqueue *vq = &dev->vqs[i];

                vq->ready = false;
                vq->desc_addr = 0;
                vq->driver_addr = 0;
                vq->device_addr = 0;
                vq->num = 0;
                memset(&vq->state, 0, sizeof(vq->state));

                spin_lock(&vq->kick_lock);
                vq->kicked = false;
                if (vq->kickfd)
                        eventfd_ctx_put(vq->kickfd);
                vq->kickfd = NULL;
                spin_unlock(&vq->kick_lock);

                spin_lock(&vq->irq_lock);
                vq->cb.callback = NULL;
                vq->cb.private = NULL;
                spin_unlock(&vq->irq_lock);
                flush_work(&vq->inject);
                flush_work(&vq->kick);
        }

        up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
                                     u64 desc_area, u64 driver_area,
                                     u64 device_area)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->desc_addr = desc_area;
        vq->driver_addr = driver_area;
        vq->device_addr = device_area;

        return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
        spin_lock(&vq->kick_lock);
        if (!vq->ready)
                goto unlock;

        if (vq->kickfd)
                eventfd_signal(vq->kickfd, 1);
        else
                vq->kicked = true;
unlock:
        spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, kick);

        vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (!eventfd_signal_allowed()) {
                schedule_work(&vq->kick);
                return;
        }
        vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
                                 struct vdpa_callback *cb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        spin_lock(&vq->irq_lock);
        vq->cb.callback = cb->callback;
        vq->cb.private = cb->private;
        spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
                                    u16 idx, bool ready)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
                                   const struct vdpa_vq_state *state)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
                vq->state.packed.last_avail_counter =
                                state->packed.last_avail_counter;
                vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
                vq->state.packed.last_used_counter =
                                state->packed.last_used_counter;
                vq->state.packed.last_used_idx = state->packed.last_used_idx;
        } else {
                vq->state.split.avail_index = state->split.avail_index;
        }

        return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
                                   struct vdpa_vq_state *state)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
                return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

        return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        dev->driver_features = features;
        return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
                                     struct vdpa_callback *cb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        spin_lock(&dev->irq_lock);
        dev->config_cb.callback = cb->callback;
        dev->config_cb.private = cb->private;
        spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        u16 num_max = 0;
        int i;

        for (i = 0; i < dev->vq_num; i++)
                if (num_max < dev->vqs[i].num_max)
                        num_max = dev->vqs[i].num_max;

        return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        if (vduse_dev_set_status(dev, status))
                return;

        dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
                                  void *buf, unsigned int len)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        /* Initialize the buffer in case of partial copy. */
        memset(buf, 0, len);

        if (offset > dev->config_size)
                return;

        if (len > dev->config_size - offset)
                len = dev->config_size - offset;

        memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
                                  const void *buf, unsigned int len)
{
        /* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        int ret = vduse_dev_set_status(dev, 0);

        vduse_dev_reset(dev);

        return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
                              unsigned int asid,
                              struct vhost_iotlb *iotlb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        int ret;

        ret = vduse_domain_set_map(dev->domain, iotlb);
        if (ret)
                return ret;

        ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
        if (ret) {
                vduse_domain_clear_map(dev->domain, iotlb);
                return ret;
        }

        return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        dev->vdev = NULL;
}

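/*
 * vdpa_config_ops implementation. Operations with device-wide side
 * effects (set_status, reset, set_map, get_vq_state) are forwarded
 * synchronously to the userspace daemon; the rest are answered from
 * state cached in struct vduse_dev.
 */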
static const struct vdpa_config_ops vduse_vdpa_config_ops = {
        .set_vq_address = vduse_vdpa_set_vq_address,
        .kick_vq = vduse_vdpa_kick_vq,
        .set_vq_cb = vduse_vdpa_set_vq_cb,
        .set_vq_num = vduse_vdpa_set_vq_num,
        .set_vq_ready = vduse_vdpa_set_vq_ready,
        .get_vq_ready = vduse_vdpa_get_vq_ready,
        .set_vq_state = vduse_vdpa_set_vq_state,
        .get_vq_state = vduse_vdpa_get_vq_state,
        .get_vq_align = vduse_vdpa_get_vq_align,
        .get_device_features = vduse_vdpa_get_device_features,
        .set_driver_features = vduse_vdpa_set_driver_features,
        .get_driver_features = vduse_vdpa_get_driver_features,
        .set_config_cb = vduse_vdpa_set_config_cb,
        .get_vq_num_max = vduse_vdpa_get_vq_num_max,
        .get_device_id = vduse_vdpa_get_device_id,
        .get_vendor_id = vduse_vdpa_get_vendor_id,
        .get_status = vduse_vdpa_get_status,
        .set_status = vduse_vdpa_set_status,
        .get_config_size = vduse_vdpa_get_config_size,
        .get_config = vduse_vdpa_get_config,
        .set_config = vduse_vdpa_set_config,
        .get_generation = vduse_vdpa_get_generation,
        .reset = vduse_vdpa_reset,
        .set_map = vduse_vdpa_set_map,
        .free = vduse_vdpa_free,
};

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
                                 size_t size, enum dma_data_direction dir,
                                 unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
                                      dma_addr_t *dma_addr, gfp_t flag,
                                      unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;
        unsigned long iova;
        void *addr;

        *dma_addr = DMA_MAPPING_ERROR;
        addr = vduse_domain_alloc_coherent(domain, size,
                                (dma_addr_t *)&iova, flag, attrs);
        if (!addr)
                return NULL;

        *dma_addr = (dma_addr_t)iova;

        return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
                                    void *vaddr, dma_addr_t dma_addr,
                                    unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return domain->bounce_size;
}

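/*
 * Custom dma_map_ops installed on the vdpa device so that virtio DMA
 * lands in the VDUSE IOVA domain (a bounce buffer the daemon can
 * safely access) instead of real hardware mappings.
 */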
static const struct dma_map_ops vduse_dev_dma_ops = {
        .map_page = vduse_dev_map_page,
        .unmap_page = vduse_dev_unmap_page,
        .alloc = vduse_dev_alloc_coherent,
        .free = vduse_dev_free_coherent,
        .max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
        unsigned int flags = 0;

        switch (perm) {
        case VDUSE_ACCESS_WO:
                flags |= O_WRONLY;
                break;
        case VDUSE_ACCESS_RO:
                flags |= O_RDONLY;
                break;
        case VDUSE_ACCESS_RW:
                flags |= O_RDWR;
                break;
        default:
                WARN(1, "invalid vhost IOTLB permission\n");
                break;
        }

        return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
                              struct vduse_vq_eventfd *eventfd)
{
        struct eventfd_ctx *ctx = NULL;
        struct vduse_virtqueue *vq;
        u32 index;

        if (eventfd->index >= dev->vq_num)
                return -EINVAL;

        index = array_index_nospec(eventfd->index, dev->vq_num);
        vq = &dev->vqs[index];
        if (eventfd->fd >= 0) {
                ctx = eventfd_ctx_fdget(eventfd->fd);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);
        } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN) {
                return 0;
        }

        spin_lock(&vq->kick_lock);
        if (vq->kickfd)
                eventfd_ctx_put(vq->kickfd);
        vq->kickfd = ctx;
        if (vq->ready && vq->kicked && vq->kickfd) {
                eventfd_signal(vq->kickfd, 1);
                vq->kicked = false;
        }
        spin_unlock(&vq->kick_lock);

        return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
        int i;

        for (i = 0; i < dev->vq_num; i++)
                if (!dev->vqs[i].num_max)
                        return false;

        return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
        struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

        spin_lock_irq(&dev->irq_lock);
        if (dev->config_cb.callback)
                dev->config_cb.callback(dev->config_cb.private);
        spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, inject);

        spin_lock_irq(&vq->irq_lock);
        if (vq->ready && vq->cb.callback)
                vq->cb.callback(vq->cb.private);
        spin_unlock_irq(&vq->irq_lock);
}

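/*
 * Interrupt injection requested by the daemon is bounced through
 * vduse_irq_wq and only allowed once the driver has set DRIVER_OK;
 * the read side of dev->rwsem keeps it from racing with
 * vduse_dev_reset().
 */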
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
                                    struct work_struct *irq_work)
{
        int ret = -EINVAL;

        down_read(&dev->rwsem);
        if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto unlock;

        ret = 0;
        queue_work(vduse_irq_wq, irq_work);
unlock:
        up_read(&dev->rwsem);

        return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
                                u64 iova, u64 size)
{
        int ret;

        mutex_lock(&dev->mem_lock);
        ret = -ENOENT;
        if (!dev->umem)
                goto unlock;

        ret = -EINVAL;
        if (dev->umem->iova != iova || size != dev->domain->bounce_size)
                goto unlock;

        vduse_domain_remove_user_bounce_pages(dev->domain);
        unpin_user_pages_dirty_lock(dev->umem->pages,
                                    dev->umem->npages, true);
        atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
        mmdrop(dev->umem->mm);
        vfree(dev->umem->pages);
        kfree(dev->umem);
        dev->umem = NULL;
        ret = 0;
unlock:
        mutex_unlock(&dev->mem_lock);
        return ret;
}

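/*
 * Let the daemon supply its own memory to back the bounce buffer
 * (VDUSE_IOTLB_REG_UMEM): the pages are long-term pinned, charged
 * against RLIMIT_MEMLOCK, and swapped in for the kernel bounce pages.
 */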
static int vduse_dev_reg_umem(struct vduse_dev *dev,
                              u64 iova, u64 uaddr, u64 size)
{
        struct page **page_list = NULL;
        struct vduse_umem *umem = NULL;
        long pinned = 0;
        unsigned long npages, lock_limit;
        int ret;

        if (!dev->domain->bounce_map ||
            size != dev->domain->bounce_size ||
            iova != 0 || uaddr & ~PAGE_MASK)
                return -EINVAL;

        mutex_lock(&dev->mem_lock);
        ret = -EEXIST;
        if (dev->umem)
                goto unlock;

        ret = -ENOMEM;
        npages = size >> PAGE_SHIFT;
        page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
                              GFP_KERNEL_ACCOUNT);
        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!page_list || !umem)
                goto unlock;

        mmap_read_lock(current->mm);

        lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
        if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
                goto out;

        pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
                                page_list, NULL);
        if (pinned != npages) {
                ret = pinned < 0 ? pinned : -ENOMEM;
                goto out;
        }

        ret = vduse_domain_add_user_bounce_pages(dev->domain,
                                                 page_list, pinned);
        if (ret)
                goto out;

        atomic64_add(npages, &current->mm->pinned_vm);

        umem->pages = page_list;
        umem->npages = pinned;
        umem->iova = iova;
        umem->mm = current->mm;
        mmgrab(current->mm);

        dev->umem = umem;
out:
        if (ret && pinned > 0)
                unpin_user_pages(page_list, pinned);

        mmap_read_unlock(current->mm);
unlock:
        if (ret) {
                vfree(page_list);
                kfree(umem);
        }
        mutex_unlock(&dev->mem_lock);
        return ret;
}

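/*
 * ioctl() handler for /dev/vduse/$NAME. A typical daemon service loop
 * (an illustrative sketch, not part of this file; error handling
 * omitted) pairs with the read/write handlers above roughly like:
 *
 *	struct vduse_dev_request req;
 *	struct vduse_dev_response resp = { 0 };
 *
 *	while (read(dev_fd, &req, sizeof(req)) == sizeof(req)) {
 *		resp.request_id = req.request_id;
 *		resp.result = VDUSE_REQ_RESULT_OK;
 *		// handle VDUSE_GET_VQ_STATE/VDUSE_SET_STATUS/... here
 *		write(dev_fd, &resp, sizeof(resp));
 *	}
 */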
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg)
{
        struct vduse_dev *dev = file->private_data;
        void __user *argp = (void __user *)arg;
        int ret;

        if (unlikely(dev->broken))
                return -EPERM;

        switch (cmd) {
        case VDUSE_IOTLB_GET_FD: {
                struct vduse_iotlb_entry entry;
                struct vhost_iotlb_map *map;
                struct vdpa_map_file *map_file;
                struct vduse_iova_domain *domain = dev->domain;
                struct file *f = NULL;

                ret = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof(entry)))
                        break;

                ret = -EINVAL;
                if (entry.start > entry.last)
                        break;

                spin_lock(&domain->iotlb_lock);
                map = vhost_iotlb_itree_first(domain->iotlb,
                                              entry.start, entry.last);
                if (map) {
                        map_file = (struct vdpa_map_file *)map->opaque;
                        f = get_file(map_file->file);
                        entry.offset = map_file->offset;
                        entry.start = map->start;
                        entry.last = map->last;
                        entry.perm = map->perm;
                }
                spin_unlock(&domain->iotlb_lock);
                ret = -EINVAL;
                if (!f)
                        break;

                ret = -EFAULT;
                if (copy_to_user(argp, &entry, sizeof(entry))) {
                        fput(f);
                        break;
                }
                ret = receive_fd(f, perm_to_file_flags(entry.perm));
                fput(f);
                break;
        }
        case VDUSE_DEV_GET_FEATURES:
                /*
                 * Just mirror what the driver wrote here.
                 * The driver is expected to check FEATURES_OK later.
                 */
                ret = put_user(dev->driver_features, (u64 __user *)argp);
                break;
        case VDUSE_DEV_SET_CONFIG: {
                struct vduse_config_data config;
                unsigned long size = offsetof(struct vduse_config_data,
                                              buffer);

                ret = -EFAULT;
                if (copy_from_user(&config, argp, size))
                        break;

                ret = -EINVAL;
                if (config.offset > dev->config_size ||
                    config.length == 0 ||
                    config.length > dev->config_size - config.offset)
                        break;

                ret = -EFAULT;
                if (copy_from_user(dev->config + config.offset, argp + size,
                                   config.length))
                        break;

                ret = 0;
                break;
        }
        case VDUSE_DEV_INJECT_CONFIG_IRQ:
                ret = vduse_dev_queue_irq_work(dev, &dev->inject);
                break;
        case VDUSE_VQ_SETUP: {
                struct vduse_vq_config config;
                u32 index;

                ret = -EFAULT;
                if (copy_from_user(&config, argp, sizeof(config)))
                        break;

                ret = -EINVAL;
                if (config.index >= dev->vq_num)
                        break;

                if (!is_mem_zero((const char *)config.reserved,
                                 sizeof(config.reserved)))
                        break;

                index = array_index_nospec(config.index, dev->vq_num);
                dev->vqs[index].num_max = config.max_size;
                ret = 0;
                break;
        }
        case VDUSE_VQ_GET_INFO: {
                struct vduse_vq_info vq_info;
                struct vduse_virtqueue *vq;
                u32 index;

                ret = -EFAULT;
                if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
                        break;

                ret = -EINVAL;
                if (vq_info.index >= dev->vq_num)
                        break;

                index = array_index_nospec(vq_info.index, dev->vq_num);
                vq = &dev->vqs[index];
                vq_info.desc_addr = vq->desc_addr;
                vq_info.driver_addr = vq->driver_addr;
                vq_info.device_addr = vq->device_addr;
                vq_info.num = vq->num;

                if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
                        vq_info.packed.last_avail_counter =
                                vq->state.packed.last_avail_counter;
                        vq_info.packed.last_avail_idx =
                                vq->state.packed.last_avail_idx;
                        vq_info.packed.last_used_counter =
                                vq->state.packed.last_used_counter;
                        vq_info.packed.last_used_idx =
                                vq->state.packed.last_used_idx;
                } else {
                        vq_info.split.avail_index =
                                vq->state.split.avail_index;
                }

                vq_info.ready = vq->ready;

                ret = -EFAULT;
                if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
                        break;

                ret = 0;
                break;
        }
        case VDUSE_VQ_SETUP_KICKFD: {
                struct vduse_vq_eventfd eventfd;

                ret = -EFAULT;
                if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
                        break;

                ret = vduse_kickfd_setup(dev, &eventfd);
                break;
        }
        case VDUSE_VQ_INJECT_IRQ: {
                u32 index;

                ret = -EFAULT;
                if (get_user(index, (u32 __user *)argp))
                        break;

                ret = -EINVAL;
                if (index >= dev->vq_num)
                        break;

                index = array_index_nospec(index, dev->vq_num);
                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
                break;
        }
        case VDUSE_IOTLB_REG_UMEM: {
                struct vduse_iova_umem umem;

                ret = -EFAULT;
                if (copy_from_user(&umem, argp, sizeof(umem)))
                        break;

                ret = -EINVAL;
                if (!is_mem_zero((const char *)umem.reserved,
                                 sizeof(umem.reserved)))
                        break;

                ret = vduse_dev_reg_umem(dev, umem.iova,
                                         umem.uaddr, umem.size);
                break;
        }
        case VDUSE_IOTLB_DEREG_UMEM: {
                struct vduse_iova_umem umem;

                ret = -EFAULT;
                if (copy_from_user(&umem, argp, sizeof(umem)))
                        break;

                ret = -EINVAL;
                if (!is_mem_zero((const char *)umem.reserved,
                                 sizeof(umem.reserved)))
                        break;

                ret = vduse_dev_dereg_umem(dev, umem.iova,
                                           umem.size);
                break;
        }
        case VDUSE_IOTLB_GET_INFO: {
                struct vduse_iova_info info;
                struct vhost_iotlb_map *map;
                struct vduse_iova_domain *domain = dev->domain;

                ret = -EFAULT;
                if (copy_from_user(&info, argp, sizeof(info)))
                        break;

                ret = -EINVAL;
                if (info.start > info.last)
                        break;

                if (!is_mem_zero((const char *)info.reserved,
                                 sizeof(info.reserved)))
                        break;

                spin_lock(&domain->iotlb_lock);
                map = vhost_iotlb_itree_first(domain->iotlb,
                                              info.start, info.last);
                if (map) {
                        info.start = map->start;
                        info.last = map->last;
                        info.capability = 0;
                        if (domain->bounce_map && map->start == 0 &&
                            map->last == domain->bounce_size - 1)
                                info.capability |= VDUSE_IOVA_CAP_UMEM;
                }
                spin_unlock(&domain->iotlb_lock);
                if (!map)
                        break;

                ret = -EFAULT;
                if (copy_to_user(argp, &info, sizeof(info)))
                        break;

                ret = 0;
                break;
        }
        default:
                ret = -ENOIOCTLCMD;
                break;
        }

        return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
        struct vduse_dev *dev = file->private_data;

        vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
        spin_lock(&dev->msg_lock);
        /* Make sure the inflight messages can be processed after reconnection */
        list_splice_init(&dev->recv_list, &dev->send_list);
        spin_unlock(&dev->msg_lock);
        dev->connected = false;

        return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
        struct vduse_dev *dev;

        mutex_lock(&vduse_lock);
        dev = idr_find(&vduse_idr, minor);
        mutex_unlock(&vduse_lock);

        return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
        int ret;
        struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

        if (!dev)
                return -ENODEV;

        ret = -EBUSY;
        mutex_lock(&dev->lock);
        if (dev->connected)
                goto unlock;

        ret = 0;
        dev->connected = true;
        file->private_data = dev;
unlock:
        mutex_unlock(&dev->lock);

        return ret;
}

static const struct file_operations vduse_dev_fops = {
        .owner = THIS_MODULE,
        .open = vduse_dev_open,
        .release = vduse_dev_release,
        .read_iter = vduse_dev_read_iter,
        .write_iter = vduse_dev_write_iter,
        .poll = vduse_dev_poll,
        .unlocked_ioctl = vduse_dev_ioctl,
        .compat_ioctl = compat_ptr_ioctl,
        .llseek = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
        struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

        if (!dev)
                return NULL;

        mutex_init(&dev->lock);
        mutex_init(&dev->mem_lock);
        spin_lock_init(&dev->msg_lock);
        INIT_LIST_HEAD(&dev->send_list);
        INIT_LIST_HEAD(&dev->recv_list);
        spin_lock_init(&dev->irq_lock);
        init_rwsem(&dev->rwsem);

        INIT_WORK(&dev->inject, vduse_dev_irq_inject);
        init_waitqueue_head(&dev->waitq);

        return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
        kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
        struct vduse_dev *dev;
        int id;

        idr_for_each_entry(&vduse_idr, dev, id)
                if (!strcmp(dev->name, name))
                        return dev;

        return NULL;
}

static int vduse_destroy_dev(char *name)
{
        struct vduse_dev *dev = vduse_find_dev(name);

        if (!dev)
                return -EINVAL;

        mutex_lock(&dev->lock);
        if (dev->vdev || dev->connected) {
                mutex_unlock(&dev->lock);
                return -EBUSY;
        }
        dev->connected = true;
        mutex_unlock(&dev->lock);

        vduse_dev_reset(dev);
        device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
        idr_remove(&vduse_idr, dev->minor);
        kvfree(dev->config);
        kfree(dev->vqs);
        vduse_domain_destroy(dev->domain);
        kfree(dev->name);
        vduse_dev_destroy(dev);
        module_put(THIS_MODULE);

        return 0;
}

static bool device_is_allowed(u32 device_id)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
                if (allowed_device_id[i] == device_id)
                        return true;

        return false;
}

static bool features_is_valid(u64 features)
{
        if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
                return false;

        /* Now we only support read-only configuration space */
        if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
                return false;

        return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
        if (!is_mem_zero((const char *)config->reserved,
                         sizeof(config->reserved)))
                return false;

        if (config->vq_align > PAGE_SIZE)
                return false;

        if (config->config_size > PAGE_SIZE)
                return false;

        if (!device_is_allowed(config->device_id))
                return false;

        if (!features_is_valid(config->features))
                return false;

        return true;
}

static ssize_t msg_timeout_show(struct device *device,
                                struct device_attribute *attr, char *buf)
{
        struct vduse_dev *dev = dev_get_drvdata(device);

        return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
{
        struct vduse_dev *dev = dev_get_drvdata(device);
        int ret;

        ret = kstrtouint(buf, 10, &dev->msg_timeout);
        if (ret < 0)
                return ret;

        return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
        &dev_attr_msg_timeout.attr,
        NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

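/*
 * Instantiate a device described by VDUSE_CREATE_DEV: allocate the
 * IOVA domain and per-vq state, publish /dev/vduse/$NAME, and pin the
 * module until vduse_destroy_dev() tears everything down again.
 */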
static int vduse_create_dev(struct vduse_dev_config *config,
                            void *config_buf, u64 api_version)
{
        int i, ret;
        struct vduse_dev *dev;

        ret = -EEXIST;
        if (vduse_find_dev(config->name))
                goto err;

        ret = -ENOMEM;
        dev = vduse_dev_create();
        if (!dev)
                goto err;

        dev->api_version = api_version;
        dev->device_features = config->features;
        dev->device_id = config->device_id;
        dev->vendor_id = config->vendor_id;
        dev->name = kstrdup(config->name, GFP_KERNEL);
        if (!dev->name)
                goto err_str;

        dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
                                          VDUSE_BOUNCE_SIZE);
        if (!dev->domain)
                goto err_domain;

        dev->config = config_buf;
        dev->config_size = config->config_size;
        dev->vq_align = config->vq_align;
        dev->vq_num = config->vq_num;
        dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
        if (!dev->vqs)
                goto err_vqs;

        for (i = 0; i < dev->vq_num; i++) {
                dev->vqs[i].index = i;
                INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
                INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
                spin_lock_init(&dev->vqs[i].kick_lock);
                spin_lock_init(&dev->vqs[i].irq_lock);
        }

        ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
        if (ret < 0)
                goto err_idr;

        dev->minor = ret;
        dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
        dev->dev = device_create_with_groups(vduse_class, NULL,
                                MKDEV(MAJOR(vduse_major), dev->minor),
                                dev, vduse_dev_groups, "%s", config->name);
        if (IS_ERR(dev->dev)) {
                ret = PTR_ERR(dev->dev);
                goto err_dev;
        }
        __module_get(THIS_MODULE);

        return 0;
err_dev:
        idr_remove(&vduse_idr, dev->minor);
err_idr:
        kfree(dev->vqs);
err_vqs:
        vduse_domain_destroy(dev->domain);
err_domain:
        kfree(dev->name);
err_str:
        vduse_dev_destroy(dev);
err:
        return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
                        unsigned long arg)
{
        int ret;
        void __user *argp = (void __user *)arg;
        struct vduse_control *control = file->private_data;

        mutex_lock(&vduse_lock);
        switch (cmd) {
        case VDUSE_GET_API_VERSION:
                ret = put_user(control->api_version, (u64 __user *)argp);
                break;
        case VDUSE_SET_API_VERSION: {
                u64 api_version;

                ret = -EFAULT;
                if (get_user(api_version, (u64 __user *)argp))
                        break;

                ret = -EINVAL;
                if (api_version > VDUSE_API_VERSION)
                        break;

                ret = 0;
                control->api_version = api_version;
                break;
        }
        case VDUSE_CREATE_DEV: {
                struct vduse_dev_config config;
                unsigned long size = offsetof(struct vduse_dev_config, config);
                void *buf;

                ret = -EFAULT;
                if (copy_from_user(&config, argp, size))
                        break;

                ret = -EINVAL;
                if (!vduse_validate_config(&config))
                        break;

                buf = vmemdup_user(argp + size, config.config_size);
                if (IS_ERR(buf)) {
                        ret = PTR_ERR(buf);
                        break;
                }
                config.name[VDUSE_NAME_MAX - 1] = '\0';
                ret = vduse_create_dev(&config, buf, control->api_version);
                if (ret)
                        kvfree(buf);
                break;
        }
        case VDUSE_DESTROY_DEV: {
                char name[VDUSE_NAME_MAX];

                ret = -EFAULT;
                if (copy_from_user(name, argp, VDUSE_NAME_MAX))
                        break;

                name[VDUSE_NAME_MAX - 1] = '\0';
                ret = vduse_destroy_dev(name);
                break;
        }
        default:
                ret = -EINVAL;
                break;
        }
        mutex_unlock(&vduse_lock);

        return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
        struct vduse_control *control = file->private_data;

        kfree(control);
        return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
        struct vduse_control *control;

        control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
        if (!control)
                return -ENOMEM;

        control->api_version = VDUSE_API_VERSION;
        file->private_data = control;

        return 0;
}

static const struct file_operations vduse_ctrl_fops = {
        .owner = THIS_MODULE,
        .open = vduse_open,
        .release = vduse_release,
        .unlocked_ioctl = vduse_ioctl,
        .compat_ioctl = compat_ptr_ioctl,
        .llseek = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
        return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
        struct vdpa_mgmt_dev mgmt_dev;
        struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
        struct vduse_vdpa *vdev;
        int ret;

        if (dev->vdev)
                return -EEXIST;

        vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
                                 &vduse_vdpa_config_ops, 1, 1, name, true);
        if (IS_ERR(vdev))
                return PTR_ERR(vdev);

        dev->vdev = vdev;
        vdev->dev = dev;
        vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
        ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
        if (ret) {
                put_device(&vdev->vdpa.dev);
                return ret;
        }
        set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
        vdev->vdpa.dma_dev = &vdev->vdpa.dev;
        vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

        return 0;
}

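/*
 * Management-device hooks: attach/detach a configured VDUSE device to
 * the vdpa bus. With the iproute2 vdpa tool this corresponds to e.g.
 * "vdpa dev add name foo mgmtdev vduse", which only succeeds once the
 * daemon has set up all virtqueues.
 */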
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
                        const struct vdpa_dev_set_config *config)
{
        struct vduse_dev *dev;
        int ret;

        mutex_lock(&vduse_lock);
        dev = vduse_find_dev(name);
        if (!dev || !vduse_dev_is_ready(dev)) {
                mutex_unlock(&vduse_lock);
                return -EINVAL;
        }
        ret = vduse_dev_init_vdpa(dev, name);
        mutex_unlock(&vduse_lock);
        if (ret)
                return ret;

        ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
        if (ret) {
                put_device(&dev->vdev->vdpa.dev);
                return ret;
        }

        return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
        _vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
        .dev_add = vdpa_dev_add,
        .dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
        struct vduse_mgmt_dev *mgmt_dev;

        mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
        kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
        int ret;

        vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
        if (!vduse_mgmt)
                return -ENOMEM;

        ret = dev_set_name(&vduse_mgmt->dev, "vduse");
        if (ret) {
                kfree(vduse_mgmt);
                return ret;
        }

        vduse_mgmt->dev.release = vduse_mgmtdev_release;

        ret = device_register(&vduse_mgmt->dev);
        if (ret)
                goto dev_reg_err;

        vduse_mgmt->mgmt_dev.id_table = id_table;
        vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
        vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
        ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
        if (ret)
                device_unregister(&vduse_mgmt->dev);

        return ret;

dev_reg_err:
        put_device(&vduse_mgmt->dev);
        return ret;
}

static void vduse_mgmtdev_exit(void)
{
        vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
        device_unregister(&vduse_mgmt->dev);
}

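/*
 * Module init: minor 0 of the "vduse" chardev region is the control
 * device (/dev/vduse/control), minors 1..VDUSE_DEV_MAX-1 are handed
 * out to created devices (/dev/vduse/$NAME).
 */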
static int vduse_init(void)
{
        int ret;
        struct device *dev;

        vduse_class = class_create(THIS_MODULE, "vduse");
        if (IS_ERR(vduse_class))
                return PTR_ERR(vduse_class);

        vduse_class->devnode = vduse_devnode;

        ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
        if (ret)
                goto err_chardev_region;

        /* /dev/vduse/control */
        cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
        vduse_ctrl_cdev.owner = THIS_MODULE;
        ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
        if (ret)
                goto err_ctrl_cdev;

        dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
        if (IS_ERR(dev)) {
                ret = PTR_ERR(dev);
                goto err_device;
        }

        /* /dev/vduse/$DEVICE */
        cdev_init(&vduse_cdev, &vduse_dev_fops);
        vduse_cdev.owner = THIS_MODULE;
        ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
                       VDUSE_DEV_MAX - 1);
        if (ret)
                goto err_cdev;

        vduse_irq_wq = alloc_workqueue("vduse-irq",
                                WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
        if (!vduse_irq_wq) {
                ret = -ENOMEM;
                goto err_wq;
        }

        ret = vduse_domain_init();
        if (ret)
                goto err_domain;

        ret = vduse_mgmtdev_init();
        if (ret)
                goto err_mgmtdev;

        return 0;
err_mgmtdev:
        vduse_domain_exit();
err_domain:
        destroy_workqueue(vduse_irq_wq);
err_wq:
        cdev_del(&vduse_cdev);
err_cdev:
        device_destroy(vduse_class, vduse_major);
err_device:
        cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
        unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
        class_destroy(vduse_class);
        return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
        vduse_mgmtdev_exit();
        vduse_domain_exit();
        destroy_workqueue(vduse_irq_wq);
        cdev_del(&vduse_cdev);
        device_destroy(vduse_class, vduse_major);
        cdev_del(&vduse_ctrl_cdev);
        unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
        class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);