// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
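
/*
 * Rough sketch of the userspace lifecycle (illustrative only, error
 * handling omitted; see include/uapi/linux/vduse.h for the ABI):
 *
 *	int ctrl = open("/dev/vduse/control", O_RDWR);
 *	struct vduse_dev_config cfg = {
 *		.name = "vduse-blk0",		// hypothetical device name
 *		.device_id = VIRTIO_ID_BLOCK,
 *		.vq_num = 1,
 *		...
 *	};
 *	ioctl(ctrl, VDUSE_CREATE_DEV, &cfg);	// creates /dev/vduse/vduse-blk0
 *
 *	int dev = open("/dev/vduse/vduse-blk0", O_RDWR);
 *	// configure queues, then service vduse_dev_request messages with
 *	// read()/write() on 'dev' (see vduse_dev_read_iter() and
 *	// vduse_dev_write_iter() below).
 */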

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

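/*
 * Send a request to the userspace device and wait for its reply.
 *
 * The message is placed on ->send_list and picked up by the userspace
 * daemon via read() on the device fd (vduse_dev_read_iter()); the reply
 * arrives through write() (vduse_dev_write_iter()) and completes the
 * message. If the (optional) msg_timeout expires first, the device is
 * marked broken and all in-flight messages are failed.
 */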
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as broken when the wait times out */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
		msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
		msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
		msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
		msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

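/*
 * read()/write() interface of /dev/vduse/$NAME: the userspace daemon
 * reads one struct vduse_dev_request at a time from the send list
 * (blocking unless O_NONBLOCK is set) and answers it by writing the
 * matching struct vduse_dev_response, identified by request_id.
 */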
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

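/*
 * Bring the device back to its initial state: clear status and
 * negotiated features, drop all virtqueue configuration and callbacks,
 * and reset the bounce map. Called on virtio reset and before the
 * device is destroyed.
 */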
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

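/*
 * Forward a virtqueue kick to userspace via the queue's eventfd. If no
 * eventfd is set up yet, remember the kick so it can be replayed when
 * VDUSE_VQ_SETUP_KICKFD assigns one. Kicks arriving from a context that
 * may not signal an eventfd directly are deferred to a work item.
 */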
static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Reject reads beyond the config space (avoids unsigned underflow) */
	if (offset > dev->config_size ||
	    len > dev->config_size - offset)
		return;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_features = vduse_vdpa_get_features,
	.set_features = vduse_vdpa_set_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};

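/*
 * DMA operations for the emulated vDPA device. All mappings go through
 * the per-device vduse_iova_domain, which bounces driver buffers into
 * pages backed by a file that the userspace daemon can map via the file
 * descriptor returned by VDUSE_IOTLB_GET_FD.
 */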
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
				(dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

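/*
 * Assign (or, with fd == VDUSE_EVENTFD_DEASSIGN, tear down) the eventfd
 * used to forward kicks for one virtqueue to userspace. A kick that
 * arrived while no eventfd was registered is replayed immediately once
 * the queue is ready.
 */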
static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

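/*
 * ioctl() interface of /dev/vduse/$NAME. A userspace device typically
 * configures each queue and then injects interrupts as it uses buffers,
 * roughly (illustrative sketch only, error handling omitted):
 *
 *	struct vduse_vq_config vq_cfg = { .index = 0, .max_size = 256 };
 *	ioctl(dev, VDUSE_VQ_SETUP, &vq_cfg);
 *
 *	struct vduse_vq_eventfd efd = { .index = 0, .fd = eventfd(0, 0) };
 *	ioctl(dev, VDUSE_VQ_SETUP_KICKFD, &efd);
 *
 *	__u32 index = 0;
 *	ioctl(dev, VDUSE_VQ_INJECT_IRQ, &index);	// after using buffers
 */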
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what the driver wrote here.
		 * The driver is expected to check FEATURES_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		/* Reject writes beyond the config space (avoids unsigned underflow) */
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create(vduse_class, NULL,
				 MKDEV(MAJOR(vduse_major), dev->minor),
				 dev, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	kvfree(config_buf);
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static void vduse_mgmtdev_release(struct device *dev)
{
}

static struct device vduse_mgmtdev = {
	.init_name = "vduse",
	.release = vduse_mgmtdev_release,
};

static struct vdpa_mgmt_dev mgmt_dev;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &mgmt_dev;

	return 0;
}

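/*
 * Management-device hook: once the userspace daemon has configured all
 * queues (vduse_dev_is_ready()), the VDUSE device can be attached to the
 * vDPA bus, typically with the iproute2 vdpa tool, e.g.
 * "vdpa dev add name <name> mgmtdev vduse" (illustrative command line).
 */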
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vduse_mgmtdev,
	.id_table = id_table,
	.ops = &vdpa_dev_mgmtdev_ops,
};

static int vduse_mgmtdev_init(void)
{
	int ret;

	ret = device_register(&vduse_mgmtdev);
	if (ret)
		return ret;

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto err;

	return 0;
err:
	device_unregister(&vduse_mgmtdev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vduse_mgmtdev);
}

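/*
 * Module init: minor 0 of the "vduse" char region backs
 * /dev/vduse/control, minors 1..VDUSE_DEV_MAX-1 back the per-device
 * /dev/vduse/$NAME nodes created by VDUSE_CREATE_DEV.
 */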
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;
	vduse_class->dev_groups = vduse_dev_groups;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				       WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);