// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 
20 #include "vfio_pci_private.h"
21 
/*
 * Accessor aliases used by the read/write helpers in this file.
 *
 * The single-byte accessors always map to ioread8()/iowrite8().  The wider
 * ones map to the plain ioreadN()/iowriteN() helpers when __LITTLE_ENDIAN is
 * defined and to the big-endian (*be) variants otherwise.
 * NOTE(review): presumably this keeps the data handed to/from userspace in
 * the device's byte layout regardless of host endianness -- confirm.
 */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#if defined(__LITTLE_ENDIAN)
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#else /* !__LITTLE_ENDIAN */
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#endif
39 
40 /*
41  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
42  * range which is inaccessible.  The excluded range drops writes and fills
43  * reads with -1.  This is intended for handling MSI-X vector tables and
44  * leftover space for ROM BARs.
45  */
do_io_rw(void __iomem * io,char __user * buf,loff_t off,size_t count,size_t x_start,size_t x_end,bool iswrite)46 static ssize_t do_io_rw(void __iomem *io, char __user *buf,
47 			loff_t off, size_t count, size_t x_start,
48 			size_t x_end, bool iswrite)
49 {
50 	ssize_t done = 0;
51 
52 	while (count) {
53 		size_t fillable, filled;
54 
55 		if (off < x_start)
56 			fillable = min(count, (size_t)(x_start - off));
57 		else if (off >= x_end)
58 			fillable = count;
59 		else
60 			fillable = 0;
61 
62 		if (fillable >= 4 && !(off % 4)) {
63 			u32 val;
64 
65 			if (iswrite) {
66 				if (copy_from_user(&val, buf, 4))
67 					return -EFAULT;
68 
69 				vfio_iowrite32(val, io + off);
70 			} else {
71 				val = vfio_ioread32(io + off);
72 
73 				if (copy_to_user(buf, &val, 4))
74 					return -EFAULT;
75 			}
76 
77 			filled = 4;
78 		} else if (fillable >= 2 && !(off % 2)) {
79 			u16 val;
80 
81 			if (iswrite) {
82 				if (copy_from_user(&val, buf, 2))
83 					return -EFAULT;
84 
85 				vfio_iowrite16(val, io + off);
86 			} else {
87 				val = vfio_ioread16(io + off);
88 
89 				if (copy_to_user(buf, &val, 2))
90 					return -EFAULT;
91 			}
92 
93 			filled = 2;
94 		} else if (fillable) {
95 			u8 val;
96 
97 			if (iswrite) {
98 				if (copy_from_user(&val, buf, 1))
99 					return -EFAULT;
100 
101 				vfio_iowrite8(val, io + off);
102 			} else {
103 				val = vfio_ioread8(io + off);
104 
105 				if (copy_to_user(buf, &val, 1))
106 					return -EFAULT;
107 			}
108 
109 			filled = 1;
110 		} else {
111 			/* Fill reads with -1, drop writes */
112 			filled = min(count, (size_t)(x_end - off));
113 			if (!iswrite) {
114 				u8 val = 0xFF;
115 				size_t i;
116 
117 				for (i = 0; i < filled; i++)
118 					if (copy_to_user(buf + i, &val, 1))
119 						return -EFAULT;
120 			}
121 		}
122 
123 		count -= filled;
124 		done += filled;
125 		off += filled;
126 		buf += filled;
127 	}
128 
129 	return done;
130 }
131 
vfio_pci_setup_barmap(struct vfio_pci_device * vdev,int bar)132 static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
133 {
134 	struct pci_dev *pdev = vdev->pdev;
135 	int ret;
136 	void __iomem *io;
137 
138 	if (vdev->barmap[bar])
139 		return 0;
140 
141 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
142 	if (ret)
143 		return ret;
144 
145 	io = pci_iomap(pdev, bar, 0);
146 	if (!io) {
147 		pci_release_selected_regions(pdev, 1 << bar);
148 		return -ENOMEM;
149 	}
150 
151 	vdev->barmap[bar] = io;
152 
153 	return 0;
154 }
155 
/*
 * Read from or write to a PCI BAR (or the expansion ROM) on behalf of
 * userspace.
 *
 * @ppos encodes both the region index and the offset within that region.
 * The request is clamped to the end of the region.  For the ROM the area
 * past the size reported by pci_map_rom() is treated as an excluded range;
 * for the BAR holding the MSI-X table the table itself is excluded.  See
 * do_io_rw() for how an excluded range is handled.
 *
 * Returns the number of bytes transferred (and advances *@ppos by it), or a
 * negative errno: -EINVAL for an unusable region or out-of-range offset,
 * -ENOMEM if the ROM cannot be mapped, or whatever vfio_pci_setup_barmap()
 * or do_io_rw() report.
 */
ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	bool is_rom = (bar == PCI_ROM_RESOURCE);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	ssize_t done;

	if (pci_resource_start(pdev, bar)) {
		end = pci_resource_len(pdev, bar);
	} else {
		/* No resource start: only a shadowed ROM is still usable. */
		if (!is_rom ||
		    !(pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW))
			return -EINVAL;

		end = 0x20000;
	}

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (is_rom) {
		/*
		 * The ROM can fill less space than the BAR, so the excluded
		 * range starts at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io)
			return -ENOMEM;

		x_end = end;
	} else {
		int ret = vfio_pci_setup_barmap(vdev, bar);

		if (ret)
			return ret;

		io = vdev->barmap[bar];
	}

	/* Keep userspace away from the MSI-X table in its BAR. */
	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);

	/* The ROM mapping is per-call; BAR mappings stay cached in barmap[]. */
	if (is_rom)
		pci_unmap_rom(pdev, io);

	if (done >= 0)
		*ppos += done;

	return done;
}
213 
/*
 * Read from or write to one of the legacy VGA ranges on behalf of userspace.
 *
 * The offset encoded in *@ppos selects one of three windows:
 *   0xa0000-0xbffff  memory    (VGA_RSRC_LEGACY_MEM)
 *   0x3b0-0x3bb      I/O ports (VGA_RSRC_LEGACY_IO)
 *   0x3c0-0x3df      I/O ports (VGA_RSRC_LEGACY_IO)
 * The request is clamped to the end of the selected window.  The window is
 * mapped for the duration of the call and the matching VGA resource is held
 * via vga_get_interruptible()/vga_put() around the actual transfer.
 *
 * Returns the number of bytes transferred (and advances *@ppos by it), or a
 * negative errno: -EINVAL if the device has no VGA or the offset is outside
 * the windows above, -ENOMEM if mapping fails, or the error reported by
 * vga_get_interruptible() or do_io_rw().
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	unsigned long base, limit;	/* window is [base, limit) */
	void __iomem *iomem;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	/* Pick the window; the mapping itself is done once below. */
	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		base = 0xa0000;
		limit = 0xc0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		base = 0x3b0;
		limit = 0x3bc;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		base = 0x3c0;
		limit = 0x3e0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	count = min(count, (size_t)(limit - pos));

	if (is_ioport)
		iomem = ioport_map(base, limit - base);
	else
		iomem = ioremap_nocache(base, limit - base);

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		done = ret;
		goto out_unmap;
	}

	/* No excluded range within the VGA windows. */
	done = do_io_rw(iomem, buf, pos - base, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	if (done >= 0)
		*ppos += done;

out_unmap:
	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);

	return done;
}
276 
vfio_pci_ioeventfd_handler(void * opaque,void * unused)277 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
278 {
279 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
280 
281 	switch (ioeventfd->count) {
282 	case 1:
283 		vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
284 		break;
285 	case 2:
286 		vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
287 		break;
288 	case 4:
289 		vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
290 		break;
291 #ifdef iowrite64
292 	case 8:
293 		vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
294 		break;
295 #endif
296 	}
297 
298 	return 0;
299 }
300 
/*
 * Register or remove an ioeventfd on a BAR of @vdev.
 *
 * @offset encodes the region index and the offset within it, @data is the
 * value to write, @count the access width in bytes and @fd the eventfd.
 * An ioeventfd is identified by (bar, offset, data, count): if such an entry
 * already exists, @fd == -1 removes it and any other @fd fails with -EEXIST.
 * Otherwise a new entry is created and hooked up through
 * vfio_virqfd_enable() so that vfio_pci_ioeventfd_handler() performs the
 * write.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  region is not BAR0-5, access runs past the BAR, access
 *            overlaps the MSI-X table, or @count == 8 without iowrite64
 *   -EEXIST  an identical ioeventfd is already registered
 *   -ENODEV  @fd is negative and no matching ioeventfd exists
 *   -ENOSPC  VFIO_PCI_IOEVENTFD_MAX entries are already registered
 *   -ENOMEM  allocation failure
 * or the error from vfio_pci_setup_barmap() / vfio_virqfd_enable().
 */
long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
			uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	struct vfio_pci_ioeventfd *cur, *fresh;
	int ret;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    pos + count > vdev->msix_offset &&
	    pos < vdev->msix_offset + vdev->msix_size)
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	/* Look for an existing entry with the same identity. */
	list_for_each_entry(cur, &vdev->ioeventfds_list, next) {
		if (cur->pos != pos || cur->bar != bar ||
		    cur->data != data || cur->count != count)
			continue;

		if (fd != -1) {
			ret = -EEXIST;
			goto out_unlock;
		}

		/* fd == -1 on a match means "tear this one down". */
		vfio_virqfd_disable(&cur->virqfd);
		list_del(&cur->next);
		vdev->ioeventfds_nr--;
		kfree(cur);
		ret = 0;
		goto out_unlock;
	}

	/* Nothing matched, so a negative fd has nothing to remove. */
	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
	if (!fresh) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	fresh->bar = bar;
	fresh->pos = pos;
	fresh->count = count;
	fresh->data = data;
	fresh->addr = vdev->barmap[bar] + pos;

	ret = vfio_virqfd_enable(fresh, vfio_pci_ioeventfd_handler,
				 NULL, NULL, &fresh->virqfd, fd);
	if (!ret) {
		list_add(&fresh->next, &vdev->ioeventfds_list);
		vdev->ioeventfds_nr++;
	} else {
		kfree(fresh);
	}

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
386