1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>,
6  *          Ashok Raj <ashok.raj@intel.com>,
7  *          Shaohua Li <shaohua.li@intel.com>,
8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9  *          Fenghua Yu <fenghua.yu@intel.com>
10  *          Joerg Roedel <jroedel@suse.de>
11  */
12 
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15 
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
31 #include <linux/io.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/syscore_ops.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/pci-ats.h>
39 #include <linux/memblock.h>
40 #include <linux/dma-contiguous.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <linux/swiotlb.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 #include <trace/events/intel_iommu.h>
49 
50 #include "irq_remapping.h"
51 #include "intel-pasid.h"
52 
53 #define ROOT_SIZE		VTD_PAGE_SIZE
54 #define CONTEXT_SIZE		VTD_PAGE_SIZE
55 
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60 
61 #define IOAPIC_RANGE_START	(0xfee00000)
62 #define IOAPIC_RANGE_END	(0xfeefffff)
63 #define IOVA_START_ADDR		(0x1000)
64 
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
66 
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69 
70 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72 
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
76 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78 
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN		(1)
81 
82 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
83 
84 /* page table handling */
85 #define LEVEL_STRIDE		(9)
86 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
87 
88 /*
89  * This bitmap is used to advertise the page sizes our hardware supports
90  * to the IOMMU core, which will then use this information to split
91  * physically contiguous memory regions it is mapping into page sizes
92  * that we support.
93  *
94  * Traditionally the IOMMU core just handed us the mappings directly,
95  * after making sure the size is an order of a 4KiB page and that the
96  * mapping has natural alignment.
97  *
98  * To retain this behavior, we currently advertise that we support
99  * all page sizes that are an order of 4KiB.
100  *
101  * If at some point we'd like to utilize the IOMMU core's new behavior,
102  * we could change this to advertise the real page sizes we support.
103  */
104 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
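/*
 * Note: ~0xFFFUL has every bit from 12 upwards set, so this advertises
 * 4KiB, 8KiB, 16KiB, ... i.e. every power-of-two size of at least 4KiB,
 * matching the "all page sizes that are an order of 4KiB" behaviour
 * described above.
 */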
105 
106 static inline int agaw_to_level(int agaw)
107 {
108 	return agaw + 2;
109 }
110 
111 static inline int agaw_to_width(int agaw)
112 {
113 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
114 }
115 
116 static inline int width_to_agaw(int width)
117 {
118 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
119 }
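/*
 * Informal worked example: a 48-bit address width gives
 * width_to_agaw(48) = DIV_ROUND_UP(18, 9) = 2, agaw_to_level(2) = 4
 * (a 4-level page table), and agaw_to_width(2) = 30 + 18 = 48 again.
 */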
120 
121 static inline unsigned int level_to_offset_bits(int level)
122 {
123 	return (level - 1) * LEVEL_STRIDE;
124 }
125 
126 static inline int pfn_level_offset(unsigned long pfn, int level)
127 {
128 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129 }
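/*
 * e.g. at level 2 the offset is (pfn >> 9) & 0x1ff, i.e. the 9-bit index
 * into the level-2 table; at level 1 it is simply the low 9 bits of pfn.
 */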
130 
131 static inline unsigned long level_mask(int level)
132 {
133 	return -1UL << level_to_offset_bits(level);
134 }
135 
136 static inline unsigned long level_size(int level)
137 {
138 	return 1UL << level_to_offset_bits(level);
139 }
140 
141 static inline unsigned long align_to_level(unsigned long pfn, int level)
142 {
143 	return (pfn + level_size(level) - 1) & level_mask(level);
144 }
145 
146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147 {
148 	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
149 }
150 
151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152    are never going to work. */
153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154 {
155 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156 }
157 
158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159 {
160 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161 }
162 static inline unsigned long page_to_dma_pfn(struct page *pg)
163 {
164 	return mm_to_dma_pfn(page_to_pfn(pg));
165 }
166 static inline unsigned long virt_to_dma_pfn(void *p)
167 {
168 	return page_to_dma_pfn(virt_to_page(p));
169 }
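/*
 * Note: when MM pages and VT-d pages are both 4KiB (PAGE_SHIFT ==
 * VTD_PAGE_SHIFT == 12, the common x86 case) the conversions above are
 * identity operations; the shifts only matter when MM pages are larger.
 */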
170 
171 /* global iommu list, set NULL for ignored DMAR units */
172 static struct intel_iommu **g_iommus;
173 
174 static void __init check_tylersburg_isoch(void);
175 static int rwbf_quirk;
176 
177 /*
178  * Set to 1 to panic the kernel if VT-d can't be enabled successfully
179  * (used when the kernel is launched with TXT)
180  */
181 static int force_on = 0;
182 int intel_iommu_tboot_noforce;
183 static int no_platform_optin;
184 
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
186 
187 /*
188  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189  * if marked present.
190  */
191 static phys_addr_t root_entry_lctp(struct root_entry *re)
192 {
193 	if (!(re->lo & 1))
194 		return 0;
195 
196 	return re->lo & VTD_PAGE_MASK;
197 }
198 
199 /*
200  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201  * if marked present.
202  */
203 static phys_addr_t root_entry_uctp(struct root_entry *re)
204 {
205 	if (!(re->hi & 1))
206 		return 0;
207 
208 	return re->hi & VTD_PAGE_MASK;
209 }
210 
211 static inline void context_clear_pasid_enable(struct context_entry *context)
212 {
213 	context->lo &= ~(1ULL << 11);
214 }
215 
216 static inline bool context_pasid_enabled(struct context_entry *context)
217 {
218 	return !!(context->lo & (1ULL << 11));
219 }
220 
221 static inline void context_set_copied(struct context_entry *context)
222 {
223 	context->hi |= (1ull << 3);
224 }
225 
226 static inline bool context_copied(struct context_entry *context)
227 {
228 	return !!(context->hi & (1ULL << 3));
229 }
230 
231 static inline bool __context_present(struct context_entry *context)
232 {
233 	return (context->lo & 1);
234 }
235 
236 bool context_present(struct context_entry *context)
237 {
238 	return context_pasid_enabled(context) ?
239 	     __context_present(context) :
240 	     __context_present(context) && !context_copied(context);
241 }
242 
243 static inline void context_set_present(struct context_entry *context)
244 {
245 	context->lo |= 1;
246 }
247 
248 static inline void context_set_fault_enable(struct context_entry *context)
249 {
250 	context->lo &= (((u64)-1) << 2) | 1;
251 }
252 
253 static inline void context_set_translation_type(struct context_entry *context,
254 						unsigned long value)
255 {
256 	context->lo &= (((u64)-1) << 4) | 3;
257 	context->lo |= (value & 3) << 2;
258 }
259 
260 static inline void context_set_address_root(struct context_entry *context,
261 					    unsigned long value)
262 {
263 	context->lo &= ~VTD_PAGE_MASK;
264 	context->lo |= value & VTD_PAGE_MASK;
265 }
266 
267 static inline void context_set_address_width(struct context_entry *context,
268 					     unsigned long value)
269 {
270 	context->hi |= value & 7;
271 }
272 
273 static inline void context_set_domain_id(struct context_entry *context,
274 					 unsigned long value)
275 {
276 	context->hi |= (value & ((1 << 16) - 1)) << 8;
277 }
278 
279 static inline int context_domain_id(struct context_entry *c)
280 {
281 	return((c->hi >> 8) & 0xffff);
282 }
283 
284 static inline void context_clear_entry(struct context_entry *context)
285 {
286 	context->lo = 0;
287 	context->hi = 0;
288 }
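/*
 * Rough summary of the context-entry bits touched by the helpers above:
 * lo bit 0 = present, lo bit 1 = fault processing disable, lo bits 2-3 =
 * translation type, lo bit 11 = PASID enable, lo bits 12+ = second-level
 * page-table pointer; hi bits 0-2 = address width, hi bit 3 = software
 * "copied" flag, hi bits 8-23 = domain id.
 */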
289 
290 /*
291  * This domain is a static identity mapping domain.
292  *	1. This domain creates a static 1:1 mapping to all usable memory.
293  *	2. It maps to each iommu if successful.
294  *	3. Each iommu maps to this domain if successful.
295  */
296 static struct dmar_domain *si_domain;
297 static int hw_pass_through = 1;
298 
299 /* si_domain contains multiple devices */
300 #define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)
301 
302 /*
303  * This is a DMA domain allocated through the iommu domain allocation
304  * interface. But one or more devices belonging to this domain have
305  * been chosen to use a private domain. We should avoid using the
306  * map/unmap/iova_to_phys APIs on it.
307  */
308 #define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
309 
310 #define for_each_domain_iommu(idx, domain)			\
311 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
312 		if (domain->iommu_refcnt[idx])
313 
314 struct dmar_rmrr_unit {
315 	struct list_head list;		/* list of rmrr units	*/
316 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
317 	u64	base_address;		/* reserved base address*/
318 	u64	end_address;		/* reserved end address */
319 	struct dmar_dev_scope *devices;	/* target devices */
320 	int	devices_cnt;		/* target device count */
321 };
322 
323 struct dmar_atsr_unit {
324 	struct list_head list;		/* list of ATSR units */
325 	struct acpi_dmar_header *hdr;	/* ACPI header */
326 	struct dmar_dev_scope *devices;	/* target devices */
327 	int devices_cnt;		/* target device count */
328 	u8 include_all:1;		/* include all ports */
329 };
330 
331 static LIST_HEAD(dmar_atsr_units);
332 static LIST_HEAD(dmar_rmrr_units);
333 
334 #define for_each_rmrr_units(rmrr) \
335 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
336 
337 /* number of registered intel_iommus, used to size/index g_iommus */
338 static int g_num_of_iommus;
339 
340 static void domain_exit(struct dmar_domain *domain);
341 static void domain_remove_dev_info(struct dmar_domain *domain);
342 static void dmar_remove_one_dev_info(struct device *dev);
343 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
344 static void domain_context_clear(struct intel_iommu *iommu,
345 				 struct device *dev);
346 static int domain_detach_iommu(struct dmar_domain *domain,
347 			       struct intel_iommu *iommu);
348 static bool device_is_rmrr_locked(struct device *dev);
349 static int intel_iommu_attach_device(struct iommu_domain *domain,
350 				     struct device *dev);
351 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
352 					    dma_addr_t iova);
353 
354 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
355 int dmar_disabled = 0;
356 #else
357 int dmar_disabled = 1;
358 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
359 
360 int intel_iommu_sm;
361 int intel_iommu_enabled = 0;
362 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
363 
364 static int dmar_map_gfx = 1;
365 static int dmar_forcedac;
366 static int intel_iommu_strict;
367 static int intel_iommu_superpage = 1;
368 static int iommu_identity_mapping;
369 static int intel_no_bounce;
370 
371 #define IDENTMAP_ALL		1
372 #define IDENTMAP_GFX		2
373 #define IDENTMAP_AZALIA		4
374 
375 int intel_iommu_gfx_mapped;
376 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
377 
378 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
379 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
380 static DEFINE_SPINLOCK(device_domain_lock);
381 static LIST_HEAD(device_domain_list);
382 
383 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
384 				to_pci_dev(d)->untrusted)
385 
386 /*
387  * Iterate over elements in device_domain_list and call the specified
388  * callback @fn against each element.
389  */
390 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
391 				     void *data), void *data)
392 {
393 	int ret = 0;
394 	unsigned long flags;
395 	struct device_domain_info *info;
396 
397 	spin_lock_irqsave(&device_domain_lock, flags);
398 	list_for_each_entry(info, &device_domain_list, global) {
399 		ret = fn(info, data);
400 		if (ret) {
401 			spin_unlock_irqrestore(&device_domain_lock, flags);
402 			return ret;
403 		}
404 	}
405 	spin_unlock_irqrestore(&device_domain_lock, flags);
406 
407 	return 0;
408 }
409 
410 const struct iommu_ops intel_iommu_ops;
411 
412 static bool translation_pre_enabled(struct intel_iommu *iommu)
413 {
414 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
415 }
416 
417 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
418 {
419 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
420 }
421 
422 static void init_translation_status(struct intel_iommu *iommu)
423 {
424 	u32 gsts;
425 
426 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
427 	if (gsts & DMA_GSTS_TES)
428 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
429 }
430 
431 /* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
432 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
433 {
434 	return container_of(dom, struct dmar_domain, domain);
435 }
436 
437 static int __init intel_iommu_setup(char *str)
438 {
439 	if (!str)
440 		return -EINVAL;
441 	while (*str) {
442 		if (!strncmp(str, "on", 2)) {
443 			dmar_disabled = 0;
444 			pr_info("IOMMU enabled\n");
445 		} else if (!strncmp(str, "off", 3)) {
446 			dmar_disabled = 1;
447 			no_platform_optin = 1;
448 			pr_info("IOMMU disabled\n");
449 		} else if (!strncmp(str, "igfx_off", 8)) {
450 			dmar_map_gfx = 0;
451 			pr_info("Disable GFX device mapping\n");
452 		} else if (!strncmp(str, "forcedac", 8)) {
453 			pr_info("Forcing DAC for PCI devices\n");
454 			dmar_forcedac = 1;
455 		} else if (!strncmp(str, "strict", 6)) {
456 			pr_info("Disable batched IOTLB flush\n");
457 			intel_iommu_strict = 1;
458 		} else if (!strncmp(str, "sp_off", 6)) {
459 			pr_info("Disable supported super page\n");
460 			intel_iommu_superpage = 0;
461 		} else if (!strncmp(str, "sm_on", 5)) {
462 			pr_info("Intel-IOMMU: scalable mode supported\n");
463 			intel_iommu_sm = 1;
464 		} else if (!strncmp(str, "tboot_noforce", 13)) {
465 			printk(KERN_INFO
466 				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
467 			intel_iommu_tboot_noforce = 1;
468 		} else if (!strncmp(str, "nobounce", 8)) {
469 			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
470 			intel_no_bounce = 1;
471 		}
472 
473 		str += strcspn(str, ",");
474 		while (*str == ',')
475 			str++;
476 	}
477 	return 0;
478 }
479 __setup("intel_iommu=", intel_iommu_setup);
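/*
 * Usage example (informal): booting with "intel_iommu=on,sm_on,strict"
 * walks the comma-separated list above and sets dmar_disabled = 0,
 * intel_iommu_sm = 1 and intel_iommu_strict = 1 in turn.
 */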
480 
481 static struct kmem_cache *iommu_domain_cache;
482 static struct kmem_cache *iommu_devinfo_cache;
483 
484 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
485 {
486 	struct dmar_domain **domains;
487 	int idx = did >> 8;
488 
489 	domains = iommu->domains[idx];
490 	if (!domains)
491 		return NULL;
492 
493 	return domains[did & 0xff];
494 }
495 
496 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
497 			     struct dmar_domain *domain)
498 {
499 	struct dmar_domain **domains;
500 	int idx = did >> 8;
501 
502 	if (!iommu->domains[idx]) {
503 		size_t size = 256 * sizeof(struct dmar_domain *);
504 		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
505 	}
506 
507 	domains = iommu->domains[idx];
508 	if (WARN_ON(!domains))
509 		return;
510 	else
511 		domains[did & 0xff] = domain;
512 }
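/*
 * Note: get_iommu_domain()/set_iommu_domain() treat the 16-bit domain id
 * as a two-level index: the high byte selects one of the lazily allocated
 * 256-entry pages in iommu->domains[], the low byte selects the slot
 * within that page.
 */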
513 
514 void *alloc_pgtable_page(int node)
515 {
516 	struct page *page;
517 	void *vaddr = NULL;
518 
519 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
520 	if (page)
521 		vaddr = page_address(page);
522 	return vaddr;
523 }
524 
525 void free_pgtable_page(void *vaddr)
526 {
527 	free_page((unsigned long)vaddr);
528 }
529 
530 static inline void *alloc_domain_mem(void)
531 {
532 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
533 }
534 
535 static void free_domain_mem(void *vaddr)
536 {
537 	kmem_cache_free(iommu_domain_cache, vaddr);
538 }
539 
540 static inline void *alloc_devinfo_mem(void)
541 {
542 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
543 }
544 
545 static inline void free_devinfo_mem(void *vaddr)
546 {
547 	kmem_cache_free(iommu_devinfo_cache, vaddr);
548 }
549 
550 static inline int domain_type_is_si(struct dmar_domain *domain)
551 {
552 	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
553 }
554 
555 static inline int domain_pfn_supported(struct dmar_domain *domain,
556 				       unsigned long pfn)
557 {
558 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
559 
560 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
561 }
562 
563 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
564 {
565 	unsigned long sagaw;
566 	int agaw = -1;
567 
568 	sagaw = cap_sagaw(iommu->cap);
569 	for (agaw = width_to_agaw(max_gaw);
570 	     agaw >= 0; agaw--) {
571 		if (test_bit(agaw, &sagaw))
572 			break;
573 	}
574 
575 	return agaw;
576 }
577 
578 /*
579  * Calculate max SAGAW for each iommu.
580  */
581 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
582 {
583 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
584 }
585 
586 /*
587  * Calculate agaw for each iommu.
588  * "SAGAW" may be different across iommus; use a default agaw, and
589  * fall back to a smaller supported agaw for iommus that don't support the default agaw.
590  */
591 int iommu_calculate_agaw(struct intel_iommu *iommu)
592 {
593 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
594 }
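/*
 * Note: cap_sagaw() is a bitmap of the adjusted guest address widths the
 * hardware supports; e.g. bit 2 set would mean a 4-level (48-bit wide)
 * table is supported. __iommu_calculate_agaw() returns the largest
 * supported agaw not exceeding the requested width, or -1 if none fits.
 */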
595 
596 /* This function only returns a single iommu in a domain */
597 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
598 {
599 	int iommu_id;
600 
601 	/* si_domain and vm domain should not get here. */
602 	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
603 		return NULL;
604 
605 	for_each_domain_iommu(iommu_id, domain)
606 		break;
607 
608 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
609 		return NULL;
610 
611 	return g_iommus[iommu_id];
612 }
613 
614 static void domain_update_iommu_coherency(struct dmar_domain *domain)
615 {
616 	struct dmar_drhd_unit *drhd;
617 	struct intel_iommu *iommu;
618 	bool found = false;
619 	int i;
620 
621 	domain->iommu_coherency = 1;
622 
623 	for_each_domain_iommu(i, domain) {
624 		found = true;
625 		if (!ecap_coherent(g_iommus[i]->ecap)) {
626 			domain->iommu_coherency = 0;
627 			break;
628 		}
629 	}
630 	if (found)
631 		return;
632 
633 	/* No hardware attached; use lowest common denominator */
634 	rcu_read_lock();
635 	for_each_active_iommu(iommu, drhd) {
636 		if (!ecap_coherent(iommu->ecap)) {
637 			domain->iommu_coherency = 0;
638 			break;
639 		}
640 	}
641 	rcu_read_unlock();
642 }
643 
644 static int domain_update_iommu_snooping(struct intel_iommu *skip)
645 {
646 	struct dmar_drhd_unit *drhd;
647 	struct intel_iommu *iommu;
648 	int ret = 1;
649 
650 	rcu_read_lock();
651 	for_each_active_iommu(iommu, drhd) {
652 		if (iommu != skip) {
653 			if (!ecap_sc_support(iommu->ecap)) {
654 				ret = 0;
655 				break;
656 			}
657 		}
658 	}
659 	rcu_read_unlock();
660 
661 	return ret;
662 }
663 
664 static int domain_update_iommu_superpage(struct intel_iommu *skip)
665 {
666 	struct dmar_drhd_unit *drhd;
667 	struct intel_iommu *iommu;
668 	int mask = 0xf;
669 
670 	if (!intel_iommu_superpage) {
671 		return 0;
672 	}
673 
674 	/* set iommu_superpage to the smallest common denominator */
675 	rcu_read_lock();
676 	for_each_active_iommu(iommu, drhd) {
677 		if (iommu != skip) {
678 			mask &= cap_super_page_val(iommu->cap);
679 			if (!mask)
680 				break;
681 		}
682 	}
683 	rcu_read_unlock();
684 
685 	return fls(mask);
686 }
687 
688 /* Some capabilities may be different across iommus */
689 static void domain_update_iommu_cap(struct dmar_domain *domain)
690 {
691 	domain_update_iommu_coherency(domain);
692 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
693 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
694 }
695 
696 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
697 					 u8 devfn, int alloc)
698 {
699 	struct root_entry *root = &iommu->root_entry[bus];
700 	struct context_entry *context;
701 	u64 *entry;
702 
703 	entry = &root->lo;
704 	if (sm_supported(iommu)) {
705 		if (devfn >= 0x80) {
706 			devfn -= 0x80;
707 			entry = &root->hi;
708 		}
709 		devfn *= 2;
710 	}
711 	if (*entry & 1)
712 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
713 	else {
714 		unsigned long phy_addr;
715 		if (!alloc)
716 			return NULL;
717 
718 		context = alloc_pgtable_page(iommu->node);
719 		if (!context)
720 			return NULL;
721 
722 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
723 		phy_addr = virt_to_phys((void *)context);
724 		*entry = phy_addr | 1;
725 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
726 	}
727 	return &context[devfn];
728 }
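/*
 * Note: in scalable mode each root entry covers only 128 device functions
 * (devfn 0x00-0x7f via root->lo, 0x80-0xff via root->hi) and each
 * scalable-mode context entry is twice the legacy size, hence the
 * "devfn *= 2" above.
 */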
729 
730 static int iommu_dummy(struct device *dev)
731 {
732 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
733 }
734 
735 /**
736  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
737  *				 sub-hierarchy of a candidate PCI-PCI bridge
738  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
739  * @bridge: the candidate PCI-PCI bridge
740  *
741  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
742  */
743 static bool
744 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
745 {
746 	struct pci_dev *pdev, *pbridge;
747 
748 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
749 		return false;
750 
751 	pdev = to_pci_dev(dev);
752 	pbridge = to_pci_dev(bridge);
753 
754 	if (pbridge->subordinate &&
755 	    pbridge->subordinate->number <= pdev->bus->number &&
756 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
757 		return true;
758 
759 	return false;
760 }
761 
762 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
763 {
764 	struct dmar_drhd_unit *drhd = NULL;
765 	struct intel_iommu *iommu;
766 	struct device *tmp;
767 	struct pci_dev *pdev = NULL;
768 	u16 segment = 0;
769 	int i;
770 
771 	if (iommu_dummy(dev))
772 		return NULL;
773 
774 	if (dev_is_pci(dev)) {
775 		struct pci_dev *pf_pdev;
776 
777 		pdev = to_pci_dev(dev);
778 
779 #ifdef CONFIG_X86
780 		/* VMD child devices currently cannot be handled individually */
781 		if (is_vmd(pdev->bus))
782 			return NULL;
783 #endif
784 
785 		/* VFs aren't listed in scope tables; we need to look up
786 		 * the PF instead to find the IOMMU. */
787 		pf_pdev = pci_physfn(pdev);
788 		dev = &pf_pdev->dev;
789 		segment = pci_domain_nr(pdev->bus);
790 	} else if (has_acpi_companion(dev))
791 		dev = &ACPI_COMPANION(dev)->dev;
792 
793 	rcu_read_lock();
794 	for_each_active_iommu(iommu, drhd) {
795 		if (pdev && segment != drhd->segment)
796 			continue;
797 
798 		for_each_active_dev_scope(drhd->devices,
799 					  drhd->devices_cnt, i, tmp) {
800 			if (tmp == dev) {
801 				/* For a VF use its original BDF# not that of the PF
802 				 * which we used for the IOMMU lookup. Strictly speaking
803 				 * we could do this for all PCI devices; we only need to
804 				 * get the BDF# from the scope table for ACPI matches. */
805 				if (pdev && pdev->is_virtfn)
806 					goto got_pdev;
807 
808 				*bus = drhd->devices[i].bus;
809 				*devfn = drhd->devices[i].devfn;
810 				goto out;
811 			}
812 
813 			if (is_downstream_to_pci_bridge(dev, tmp))
814 				goto got_pdev;
815 		}
816 
817 		if (pdev && drhd->include_all) {
818 		got_pdev:
819 			*bus = pdev->bus->number;
820 			*devfn = pdev->devfn;
821 			goto out;
822 		}
823 	}
824 	iommu = NULL;
825  out:
826 	rcu_read_unlock();
827 
828 	return iommu;
829 }
830 
831 static void domain_flush_cache(struct dmar_domain *domain,
832 			       void *addr, int size)
833 {
834 	if (!domain->iommu_coherency)
835 		clflush_cache_range(addr, size);
836 }
837 
838 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
839 {
840 	struct context_entry *context;
841 	int ret = 0;
842 	unsigned long flags;
843 
844 	spin_lock_irqsave(&iommu->lock, flags);
845 	context = iommu_context_addr(iommu, bus, devfn, 0);
846 	if (context)
847 		ret = context_present(context);
848 	spin_unlock_irqrestore(&iommu->lock, flags);
849 	return ret;
850 }
851 
852 static void free_context_table(struct intel_iommu *iommu)
853 {
854 	int i;
855 	unsigned long flags;
856 	struct context_entry *context;
857 
858 	spin_lock_irqsave(&iommu->lock, flags);
859 	if (!iommu->root_entry) {
860 		goto out;
861 	}
862 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
863 		context = iommu_context_addr(iommu, i, 0, 0);
864 		if (context)
865 			free_pgtable_page(context);
866 
867 		if (!sm_supported(iommu))
868 			continue;
869 
870 		context = iommu_context_addr(iommu, i, 0x80, 0);
871 		if (context)
872 			free_pgtable_page(context);
873 
874 	}
875 	free_pgtable_page(iommu->root_entry);
876 	iommu->root_entry = NULL;
877 out:
878 	spin_unlock_irqrestore(&iommu->lock, flags);
879 }
880 
881 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
882 				      unsigned long pfn, int *target_level)
883 {
884 	struct dma_pte *parent, *pte;
885 	int level = agaw_to_level(domain->agaw);
886 	int offset;
887 
888 	BUG_ON(!domain->pgd);
889 
890 	if (!domain_pfn_supported(domain, pfn))
891 		/* Address beyond IOMMU's addressing capabilities. */
892 		return NULL;
893 
894 	parent = domain->pgd;
895 
896 	while (1) {
897 		void *tmp_page;
898 
899 		offset = pfn_level_offset(pfn, level);
900 		pte = &parent[offset];
901 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
902 			break;
903 		if (level == *target_level)
904 			break;
905 
906 		if (!dma_pte_present(pte)) {
907 			uint64_t pteval;
908 
909 			tmp_page = alloc_pgtable_page(domain->nid);
910 
911 			if (!tmp_page)
912 				return NULL;
913 
914 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
915 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
916 			if (cmpxchg64(&pte->val, 0ULL, pteval))
917 				/* Someone else set it while we were thinking; use theirs. */
918 				free_pgtable_page(tmp_page);
919 			else
920 				domain_flush_cache(domain, pte, sizeof(*pte));
921 		}
922 		if (level == 1)
923 			break;
924 
925 		parent = phys_to_virt(dma_pte_addr(pte));
926 		level--;
927 	}
928 
929 	if (!*target_level)
930 		*target_level = level;
931 
932 	return pte;
933 }
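/*
 * Note on *target_level: a caller passing 0 asks for the deepest existing
 * entry (the walk stops early at a superpage or a non-present PTE) and
 * gets the level that was reached written back; a non-zero value makes
 * the walk descend, allocating intermediate tables as needed, to exactly
 * that level.
 */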
934 
935 /* return address's pte at specific level */
936 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
937 					 unsigned long pfn,
938 					 int level, int *large_page)
939 {
940 	struct dma_pte *parent, *pte;
941 	int total = agaw_to_level(domain->agaw);
942 	int offset;
943 
944 	parent = domain->pgd;
945 	while (level <= total) {
946 		offset = pfn_level_offset(pfn, total);
947 		pte = &parent[offset];
948 		if (level == total)
949 			return pte;
950 
951 		if (!dma_pte_present(pte)) {
952 			*large_page = total;
953 			break;
954 		}
955 
956 		if (dma_pte_superpage(pte)) {
957 			*large_page = total;
958 			return pte;
959 		}
960 
961 		parent = phys_to_virt(dma_pte_addr(pte));
962 		total--;
963 	}
964 	return NULL;
965 }
966 
967 /* clear last level pte, a tlb flush should be followed */
968 static void dma_pte_clear_range(struct dmar_domain *domain,
969 				unsigned long start_pfn,
970 				unsigned long last_pfn)
971 {
972 	unsigned int large_page;
973 	struct dma_pte *first_pte, *pte;
974 
975 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
976 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
977 	BUG_ON(start_pfn > last_pfn);
978 
979 	/* we don't need lock here; nobody else touches the iova range */
980 	do {
981 		large_page = 1;
982 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
983 		if (!pte) {
984 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
985 			continue;
986 		}
987 		do {
988 			dma_clear_pte(pte);
989 			start_pfn += lvl_to_nr_pages(large_page);
990 			pte++;
991 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
992 
993 		domain_flush_cache(domain, first_pte,
994 				   (void *)pte - (void *)first_pte);
995 
996 	} while (start_pfn && start_pfn <= last_pfn);
997 }
998 
999 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1000 			       int retain_level, struct dma_pte *pte,
1001 			       unsigned long pfn, unsigned long start_pfn,
1002 			       unsigned long last_pfn)
1003 {
1004 	pfn = max(start_pfn, pfn);
1005 	pte = &pte[pfn_level_offset(pfn, level)];
1006 
1007 	do {
1008 		unsigned long level_pfn;
1009 		struct dma_pte *level_pte;
1010 
1011 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1012 			goto next;
1013 
1014 		level_pfn = pfn & level_mask(level);
1015 		level_pte = phys_to_virt(dma_pte_addr(pte));
1016 
1017 		if (level > 2) {
1018 			dma_pte_free_level(domain, level - 1, retain_level,
1019 					   level_pte, level_pfn, start_pfn,
1020 					   last_pfn);
1021 		}
1022 
1023 		/*
1024 		 * Free the page table if we're below the level we want to
1025 		 * retain and the range covers the entire table.
1026 		 */
1027 		if (level < retain_level && !(start_pfn > level_pfn ||
1028 		      last_pfn < level_pfn + level_size(level) - 1)) {
1029 			dma_clear_pte(pte);
1030 			domain_flush_cache(domain, pte, sizeof(*pte));
1031 			free_pgtable_page(level_pte);
1032 		}
1033 next:
1034 		pfn += level_size(level);
1035 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1036 }
1037 
1038 /*
1039  * clear last level (leaf) ptes and free page table pages below the
1040  * level we wish to keep intact.
1041  */
1042 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1043 				   unsigned long start_pfn,
1044 				   unsigned long last_pfn,
1045 				   int retain_level)
1046 {
1047 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1048 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1049 	BUG_ON(start_pfn > last_pfn);
1050 
1051 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1052 
1053 	/* We don't need lock here; nobody else touches the iova range */
1054 	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1055 			   domain->pgd, 0, start_pfn, last_pfn);
1056 
1057 	/* free pgd */
1058 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1059 		free_pgtable_page(domain->pgd);
1060 		domain->pgd = NULL;
1061 	}
1062 }
1063 
1064 /* When a page at a given level is being unlinked from its parent, we don't
1065    need to *modify* it at all. All we need to do is make a list of all the
1066    pages which can be freed just as soon as we've flushed the IOTLB and we
1067    know the hardware page-walk will no longer touch them.
1068    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1069    be freed. */
1070 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1071 					    int level, struct dma_pte *pte,
1072 					    struct page *freelist)
1073 {
1074 	struct page *pg;
1075 
1076 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1077 	pg->freelist = freelist;
1078 	freelist = pg;
1079 
1080 	if (level == 1)
1081 		return freelist;
1082 
1083 	pte = page_address(pg);
1084 	do {
1085 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1086 			freelist = dma_pte_list_pagetables(domain, level - 1,
1087 							   pte, freelist);
1088 		pte++;
1089 	} while (!first_pte_in_page(pte));
1090 
1091 	return freelist;
1092 }
1093 
1094 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1095 					struct dma_pte *pte, unsigned long pfn,
1096 					unsigned long start_pfn,
1097 					unsigned long last_pfn,
1098 					struct page *freelist)
1099 {
1100 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1101 
1102 	pfn = max(start_pfn, pfn);
1103 	pte = &pte[pfn_level_offset(pfn, level)];
1104 
1105 	do {
1106 		unsigned long level_pfn;
1107 
1108 		if (!dma_pte_present(pte))
1109 			goto next;
1110 
1111 		level_pfn = pfn & level_mask(level);
1112 
1113 		/* If range covers entire pagetable, free it */
1114 		if (start_pfn <= level_pfn &&
1115 		    last_pfn >= level_pfn + level_size(level) - 1) {
1116 			/* These subordinate page tables are going away entirely. Don't
1117 			   bother to clear them; we're just going to *free* them. */
1118 			if (level > 1 && !dma_pte_superpage(pte))
1119 				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1120 
1121 			dma_clear_pte(pte);
1122 			if (!first_pte)
1123 				first_pte = pte;
1124 			last_pte = pte;
1125 		} else if (level > 1) {
1126 			/* Recurse down into a level that isn't *entirely* obsolete */
1127 			freelist = dma_pte_clear_level(domain, level - 1,
1128 						       phys_to_virt(dma_pte_addr(pte)),
1129 						       level_pfn, start_pfn, last_pfn,
1130 						       freelist);
1131 		}
1132 next:
1133 		pfn += level_size(level);
1134 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1135 
1136 	if (first_pte)
1137 		domain_flush_cache(domain, first_pte,
1138 				   (void *)++last_pte - (void *)first_pte);
1139 
1140 	return freelist;
1141 }
1142 
1143 /* We can't just free the pages because the IOMMU may still be walking
1144    the page tables, and may have cached the intermediate levels. The
1145    pages can only be freed after the IOTLB flush has been done. */
1146 static struct page *domain_unmap(struct dmar_domain *domain,
1147 				 unsigned long start_pfn,
1148 				 unsigned long last_pfn)
1149 {
1150 	struct page *freelist;
1151 
1152 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1153 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1154 	BUG_ON(start_pfn > last_pfn);
1155 
1156 	/* we don't need lock here; nobody else touches the iova range */
1157 	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1158 				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1159 
1160 	/* free pgd */
1161 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1162 		struct page *pgd_page = virt_to_page(domain->pgd);
1163 		pgd_page->freelist = freelist;
1164 		freelist = pgd_page;
1165 
1166 		domain->pgd = NULL;
1167 	}
1168 
1169 	return freelist;
1170 }
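/*
 * Note: the returned pages are chained through page->freelist and, as the
 * comment above says, may only be handed to dma_free_pagelist() once the
 * IOTLB flush has completed.
 */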
1171 
1172 static void dma_free_pagelist(struct page *freelist)
1173 {
1174 	struct page *pg;
1175 
1176 	while ((pg = freelist)) {
1177 		freelist = pg->freelist;
1178 		free_pgtable_page(page_address(pg));
1179 	}
1180 }
1181 
1182 static void iova_entry_free(unsigned long data)
1183 {
1184 	struct page *freelist = (struct page *)data;
1185 
1186 	dma_free_pagelist(freelist);
1187 }
1188 
1189 /* iommu handling */
1190 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1191 {
1192 	struct root_entry *root;
1193 	unsigned long flags;
1194 
1195 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1196 	if (!root) {
1197 		pr_err("Allocating root entry for %s failed\n",
1198 			iommu->name);
1199 		return -ENOMEM;
1200 	}
1201 
1202 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1203 
1204 	spin_lock_irqsave(&iommu->lock, flags);
1205 	iommu->root_entry = root;
1206 	spin_unlock_irqrestore(&iommu->lock, flags);
1207 
1208 	return 0;
1209 }
1210 
1211 static void iommu_set_root_entry(struct intel_iommu *iommu)
1212 {
1213 	u64 addr;
1214 	u32 sts;
1215 	unsigned long flag;
1216 
1217 	addr = virt_to_phys(iommu->root_entry);
1218 	if (sm_supported(iommu))
1219 		addr |= DMA_RTADDR_SMT;
1220 
1221 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1222 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1223 
1224 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1225 
1226 	/* Make sure hardware completes it */
1227 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1228 		      readl, (sts & DMA_GSTS_RTPS), sts);
1229 
1230 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1231 }
1232 
1233 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1234 {
1235 	u32 val;
1236 	unsigned long flag;
1237 
1238 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1239 		return;
1240 
1241 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1242 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1243 
1244 	/* Make sure hardware completes it */
1245 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1246 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1247 
1248 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1249 }
1250 
1251 /* return value determines if we need a write buffer flush */
1252 static void __iommu_flush_context(struct intel_iommu *iommu,
1253 				  u16 did, u16 source_id, u8 function_mask,
1254 				  u64 type)
1255 {
1256 	u64 val = 0;
1257 	unsigned long flag;
1258 
1259 	switch (type) {
1260 	case DMA_CCMD_GLOBAL_INVL:
1261 		val = DMA_CCMD_GLOBAL_INVL;
1262 		break;
1263 	case DMA_CCMD_DOMAIN_INVL:
1264 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1265 		break;
1266 	case DMA_CCMD_DEVICE_INVL:
1267 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1268 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1269 		break;
1270 	default:
1271 		BUG();
1272 	}
1273 	val |= DMA_CCMD_ICC;
1274 
1275 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1276 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1277 
1278 	/* Make sure hardware completes it */
1279 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1280 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1281 
1282 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1283 }
1284 
1285 /* return value determines if we need a write buffer flush */
1286 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1287 				u64 addr, unsigned int size_order, u64 type)
1288 {
1289 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1290 	u64 val = 0, val_iva = 0;
1291 	unsigned long flag;
1292 
1293 	switch (type) {
1294 	case DMA_TLB_GLOBAL_FLUSH:
1295 		/* global flush doesn't need to set IVA_REG */
1296 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1297 		break;
1298 	case DMA_TLB_DSI_FLUSH:
1299 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1300 		break;
1301 	case DMA_TLB_PSI_FLUSH:
1302 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1303 		/* IH bit is passed in as part of address */
1304 		val_iva = size_order | addr;
1305 		break;
1306 	default:
1307 		BUG();
1308 	}
1309 	/* Note: set drain read/write */
1310 #if 0
1311 	/*
1312 	 * This is probably meant to be extra safe. Looks like we can
1313 	 * ignore it without any impact.
1314 	 */
1315 	if (cap_read_drain(iommu->cap))
1316 		val |= DMA_TLB_READ_DRAIN;
1317 #endif
1318 	if (cap_write_drain(iommu->cap))
1319 		val |= DMA_TLB_WRITE_DRAIN;
1320 
1321 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1322 	/* Note: Only uses first TLB reg currently */
1323 	if (val_iva)
1324 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1325 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1326 
1327 	/* Make sure hardware completes it */
1328 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1329 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1330 
1331 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1332 
1333 	/* check IOTLB invalidation granularity */
1334 	if (DMA_TLB_IAIG(val) == 0)
1335 		pr_err("Flush IOTLB failed\n");
1336 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1337 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1338 			(unsigned long long)DMA_TLB_IIRG(type),
1339 			(unsigned long long)DMA_TLB_IAIG(val));
1340 }
1341 
1342 static struct device_domain_info *
1343 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1344 			 u8 bus, u8 devfn)
1345 {
1346 	struct device_domain_info *info;
1347 
1348 	assert_spin_locked(&device_domain_lock);
1349 
1350 	if (!iommu->qi)
1351 		return NULL;
1352 
1353 	list_for_each_entry(info, &domain->devices, link)
1354 		if (info->iommu == iommu && info->bus == bus &&
1355 		    info->devfn == devfn) {
1356 			if (info->ats_supported && info->dev)
1357 				return info;
1358 			break;
1359 		}
1360 
1361 	return NULL;
1362 }
1363 
1364 static void domain_update_iotlb(struct dmar_domain *domain)
1365 {
1366 	struct device_domain_info *info;
1367 	bool has_iotlb_device = false;
1368 
1369 	assert_spin_locked(&device_domain_lock);
1370 
1371 	list_for_each_entry(info, &domain->devices, link) {
1372 		struct pci_dev *pdev;
1373 
1374 		if (!info->dev || !dev_is_pci(info->dev))
1375 			continue;
1376 
1377 		pdev = to_pci_dev(info->dev);
1378 		if (pdev->ats_enabled) {
1379 			has_iotlb_device = true;
1380 			break;
1381 		}
1382 	}
1383 
1384 	domain->has_iotlb_device = has_iotlb_device;
1385 }
1386 
1387 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1388 {
1389 	struct pci_dev *pdev;
1390 
1391 	assert_spin_locked(&device_domain_lock);
1392 
1393 	if (!info || !dev_is_pci(info->dev))
1394 		return;
1395 
1396 	pdev = to_pci_dev(info->dev);
1397 	/* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1398 	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1399 	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1400 	 * reserved, which should be set to 0.
1401 	 */
1402 	if (!ecap_dit(info->iommu->ecap))
1403 		info->pfsid = 0;
1404 	else {
1405 		struct pci_dev *pf_pdev;
1406 
1407 		/* pdev will be returned if device is not a vf */
1408 		pf_pdev = pci_physfn(pdev);
1409 		info->pfsid = pci_dev_id(pf_pdev);
1410 	}
1411 
1412 #ifdef CONFIG_INTEL_IOMMU_SVM
1413 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1414 	   the device if you enable PASID support after ATS support is
1415 	   undefined. So always enable PASID support on devices which
1416 	   have it, even if we can't yet know if we're ever going to
1417 	   use it. */
1418 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1419 		info->pasid_enabled = 1;
1420 
1421 	if (info->pri_supported &&
1422 	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1423 	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1424 		info->pri_enabled = 1;
1425 #endif
1426 	if (!pdev->untrusted && info->ats_supported &&
1427 	    pci_ats_page_aligned(pdev) &&
1428 	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1429 		info->ats_enabled = 1;
1430 		domain_update_iotlb(info->domain);
1431 		info->ats_qdep = pci_ats_queue_depth(pdev);
1432 	}
1433 }
1434 
1435 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1436 {
1437 	struct pci_dev *pdev;
1438 
1439 	assert_spin_locked(&device_domain_lock);
1440 
1441 	if (!dev_is_pci(info->dev))
1442 		return;
1443 
1444 	pdev = to_pci_dev(info->dev);
1445 
1446 	if (info->ats_enabled) {
1447 		pci_disable_ats(pdev);
1448 		info->ats_enabled = 0;
1449 		domain_update_iotlb(info->domain);
1450 	}
1451 #ifdef CONFIG_INTEL_IOMMU_SVM
1452 	if (info->pri_enabled) {
1453 		pci_disable_pri(pdev);
1454 		info->pri_enabled = 0;
1455 	}
1456 	if (info->pasid_enabled) {
1457 		pci_disable_pasid(pdev);
1458 		info->pasid_enabled = 0;
1459 	}
1460 #endif
1461 }
1462 
1463 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1464 				  u64 addr, unsigned mask)
1465 {
1466 	u16 sid, qdep;
1467 	unsigned long flags;
1468 	struct device_domain_info *info;
1469 
1470 	if (!domain->has_iotlb_device)
1471 		return;
1472 
1473 	spin_lock_irqsave(&device_domain_lock, flags);
1474 	list_for_each_entry(info, &domain->devices, link) {
1475 		if (!info->ats_enabled)
1476 			continue;
1477 
1478 		sid = info->bus << 8 | info->devfn;
1479 		qdep = info->ats_qdep;
1480 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1481 				qdep, addr, mask);
1482 	}
1483 	spin_unlock_irqrestore(&device_domain_lock, flags);
1484 }
1485 
1486 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1487 				  struct dmar_domain *domain,
1488 				  unsigned long pfn, unsigned int pages,
1489 				  int ih, int map)
1490 {
1491 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1492 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1493 	u16 did = domain->iommu_did[iommu->seq_id];
1494 
1495 	BUG_ON(pages == 0);
1496 
1497 	if (ih)
1498 		ih = 1 << 6;
1499 	/*
1500 	 * Fall back to domain-selective flush if there is no PSI support or
1501 	 * the size is too big.
1502 	 * PSI requires the page size to be 2 ^ x, and the base address to be
1503 	 * naturally aligned to the size.
1504 	 */
1505 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1506 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1507 						DMA_TLB_DSI_FLUSH);
1508 	else
1509 		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1510 						DMA_TLB_PSI_FLUSH);
1511 
1512 	/*
1513 	 * In caching mode, changes of pages from non-present to present require
1514 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1515 	 */
1516 	if (!cap_caching_mode(iommu->cap) || !map)
1517 		iommu_flush_dev_iotlb(domain, addr, mask);
1518 }
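/*
 * e.g. flushing 9 pages gives mask = ilog2(__roundup_pow_of_two(9)) = 4,
 * i.e. a 16-page (64KiB) page-selective invalidation aligned around addr,
 * or a domain-selective flush if the IOMMU cannot cover a mask of 4 with
 * PSI.
 */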
1519 
1520 /* Notification for newly created mappings */
1521 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1522 					struct dmar_domain *domain,
1523 					unsigned long pfn, unsigned int pages)
1524 {
1525 	/* It's a non-present to present mapping. Only flush if in caching mode */
1526 	if (cap_caching_mode(iommu->cap))
1527 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1528 	else
1529 		iommu_flush_write_buffer(iommu);
1530 }
1531 
1532 static void iommu_flush_iova(struct iova_domain *iovad)
1533 {
1534 	struct dmar_domain *domain;
1535 	int idx;
1536 
1537 	domain = container_of(iovad, struct dmar_domain, iovad);
1538 
1539 	for_each_domain_iommu(idx, domain) {
1540 		struct intel_iommu *iommu = g_iommus[idx];
1541 		u16 did = domain->iommu_did[iommu->seq_id];
1542 
1543 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1544 
1545 		if (!cap_caching_mode(iommu->cap))
1546 			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1547 					      0, MAX_AGAW_PFN_WIDTH);
1548 	}
1549 }
1550 
1551 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1552 {
1553 	u32 pmen;
1554 	unsigned long flags;
1555 
1556 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1557 		return;
1558 
1559 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1560 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1561 	pmen &= ~DMA_PMEN_EPM;
1562 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1563 
1564 	/* wait for the protected region status bit to clear */
1565 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1566 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1567 
1568 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1569 }
1570 
1571 static void iommu_enable_translation(struct intel_iommu *iommu)
1572 {
1573 	u32 sts;
1574 	unsigned long flags;
1575 
1576 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1577 	iommu->gcmd |= DMA_GCMD_TE;
1578 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1579 
1580 	/* Make sure hardware completes it */
1581 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1582 		      readl, (sts & DMA_GSTS_TES), sts);
1583 
1584 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1585 }
1586 
1587 static void iommu_disable_translation(struct intel_iommu *iommu)
1588 {
1589 	u32 sts;
1590 	unsigned long flag;
1591 
1592 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1593 	iommu->gcmd &= ~DMA_GCMD_TE;
1594 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1595 
1596 	/* Make sure hardware completes it */
1597 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1598 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1599 
1600 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1601 }
1602 
1603 static int iommu_init_domains(struct intel_iommu *iommu)
1604 {
1605 	u32 ndomains, nlongs;
1606 	size_t size;
1607 
1608 	ndomains = cap_ndoms(iommu->cap);
1609 	pr_debug("%s: Number of Domains supported <%d>\n",
1610 		 iommu->name, ndomains);
1611 	nlongs = BITS_TO_LONGS(ndomains);
1612 
1613 	spin_lock_init(&iommu->lock);
1614 
1615 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1616 	if (!iommu->domain_ids) {
1617 		pr_err("%s: Allocating domain id array failed\n",
1618 		       iommu->name);
1619 		return -ENOMEM;
1620 	}
1621 
1622 	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1623 	iommu->domains = kzalloc(size, GFP_KERNEL);
1624 
1625 	if (iommu->domains) {
1626 		size = 256 * sizeof(struct dmar_domain *);
1627 		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1628 	}
1629 
1630 	if (!iommu->domains || !iommu->domains[0]) {
1631 		pr_err("%s: Allocating domain array failed\n",
1632 		       iommu->name);
1633 		kfree(iommu->domain_ids);
1634 		kfree(iommu->domains);
1635 		iommu->domain_ids = NULL;
1636 		iommu->domains    = NULL;
1637 		return -ENOMEM;
1638 	}
1639 
1640 	/*
1641 	 * If Caching mode is set, then invalid translations are tagged
1642 	 * with domain-id 0, hence we need to pre-allocate it. We also
1643 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1644 	 * make sure it is not used for a real domain.
1645 	 */
1646 	set_bit(0, iommu->domain_ids);
1647 
1648 	/*
1649 	 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each pasid
1650 	 * entry for first-level or pass-through translation modes should
1651 	 * be programmed with a domain id different from those used for
1652 	 * second-level or nested translation. We reserve a domain id for
1653 	 * this purpose.
1654 	 */
1655 	if (sm_supported(iommu))
1656 		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1657 
1658 	return 0;
1659 }
1660 
1661 static void disable_dmar_iommu(struct intel_iommu *iommu)
1662 {
1663 	struct device_domain_info *info, *tmp;
1664 	unsigned long flags;
1665 
1666 	if (!iommu->domains || !iommu->domain_ids)
1667 		return;
1668 
1669 	spin_lock_irqsave(&device_domain_lock, flags);
1670 	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1671 		if (info->iommu != iommu)
1672 			continue;
1673 
1674 		if (!info->dev || !info->domain)
1675 			continue;
1676 
1677 		__dmar_remove_one_dev_info(info);
1678 	}
1679 	spin_unlock_irqrestore(&device_domain_lock, flags);
1680 
1681 	if (iommu->gcmd & DMA_GCMD_TE)
1682 		iommu_disable_translation(iommu);
1683 }
1684 
1685 static void free_dmar_iommu(struct intel_iommu *iommu)
1686 {
1687 	if ((iommu->domains) && (iommu->domain_ids)) {
1688 		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1689 		int i;
1690 
1691 		for (i = 0; i < elems; i++)
1692 			kfree(iommu->domains[i]);
1693 		kfree(iommu->domains);
1694 		kfree(iommu->domain_ids);
1695 		iommu->domains = NULL;
1696 		iommu->domain_ids = NULL;
1697 	}
1698 
1699 	g_iommus[iommu->seq_id] = NULL;
1700 
1701 	/* free context mapping */
1702 	free_context_table(iommu);
1703 
1704 #ifdef CONFIG_INTEL_IOMMU_SVM
1705 	if (pasid_supported(iommu)) {
1706 		if (ecap_prs(iommu->ecap))
1707 			intel_svm_finish_prq(iommu);
1708 	}
1709 #endif
1710 }
1711 
1712 static struct dmar_domain *alloc_domain(int flags)
1713 {
1714 	struct dmar_domain *domain;
1715 
1716 	domain = alloc_domain_mem();
1717 	if (!domain)
1718 		return NULL;
1719 
1720 	memset(domain, 0, sizeof(*domain));
1721 	domain->nid = NUMA_NO_NODE;
1722 	domain->flags = flags;
1723 	domain->has_iotlb_device = false;
1724 	INIT_LIST_HEAD(&domain->devices);
1725 
1726 	return domain;
1727 }
1728 
1729 /* Must be called with iommu->lock */
1730 static int domain_attach_iommu(struct dmar_domain *domain,
1731 			       struct intel_iommu *iommu)
1732 {
1733 	unsigned long ndomains;
1734 	int num;
1735 
1736 	assert_spin_locked(&device_domain_lock);
1737 	assert_spin_locked(&iommu->lock);
1738 
1739 	domain->iommu_refcnt[iommu->seq_id] += 1;
1740 	domain->iommu_count += 1;
1741 	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1742 		ndomains = cap_ndoms(iommu->cap);
1743 		num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1744 
1745 		if (num >= ndomains) {
1746 			pr_err("%s: No free domain ids\n", iommu->name);
1747 			domain->iommu_refcnt[iommu->seq_id] -= 1;
1748 			domain->iommu_count -= 1;
1749 			return -ENOSPC;
1750 		}
1751 
1752 		set_bit(num, iommu->domain_ids);
1753 		set_iommu_domain(iommu, num, domain);
1754 
1755 		domain->iommu_did[iommu->seq_id] = num;
1756 		domain->nid			 = iommu->node;
1757 
1758 		domain_update_iommu_cap(domain);
1759 	}
1760 
1761 	return 0;
1762 }
1763 
1764 static int domain_detach_iommu(struct dmar_domain *domain,
1765 			       struct intel_iommu *iommu)
1766 {
1767 	int num, count;
1768 
1769 	assert_spin_locked(&device_domain_lock);
1770 	assert_spin_locked(&iommu->lock);
1771 
1772 	domain->iommu_refcnt[iommu->seq_id] -= 1;
1773 	count = --domain->iommu_count;
1774 	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1775 		num = domain->iommu_did[iommu->seq_id];
1776 		clear_bit(num, iommu->domain_ids);
1777 		set_iommu_domain(iommu, num, NULL);
1778 
1779 		domain_update_iommu_cap(domain);
1780 		domain->iommu_did[iommu->seq_id] = 0;
1781 	}
1782 
1783 	return count;
1784 }
1785 
1786 static struct iova_domain reserved_iova_list;
1787 static struct lock_class_key reserved_rbtree_key;
1788 
1789 static int dmar_init_reserved_ranges(void)
1790 {
1791 	struct pci_dev *pdev = NULL;
1792 	struct iova *iova;
1793 	int i;
1794 
1795 	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1796 
1797 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1798 		&reserved_rbtree_key);
1799 
1800 	/* IOAPIC ranges shouldn't be accessed by DMA */
1801 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1802 		IOVA_PFN(IOAPIC_RANGE_END));
1803 	if (!iova) {
1804 		pr_err("Reserve IOAPIC range failed\n");
1805 		return -ENODEV;
1806 	}
1807 
1808 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1809 	for_each_pci_dev(pdev) {
1810 		struct resource *r;
1811 
1812 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1813 			r = &pdev->resource[i];
1814 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1815 				continue;
1816 			iova = reserve_iova(&reserved_iova_list,
1817 					    IOVA_PFN(r->start),
1818 					    IOVA_PFN(r->end));
1819 			if (!iova) {
1820 				pci_err(pdev, "Reserve iova for %pR failed\n", r);
1821 				return -ENODEV;
1822 			}
1823 		}
1824 	}
1825 	return 0;
1826 }
1827 
1828 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1829 {
1830 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1831 }
1832 
1833 static inline int guestwidth_to_adjustwidth(int gaw)
1834 {
1835 	int agaw;
1836 	int r = (gaw - 12) % 9;
1837 
1838 	if (r == 0)
1839 		agaw = gaw;
1840 	else
1841 		agaw = gaw + 9 - r;
1842 	if (agaw > 64)
1843 		agaw = 64;
1844 	return agaw;
1845 }
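
/*
 * Illustrative values for the rounding above:
 *
 *   gaw = 39: r = (39 - 12) % 9 = 0  ->  agaw = 39
 *   gaw = 36: r = (36 - 12) % 9 = 6  ->  agaw = 36 + 9 - 6 = 39
 *   gaw = 48: r = (48 - 12) % 9 = 0  ->  agaw = 48
 *
 * i.e. the guest address width is rounded up to the next 12 + 9 * n
 * width that a 4KiB-page, 9-bit-per-level page table can express,
 * capped at 64.
 */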
1846 
1847 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1848 		       int guest_width)
1849 {
1850 	int adjust_width, agaw;
1851 	unsigned long sagaw;
1852 	int err;
1853 
1854 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1855 
1856 	err = init_iova_flush_queue(&domain->iovad,
1857 				    iommu_flush_iova, iova_entry_free);
1858 	if (err)
1859 		return err;
1860 
1861 	domain_reserve_special_ranges(domain);
1862 
1863 	/* calculate AGAW */
1864 	if (guest_width > cap_mgaw(iommu->cap))
1865 		guest_width = cap_mgaw(iommu->cap);
1866 	domain->gaw = guest_width;
1867 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1868 	agaw = width_to_agaw(adjust_width);
1869 	sagaw = cap_sagaw(iommu->cap);
1870 	if (!test_bit(agaw, &sagaw)) {
1871 		/* hardware doesn't support it, choose a bigger one */
1872 		pr_debug("Hardware doesn't support agaw %d\n", agaw);
1873 		agaw = find_next_bit(&sagaw, 5, agaw);
1874 		if (agaw >= 5)
1875 			return -ENODEV;
1876 	}
1877 	domain->agaw = agaw;
1878 
1879 	if (ecap_coherent(iommu->ecap))
1880 		domain->iommu_coherency = 1;
1881 	else
1882 		domain->iommu_coherency = 0;
1883 
1884 	if (ecap_sc_support(iommu->ecap))
1885 		domain->iommu_snooping = 1;
1886 	else
1887 		domain->iommu_snooping = 0;
1888 
1889 	if (intel_iommu_superpage)
1890 		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1891 	else
1892 		domain->iommu_superpage = 0;
1893 
1894 	domain->nid = iommu->node;
1895 
1896 	/* always allocate the top pgd */
1897 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1898 	if (!domain->pgd)
1899 		return -ENOMEM;
1900 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1901 	return 0;
1902 }
1903 
1904 static void domain_exit(struct dmar_domain *domain)
1905 {
1906 
1907 	/* Remove associated devices and clear attached or cached domains */
1908 	domain_remove_dev_info(domain);
1909 
1910 	/* destroy iovas */
1911 	put_iova_domain(&domain->iovad);
1912 
1913 	if (domain->pgd) {
1914 		struct page *freelist;
1915 
1916 		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1917 		dma_free_pagelist(freelist);
1918 	}
1919 
1920 	free_domain_mem(domain);
1921 }
1922 
1923 /*
1924  * Get the PASID directory size for scalable mode context entry.
1925  * Value of X in the PDTS field of a scalable mode context entry
1926  * indicates PASID directory with 2^(X + 7) entries.
1927  */
1928 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1929 {
1930 	int pds, max_pde;
1931 
1932 	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1933 	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1934 	if (pds < 7)
1935 		return 0;
1936 
1937 	return pds - 7;
1938 }
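
/*
 * Illustrative encoding example, assuming PASID_PDE_SHIFT is 6 (one
 * PASID directory entry covers 64 PASID table entries):
 *
 *   table->max_pasid = 1 << 16  ->  max_pde = 1 << 10
 *   find_first_bit() = 10       ->  pds = 10 - 7 = 3
 *
 * which corresponds to a PASID directory of 2^(3 + 7) = 1024 entries.
 * Any max_pde below 2^7 yields pds = 0, i.e. the minimum 128-entry
 * directory.
 */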
1939 
1940 /*
1941  * Set the RID_PASID field of a scalable mode context entry. The
1942  * IOMMU hardware will use the PASID value set in this field for
1943  * DMA translations of DMA requests without PASID.
1944  */
1945 static inline void
1946 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1947 {
1948 	context->hi |= pasid & ((1 << 20) - 1);
1949 	context->hi |= (1 << 20);
1950 }
1951 
1952 /*
1953  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1954  * entry.
1955  */
1956 static inline void context_set_sm_dte(struct context_entry *context)
1957 {
1958 	context->lo |= (1 << 2);
1959 }
1960 
1961 /*
1962  * Set the PRE(Page Request Enable) field of a scalable mode context
1963  * entry.
1964  */
1965 static inline void context_set_sm_pre(struct context_entry *context)
1966 {
1967 	context->lo |= (1 << 4);
1968 }
1969 
1970 /* Convert value to context PASID directory size field coding. */
1971 #define context_pdts(pds)	(((pds) & 0x7) << 9)
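
/*
 * For example, pds = 3 from context_get_sm_pds() lands in the 3-bit
 * field at bits 11:9 of the low quadword: context_pdts(3) == 0x600,
 * advertising a 1024-entry PASID directory to the hardware.
 */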
1972 
1973 static int domain_context_mapping_one(struct dmar_domain *domain,
1974 				      struct intel_iommu *iommu,
1975 				      struct pasid_table *table,
1976 				      u8 bus, u8 devfn)
1977 {
1978 	u16 did = domain->iommu_did[iommu->seq_id];
1979 	int translation = CONTEXT_TT_MULTI_LEVEL;
1980 	struct device_domain_info *info = NULL;
1981 	struct context_entry *context;
1982 	unsigned long flags;
1983 	int ret;
1984 
1985 	WARN_ON(did == 0);
1986 
1987 	if (hw_pass_through && domain_type_is_si(domain))
1988 		translation = CONTEXT_TT_PASS_THROUGH;
1989 
1990 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1991 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1992 
1993 	BUG_ON(!domain->pgd);
1994 
1995 	spin_lock_irqsave(&device_domain_lock, flags);
1996 	spin_lock(&iommu->lock);
1997 
1998 	ret = -ENOMEM;
1999 	context = iommu_context_addr(iommu, bus, devfn, 1);
2000 	if (!context)
2001 		goto out_unlock;
2002 
2003 	ret = 0;
2004 	if (context_present(context))
2005 		goto out_unlock;
2006 
2007 	/*
2008 	 * For kdump cases, old valid entries may be cached due to the
2009 	 * in-flight DMA and copied pgtable, but there is no unmapping
2010 	 * behaviour for them, thus we need an explicit cache flush for
2011 	 * the newly-mapped device. For kdump, at this point, the device
2012 	 * is supposed to finish reset at its driver probe stage, so no
2013 	 * in-flight DMA will exist, and we don't need to worry about it
2014 	 * hereafter.
2015 	 */
2016 	if (context_copied(context)) {
2017 		u16 did_old = context_domain_id(context);
2018 
2019 		if (did_old < cap_ndoms(iommu->cap)) {
2020 			iommu->flush.flush_context(iommu, did_old,
2021 						   (((u16)bus) << 8) | devfn,
2022 						   DMA_CCMD_MASK_NOBIT,
2023 						   DMA_CCMD_DEVICE_INVL);
2024 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2025 						 DMA_TLB_DSI_FLUSH);
2026 		}
2027 	}
2028 
2029 	context_clear_entry(context);
2030 
2031 	if (sm_supported(iommu)) {
2032 		unsigned long pds;
2033 
2034 		WARN_ON(!table);
2035 
2036 		/* Setup the PASID DIR pointer: */
2037 		pds = context_get_sm_pds(table);
2038 		context->lo = (u64)virt_to_phys(table->table) |
2039 				context_pdts(pds);
2040 
2041 		/* Setup the RID_PASID field: */
2042 		context_set_sm_rid2pasid(context, PASID_RID2PASID);
2043 
2044 		/*
2045 		 * Setup the Device-TLB enable bit and Page request
2046 		 * Enable bit:
2047 		 */
2048 		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2049 		if (info && info->ats_supported)
2050 			context_set_sm_dte(context);
2051 		if (info && info->pri_supported)
2052 			context_set_sm_pre(context);
2053 	} else {
2054 		struct dma_pte *pgd = domain->pgd;
2055 		int agaw;
2056 
2057 		context_set_domain_id(context, did);
2058 
2059 		if (translation != CONTEXT_TT_PASS_THROUGH) {
2060 			/*
2061 			 * Skip top levels of page tables for an IOMMU that
2062 			 * has less agaw than the default. Unnecessary for PT mode.
2063 			 */
2064 			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2065 				ret = -ENOMEM;
2066 				pgd = phys_to_virt(dma_pte_addr(pgd));
2067 				if (!dma_pte_present(pgd))
2068 					goto out_unlock;
2069 			}
2070 
2071 			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072 			if (info && info->ats_supported)
2073 				translation = CONTEXT_TT_DEV_IOTLB;
2074 			else
2075 				translation = CONTEXT_TT_MULTI_LEVEL;
2076 
2077 			context_set_address_root(context, virt_to_phys(pgd));
2078 			context_set_address_width(context, agaw);
2079 		} else {
2080 			/*
2081 			 * In pass through mode, AW must be programmed to
2082 			 * indicate the largest AGAW value supported by
2083 			 * hardware. And ASR is ignored by hardware.
2084 			 */
2085 			context_set_address_width(context, iommu->msagaw);
2086 		}
2087 
2088 		context_set_translation_type(context, translation);
2089 	}
2090 
2091 	context_set_fault_enable(context);
2092 	context_set_present(context);
2093 	domain_flush_cache(domain, context, sizeof(*context));
2094 
2095 	/*
2096 	 * It's a non-present to present mapping. If hardware doesn't cache
2097 	 * non-present entries we only need to flush the write-buffer. If it
2098 	 * _does_ cache non-present entries, then it does so in the special
2099 	 * domain #0, which we have to flush:
2100 	 */
2101 	if (cap_caching_mode(iommu->cap)) {
2102 		iommu->flush.flush_context(iommu, 0,
2103 					   (((u16)bus) << 8) | devfn,
2104 					   DMA_CCMD_MASK_NOBIT,
2105 					   DMA_CCMD_DEVICE_INVL);
2106 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2107 	} else {
2108 		iommu_flush_write_buffer(iommu);
2109 	}
2110 	iommu_enable_dev_iotlb(info);
2111 
2112 	ret = 0;
2113 
2114 out_unlock:
2115 	spin_unlock(&iommu->lock);
2116 	spin_unlock_irqrestore(&device_domain_lock, flags);
2117 
2118 	return ret;
2119 }
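
/*
 * To summarize the two paths above: in scalable mode the context entry
 * only carries the PASID directory pointer plus the DTE/PRE hints, and
 * the actual translation setup for requests without PASID lives in the
 * RID2PASID pasid entry; in legacy mode the context entry itself holds
 * the domain id, the second-level page-table root (or, for pass-through,
 * just the maximum address width) and the translation type.
 */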
2120 
2121 struct domain_context_mapping_data {
2122 	struct dmar_domain *domain;
2123 	struct intel_iommu *iommu;
2124 	struct pasid_table *table;
2125 };
2126 
2127 static int domain_context_mapping_cb(struct pci_dev *pdev,
2128 				     u16 alias, void *opaque)
2129 {
2130 	struct domain_context_mapping_data *data = opaque;
2131 
2132 	return domain_context_mapping_one(data->domain, data->iommu,
2133 					  data->table, PCI_BUS_NUM(alias),
2134 					  alias & 0xff);
2135 }
2136 
2137 static int
2138 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2139 {
2140 	struct domain_context_mapping_data data;
2141 	struct pasid_table *table;
2142 	struct intel_iommu *iommu;
2143 	u8 bus, devfn;
2144 
2145 	iommu = device_to_iommu(dev, &bus, &devfn);
2146 	if (!iommu)
2147 		return -ENODEV;
2148 
2149 	table = intel_pasid_get_table(dev);
2150 
2151 	if (!dev_is_pci(dev))
2152 		return domain_context_mapping_one(domain, iommu, table,
2153 						  bus, devfn);
2154 
2155 	data.domain = domain;
2156 	data.iommu = iommu;
2157 	data.table = table;
2158 
2159 	return pci_for_each_dma_alias(to_pci_dev(dev),
2160 				      &domain_context_mapping_cb, &data);
2161 }
2162 
2163 static int domain_context_mapped_cb(struct pci_dev *pdev,
2164 				    u16 alias, void *opaque)
2165 {
2166 	struct intel_iommu *iommu = opaque;
2167 
2168 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2169 }
2170 
2171 static int domain_context_mapped(struct device *dev)
2172 {
2173 	struct intel_iommu *iommu;
2174 	u8 bus, devfn;
2175 
2176 	iommu = device_to_iommu(dev, &bus, &devfn);
2177 	if (!iommu)
2178 		return -ENODEV;
2179 
2180 	if (!dev_is_pci(dev))
2181 		return device_context_mapped(iommu, bus, devfn);
2182 
2183 	return !pci_for_each_dma_alias(to_pci_dev(dev),
2184 				       domain_context_mapped_cb, iommu);
2185 }
2186 
2187 /* Returns a number of VTD pages, but aligned to MM page size */
2188 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2189 					    size_t size)
2190 {
2191 	host_addr &= ~PAGE_MASK;
2192 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2193 }
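
/*
 * Illustrative example, assuming 4KiB MM pages and 4KiB VT-d pages
 * (PAGE_SHIFT == VTD_PAGE_SHIFT == 12):
 *
 *   host_addr = 0x1234, size = 0x2000
 *   page offset = 0x234, end = 0x2234, PAGE_ALIGN -> 0x3000
 *   -> 3 VT-d pages, although size itself is only two pages long.
 */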
2194 
2195 /* Return largest possible superpage level for a given mapping */
2196 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2197 					  unsigned long iov_pfn,
2198 					  unsigned long phy_pfn,
2199 					  unsigned long pages)
2200 {
2201 	int support, level = 1;
2202 	unsigned long pfnmerge;
2203 
2204 	support = domain->iommu_superpage;
2205 
2206 	/* To use a large page, the virtual *and* physical addresses
2207 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2208 	   of them will mean we have to use smaller pages. So just
2209 	   merge them and check both at once. */
2210 	pfnmerge = iov_pfn | phy_pfn;
2211 
2212 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2213 		pages >>= VTD_STRIDE_SHIFT;
2214 		if (!pages)
2215 			break;
2216 		pfnmerge >>= VTD_STRIDE_SHIFT;
2217 		level++;
2218 		support--;
2219 	}
2220 	return level;
2221 }
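
/*
 * Illustrative example, assuming VTD_STRIDE_SHIFT == 9 (512 entries per
 * level) and an IOMMU reporting 2MiB superpage support
 * (domain->iommu_superpage == 1):
 *
 *   iov_pfn and phy_pfn both 2MiB aligned (low 9 bits clear) and
 *   pages >= 512  ->  one iteration  ->  level 2, i.e. a 2MiB superpage.
 *
 * If either pfn has a low bit set, or fewer than 512 pages remain, the
 * loop stops at level 1 and plain 4KiB mappings are used.
 */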
2222 
2223 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224 			    struct scatterlist *sg, unsigned long phys_pfn,
2225 			    unsigned long nr_pages, int prot)
2226 {
2227 	struct dma_pte *first_pte = NULL, *pte = NULL;
2228 	phys_addr_t uninitialized_var(pteval);
2229 	unsigned long sg_res = 0;
2230 	unsigned int largepage_lvl = 0;
2231 	unsigned long lvl_pages = 0;
2232 
2233 	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2234 
2235 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2236 		return -EINVAL;
2237 
2238 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2239 
2240 	if (!sg) {
2241 		sg_res = nr_pages;
2242 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2243 	}
2244 
2245 	while (nr_pages > 0) {
2246 		uint64_t tmp;
2247 
2248 		if (!sg_res) {
2249 			unsigned int pgoff = sg->offset & ~PAGE_MASK;
2250 
2251 			sg_res = aligned_nrpages(sg->offset, sg->length);
2252 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2253 			sg->dma_length = sg->length;
2254 			pteval = (sg_phys(sg) - pgoff) | prot;
2255 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
2256 		}
2257 
2258 		if (!pte) {
2259 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260 
2261 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2262 			if (!pte)
2263 				return -ENOMEM;
2264 			/* It is a large page */
2265 			if (largepage_lvl > 1) {
2266 				unsigned long nr_superpages, end_pfn;
2267 
2268 				pteval |= DMA_PTE_LARGE_PAGE;
2269 				lvl_pages = lvl_to_nr_pages(largepage_lvl);
2270 
2271 				nr_superpages = sg_res / lvl_pages;
2272 				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273 
2274 				/*
2275 				 * Ensure that old small page tables are
2276 				 * removed to make room for superpage(s).
2277 				 * We're adding new large pages, so make sure
2278 				 * we don't remove their parent tables.
2279 				 */
2280 				dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2281 						       largepage_lvl + 1);
2282 			} else {
2283 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2284 			}
2285 
2286 		}
2287 		/* We don't need a lock here; nobody else
2288 		 * touches the iova range.
2289 		 */
2290 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2291 		if (tmp) {
2292 			static int dumps = 5;
2293 			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294 				iov_pfn, tmp, (unsigned long long)pteval);
2295 			if (dumps) {
2296 				dumps--;
2297 				debug_dma_dump_mappings(NULL);
2298 			}
2299 			WARN_ON(1);
2300 		}
2301 
2302 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303 
2304 		BUG_ON(nr_pages < lvl_pages);
2305 		BUG_ON(sg_res < lvl_pages);
2306 
2307 		nr_pages -= lvl_pages;
2308 		iov_pfn += lvl_pages;
2309 		phys_pfn += lvl_pages;
2310 		pteval += lvl_pages * VTD_PAGE_SIZE;
2311 		sg_res -= lvl_pages;
2312 
2313 		/* If the next PTE would be the first in a new page, then we
2314 		   need to flush the cache on the entries we've just written.
2315 		   And then we'll need to recalculate 'pte', so clear it and
2316 		   let it get set again in the if (!pte) block above.
2317 
2318 		   If we're done (!nr_pages) we need to flush the cache too.
2319 
2320 		   Also if we've been setting superpages, we may need to
2321 		   recalculate 'pte' and switch back to smaller pages for the
2322 		   end of the mapping, if the trailing size is not enough to
2323 		   use another superpage (i.e. sg_res < lvl_pages). */
2324 		pte++;
2325 		if (!nr_pages || first_pte_in_page(pte) ||
2326 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2327 			domain_flush_cache(domain, first_pte,
2328 					   (void *)pte - (void *)first_pte);
2329 			pte = NULL;
2330 		}
2331 
2332 		if (!sg_res && nr_pages)
2333 			sg = sg_next(sg);
2334 	}
2335 	return 0;
2336 }
2337 
2338 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339 			  struct scatterlist *sg, unsigned long phys_pfn,
2340 			  unsigned long nr_pages, int prot)
2341 {
2342 	int iommu_id, ret;
2343 	struct intel_iommu *iommu;
2344 
2345 	/* Do the real mapping first */
2346 	ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2347 	if (ret)
2348 		return ret;
2349 
2350 	for_each_domain_iommu(iommu_id, domain) {
2351 		iommu = g_iommus[iommu_id];
2352 		__mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2353 	}
2354 
2355 	return 0;
2356 }
2357 
2358 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2359 				    struct scatterlist *sg, unsigned long nr_pages,
2360 				    int prot)
2361 {
2362 	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2363 }
2364 
2365 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366 				     unsigned long phys_pfn, unsigned long nr_pages,
2367 				     int prot)
2368 {
2369 	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2370 }
2371 
2372 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2373 {
2374 	unsigned long flags;
2375 	struct context_entry *context;
2376 	u16 did_old;
2377 
2378 	if (!iommu)
2379 		return;
2380 
2381 	spin_lock_irqsave(&iommu->lock, flags);
2382 	context = iommu_context_addr(iommu, bus, devfn, 0);
2383 	if (!context) {
2384 		spin_unlock_irqrestore(&iommu->lock, flags);
2385 		return;
2386 	}
2387 	did_old = context_domain_id(context);
2388 	context_clear_entry(context);
2389 	__iommu_flush_cache(iommu, context, sizeof(*context));
2390 	spin_unlock_irqrestore(&iommu->lock, flags);
2391 	iommu->flush.flush_context(iommu,
2392 				   did_old,
2393 				   (((u16)bus) << 8) | devfn,
2394 				   DMA_CCMD_MASK_NOBIT,
2395 				   DMA_CCMD_DEVICE_INVL);
2396 	iommu->flush.flush_iotlb(iommu,
2397 				 did_old,
2398 				 0,
2399 				 0,
2400 				 DMA_TLB_DSI_FLUSH);
2401 }
2402 
2403 static inline void unlink_domain_info(struct device_domain_info *info)
2404 {
2405 	assert_spin_locked(&device_domain_lock);
2406 	list_del(&info->link);
2407 	list_del(&info->global);
2408 	if (info->dev)
2409 		info->dev->archdata.iommu = NULL;
2410 }
2411 
2412 static void domain_remove_dev_info(struct dmar_domain *domain)
2413 {
2414 	struct device_domain_info *info, *tmp;
2415 	unsigned long flags;
2416 
2417 	spin_lock_irqsave(&device_domain_lock, flags);
2418 	list_for_each_entry_safe(info, tmp, &domain->devices, link)
2419 		__dmar_remove_one_dev_info(info);
2420 	spin_unlock_irqrestore(&device_domain_lock, flags);
2421 }
2422 
2423 /*
2424  * find_domain
2425  * Note: we use struct device->archdata.iommu to store the info
2426  */
2427 static struct dmar_domain *find_domain(struct device *dev)
2428 {
2429 	struct device_domain_info *info;
2430 
2431 	if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2432 		struct iommu_domain *domain;
2433 
2434 		dev->archdata.iommu = NULL;
2435 		domain = iommu_get_domain_for_dev(dev);
2436 		if (domain)
2437 			intel_iommu_attach_device(domain, dev);
2438 	}
2439 
2440 	/* No lock here, assumes no domain exit in normal case */
2441 	info = dev->archdata.iommu;
2442 
2443 	if (likely(info))
2444 		return info->domain;
2445 	return NULL;
2446 }
2447 
2448 static inline struct device_domain_info *
2449 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2450 {
2451 	struct device_domain_info *info;
2452 
2453 	list_for_each_entry(info, &device_domain_list, global)
2454 		if (info->iommu->segment == segment && info->bus == bus &&
2455 		    info->devfn == devfn)
2456 			return info;
2457 
2458 	return NULL;
2459 }
2460 
2461 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2462 						    int bus, int devfn,
2463 						    struct device *dev,
2464 						    struct dmar_domain *domain)
2465 {
2466 	struct dmar_domain *found = NULL;
2467 	struct device_domain_info *info;
2468 	unsigned long flags;
2469 	int ret;
2470 
2471 	info = alloc_devinfo_mem();
2472 	if (!info)
2473 		return NULL;
2474 
2475 	info->bus = bus;
2476 	info->devfn = devfn;
2477 	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2478 	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2479 	info->ats_qdep = 0;
2480 	info->dev = dev;
2481 	info->domain = domain;
2482 	info->iommu = iommu;
2483 	info->pasid_table = NULL;
2484 	info->auxd_enabled = 0;
2485 	INIT_LIST_HEAD(&info->auxiliary_domains);
2486 
2487 	if (dev && dev_is_pci(dev)) {
2488 		struct pci_dev *pdev = to_pci_dev(info->dev);
2489 
2490 		if (!pdev->untrusted &&
2491 		    !pci_ats_disabled() &&
2492 		    ecap_dev_iotlb_support(iommu->ecap) &&
2493 		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2494 		    dmar_find_matched_atsr_unit(pdev))
2495 			info->ats_supported = 1;
2496 
2497 		if (sm_supported(iommu)) {
2498 			if (pasid_supported(iommu)) {
2499 				int features = pci_pasid_features(pdev);
2500 				if (features >= 0)
2501 					info->pasid_supported = features | 1;
2502 			}
2503 
2504 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
2505 			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2506 				info->pri_supported = 1;
2507 		}
2508 	}
2509 
2510 	spin_lock_irqsave(&device_domain_lock, flags);
2511 	if (dev)
2512 		found = find_domain(dev);
2513 
2514 	if (!found) {
2515 		struct device_domain_info *info2;
2516 		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2517 		if (info2) {
2518 			found      = info2->domain;
2519 			info2->dev = dev;
2520 		}
2521 	}
2522 
2523 	if (found) {
2524 		spin_unlock_irqrestore(&device_domain_lock, flags);
2525 		free_devinfo_mem(info);
2526 		/* Caller must free the original domain */
2527 		return found;
2528 	}
2529 
2530 	spin_lock(&iommu->lock);
2531 	ret = domain_attach_iommu(domain, iommu);
2532 	spin_unlock(&iommu->lock);
2533 
2534 	if (ret) {
2535 		spin_unlock_irqrestore(&device_domain_lock, flags);
2536 		free_devinfo_mem(info);
2537 		return NULL;
2538 	}
2539 
2540 	list_add(&info->link, &domain->devices);
2541 	list_add(&info->global, &device_domain_list);
2542 	if (dev)
2543 		dev->archdata.iommu = info;
2544 	spin_unlock_irqrestore(&device_domain_lock, flags);
2545 
2546 	/* PASID table is mandatory for a PCI device in scalable mode. */
2547 	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2548 		ret = intel_pasid_alloc_table(dev);
2549 		if (ret) {
2550 			dev_err(dev, "PASID table allocation failed\n");
2551 			dmar_remove_one_dev_info(dev);
2552 			return NULL;
2553 		}
2554 
2555 		/* Setup the PASID entry for requests without PASID: */
2556 		spin_lock(&iommu->lock);
2557 		if (hw_pass_through && domain_type_is_si(domain))
2558 			ret = intel_pasid_setup_pass_through(iommu, domain,
2559 					dev, PASID_RID2PASID);
2560 		else
2561 			ret = intel_pasid_setup_second_level(iommu, domain,
2562 					dev, PASID_RID2PASID);
2563 		spin_unlock(&iommu->lock);
2564 		if (ret) {
2565 			dev_err(dev, "Setup RID2PASID failed\n");
2566 			dmar_remove_one_dev_info(dev);
2567 			return NULL;
2568 		}
2569 	}
2570 
2571 	if (dev && domain_context_mapping(domain, dev)) {
2572 		dev_err(dev, "Domain context map failed\n");
2573 		dmar_remove_one_dev_info(dev);
2574 		return NULL;
2575 	}
2576 
2577 	return domain;
2578 }
2579 
2580 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2581 {
2582 	*(u16 *)opaque = alias;
2583 	return 0;
2584 }
2585 
2586 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2587 {
2588 	struct device_domain_info *info;
2589 	struct dmar_domain *domain = NULL;
2590 	struct intel_iommu *iommu;
2591 	u16 dma_alias;
2592 	unsigned long flags;
2593 	u8 bus, devfn;
2594 
2595 	iommu = device_to_iommu(dev, &bus, &devfn);
2596 	if (!iommu)
2597 		return NULL;
2598 
2599 	if (dev_is_pci(dev)) {
2600 		struct pci_dev *pdev = to_pci_dev(dev);
2601 
2602 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2603 
2604 		spin_lock_irqsave(&device_domain_lock, flags);
2605 		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2606 						      PCI_BUS_NUM(dma_alias),
2607 						      dma_alias & 0xff);
2608 		if (info) {
2609 			iommu = info->iommu;
2610 			domain = info->domain;
2611 		}
2612 		spin_unlock_irqrestore(&device_domain_lock, flags);
2613 
2614 		/* DMA alias already has a domain, use it */
2615 		if (info)
2616 			goto out;
2617 	}
2618 
2619 	/* Allocate and initialize new domain for the device */
2620 	domain = alloc_domain(0);
2621 	if (!domain)
2622 		return NULL;
2623 	if (domain_init(domain, iommu, gaw)) {
2624 		domain_exit(domain);
2625 		return NULL;
2626 	}
2627 
2628 out:
2629 	return domain;
2630 }
2631 
2632 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2633 					      struct dmar_domain *domain)
2634 {
2635 	struct intel_iommu *iommu;
2636 	struct dmar_domain *tmp;
2637 	u16 req_id, dma_alias;
2638 	u8 bus, devfn;
2639 
2640 	iommu = device_to_iommu(dev, &bus, &devfn);
2641 	if (!iommu)
2642 		return NULL;
2643 
2644 	req_id = ((u16)bus << 8) | devfn;
2645 
2646 	if (dev_is_pci(dev)) {
2647 		struct pci_dev *pdev = to_pci_dev(dev);
2648 
2649 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2650 
2651 		/* register PCI DMA alias device */
2652 		if (req_id != dma_alias) {
2653 			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2654 					dma_alias & 0xff, NULL, domain);
2655 
2656 			if (!tmp || tmp != domain)
2657 				return tmp;
2658 		}
2659 	}
2660 
2661 	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662 	if (!tmp || tmp != domain)
2663 		return tmp;
2664 
2665 	return domain;
2666 }
2667 
2668 static int iommu_domain_identity_map(struct dmar_domain *domain,
2669 				     unsigned long long start,
2670 				     unsigned long long end)
2671 {
2672 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2673 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2674 
2675 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2676 			  dma_to_mm_pfn(last_vpfn))) {
2677 		pr_err("Reserving iova failed\n");
2678 		return -ENOMEM;
2679 	}
2680 
2681 	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2682 	/*
2683 	 * RMRR range might have overlap with physical memory range,
2684 	 * clear it first
2685 	 */
2686 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2687 
2688 	return __domain_mapping(domain, first_vpfn, NULL,
2689 				first_vpfn, last_vpfn - first_vpfn + 1,
2690 				DMA_PTE_READ|DMA_PTE_WRITE);
2691 }
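
/*
 * A small illustration of the 1:1 mapping above, with made-up addresses
 * and 4KiB pages: start = 0x1000000 and end = 0x1ffffff give
 * first_vpfn = 0x1000 and last_vpfn = 0x1fff, so 0x1000 pages are
 * reserved in the iova tree and then mapped with IOVA pfn == physical
 * pfn, read/write, via __domain_mapping().
 */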
2692 
2693 static int domain_prepare_identity_map(struct device *dev,
2694 				       struct dmar_domain *domain,
2695 				       unsigned long long start,
2696 				       unsigned long long end)
2697 {
2698 	/* For _hardware_ passthrough, don't bother. But for software
2699 	   passthrough, we do it anyway -- it may indicate a memory
2700 	   range which is reserved in E820 and so didn't get set
2701 	   up to start with in si_domain */
2702 	if (domain == si_domain && hw_pass_through) {
2703 		dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2704 			 start, end);
2705 		return 0;
2706 	}
2707 
2708 	dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
2709 
2710 	if (end < start) {
2711 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2712 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2713 			dmi_get_system_info(DMI_BIOS_VENDOR),
2714 			dmi_get_system_info(DMI_BIOS_VERSION),
2715 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2716 		return -EIO;
2717 	}
2718 
2719 	if (end >> agaw_to_width(domain->agaw)) {
2720 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2721 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2722 		     agaw_to_width(domain->agaw),
2723 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2724 		     dmi_get_system_info(DMI_BIOS_VERSION),
2725 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2726 		return -EIO;
2727 	}
2728 
2729 	return iommu_domain_identity_map(domain, start, end);
2730 }
2731 
2732 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2733 
2734 static int __init si_domain_init(int hw)
2735 {
2736 	struct dmar_rmrr_unit *rmrr;
2737 	struct device *dev;
2738 	int i, nid, ret;
2739 
2740 	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2741 	if (!si_domain)
2742 		return -EFAULT;
2743 
2744 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2745 		domain_exit(si_domain);
2746 		return -EFAULT;
2747 	}
2748 
2749 	if (hw)
2750 		return 0;
2751 
2752 	for_each_online_node(nid) {
2753 		unsigned long start_pfn, end_pfn;
2754 		int i;
2755 
2756 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2757 			ret = iommu_domain_identity_map(si_domain,
2758 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2759 			if (ret)
2760 				return ret;
2761 		}
2762 	}
2763 
2764 	/*
2765 	 * Normally we use DMA domains for devices which have RMRRs. But we
2766 	 * relax this requirement for graphics and USB devices. Identity map
2767 	 * the RMRRs for graphics and USB devices so that they can use the
2768 	 * si_domain.
2769 	 */
2770 	for_each_rmrr_units(rmrr) {
2771 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2772 					  i, dev) {
2773 			unsigned long long start = rmrr->base_address;
2774 			unsigned long long end = rmrr->end_address;
2775 
2776 			if (device_is_rmrr_locked(dev))
2777 				continue;
2778 
2779 			if (WARN_ON(end < start ||
2780 				    end >> agaw_to_width(si_domain->agaw)))
2781 				continue;
2782 
2783 			ret = iommu_domain_identity_map(si_domain, start, end);
2784 			if (ret)
2785 				return ret;
2786 		}
2787 	}
2788 
2789 	return 0;
2790 }
2791 
2792 static int identity_mapping(struct device *dev)
2793 {
2794 	struct device_domain_info *info;
2795 
2796 	info = dev->archdata.iommu;
2797 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
2798 		return (info->domain == si_domain);
2799 
2800 	return 0;
2801 }
2802 
2803 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2804 {
2805 	struct dmar_domain *ndomain;
2806 	struct intel_iommu *iommu;
2807 	u8 bus, devfn;
2808 
2809 	iommu = device_to_iommu(dev, &bus, &devfn);
2810 	if (!iommu)
2811 		return -ENODEV;
2812 
2813 	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2814 	if (ndomain != domain)
2815 		return -EBUSY;
2816 
2817 	return 0;
2818 }
2819 
2820 static bool device_has_rmrr(struct device *dev)
2821 {
2822 	struct dmar_rmrr_unit *rmrr;
2823 	struct device *tmp;
2824 	int i;
2825 
2826 	rcu_read_lock();
2827 	for_each_rmrr_units(rmrr) {
2828 		/*
2829 		 * Return TRUE if this RMRR contains the device that
2830 		 * is passed in.
2831 		 */
2832 		for_each_active_dev_scope(rmrr->devices,
2833 					  rmrr->devices_cnt, i, tmp)
2834 			if (tmp == dev ||
2835 			    is_downstream_to_pci_bridge(dev, tmp)) {
2836 				rcu_read_unlock();
2837 				return true;
2838 			}
2839 	}
2840 	rcu_read_unlock();
2841 	return false;
2842 }
2843 
2844 /**
2845  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2846  * is relaxable (i.e. it is allowed to be left unenforced under some conditions)
2847  * @dev: device handle
2848  *
2849  * We assume that PCI USB devices with RMRRs have them largely
2850  * for historical reasons and that the RMRR space is not actively used post
2851  * boot.  This exclusion may change if vendors begin to abuse it.
2852  *
2853  * The same exception is made for graphics devices, with the requirement that
2854  * any use of the RMRR regions will be torn down before assigning the device
2855  * to a guest.
2856  *
2857  * Return: true if the RMRR is relaxable, false otherwise
2858  */
2859 static bool device_rmrr_is_relaxable(struct device *dev)
2860 {
2861 	struct pci_dev *pdev;
2862 
2863 	if (!dev_is_pci(dev))
2864 		return false;
2865 
2866 	pdev = to_pci_dev(dev);
2867 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2868 		return true;
2869 	else
2870 		return false;
2871 }
2872 
2873 /*
2874  * There are a couple of cases where we need to restrict the functionality of
2875  * devices associated with RMRRs.  The first is when evaluating a device for
2876  * identity mapping because problems exist when devices are moved in and out
2877  * of domains and their respective RMRR information is lost.  This means that
2878  * a device with associated RMRRs will never be in a "passthrough" domain.
2879  * The second is use of the device through the IOMMU API.  This interface
2880  * expects to have full control of the IOVA space for the device.  We cannot
2881  * satisfy both the requirement that RMRR access is maintained and have an
2882  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2883  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2884  * We therefore prevent devices associated with an RMRR from participating in
2885  * the IOMMU API, which eliminates them from device assignment.
2886  *
2887  * In both cases, devices which have relaxable RMRRs are not affected by this
2888  * restriction. See the device_rmrr_is_relaxable() comment.
2889  */
2890 static bool device_is_rmrr_locked(struct device *dev)
2891 {
2892 	if (!device_has_rmrr(dev))
2893 		return false;
2894 
2895 	if (device_rmrr_is_relaxable(dev))
2896 		return false;
2897 
2898 	return true;
2899 }
2900 
2901 /*
2902  * Return the required default domain type for a specific device.
2903  *
2904  * @dev: the device in query
2906  *
2907  * Returns:
2908  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2909  *  - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2910  *  - 0: both identity and dynamic domains work for this device
2911  */
2912 static int device_def_domain_type(struct device *dev)
2913 {
2914 	if (dev_is_pci(dev)) {
2915 		struct pci_dev *pdev = to_pci_dev(dev);
2916 
2917 		if (device_is_rmrr_locked(dev))
2918 			return IOMMU_DOMAIN_DMA;
2919 
2920 		/*
2921 		 * Prevent any device marked as untrusted from getting
2922 		 * placed into the statically identity mapping domain.
2923 		 */
2924 		if (pdev->untrusted)
2925 			return IOMMU_DOMAIN_DMA;
2926 
2927 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2928 			return IOMMU_DOMAIN_IDENTITY;
2929 
2930 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2931 			return IOMMU_DOMAIN_IDENTITY;
2932 
2933 		/*
2934 		 * We want to start off with all devices in the 1:1 domain, and
2935 		 * take them out later if we find they can't access all of memory.
2936 		 *
2937 		 * However, we can't do this for PCI devices behind bridges,
2938 		 * because all PCI devices behind the same bridge will end up
2939 		 * with the same source-id on their transactions.
2940 		 *
2941 		 * Practically speaking, we can't change things around for these
2942 		 * devices at run-time, because we can't be sure there'll be no
2943 		 * DMA transactions in flight for any of their siblings.
2944 		 *
2945 		 * So PCI devices (unless they're on the root bus) as well as
2946 		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2947 		 * the 1:1 domain, just in _case_ one of their siblings turns out
2948 		 * not to be able to map all of memory.
2949 		 */
2950 		if (!pci_is_pcie(pdev)) {
2951 			if (!pci_is_root_bus(pdev->bus))
2952 				return IOMMU_DOMAIN_DMA;
2953 			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2954 				return IOMMU_DOMAIN_DMA;
2955 		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2956 			return IOMMU_DOMAIN_DMA;
2957 	} else {
2958 		if (device_has_rmrr(dev))
2959 			return IOMMU_DOMAIN_DMA;
2960 	}
2961 
2962 	return (iommu_identity_mapping & IDENTMAP_ALL) ?
2963 			IOMMU_DOMAIN_IDENTITY : 0;
2964 }
2965 
2966 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2967 {
2968 	/*
2969 	 * Start from a sane iommu hardware state.
2970 	 * If the queued invalidation is already initialized by us
2971 	 * (for example, while enabling interrupt-remapping) then
2972 	 * things are already rolling from a sane state.
2973 	 */
2974 	if (!iommu->qi) {
2975 		/*
2976 		 * Clear any previous faults.
2977 		 */
2978 		dmar_fault(-1, iommu);
2979 		/*
2980 		 * Disable queued invalidation if supported and already enabled
2981 		 * before OS handover.
2982 		 */
2983 		dmar_disable_qi(iommu);
2984 	}
2985 
2986 	if (dmar_enable_qi(iommu)) {
2987 		/*
2988 		 * Queued invalidation not enabled, use register-based invalidation
2989 		 */
2990 		iommu->flush.flush_context = __iommu_flush_context;
2991 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2992 		pr_info("%s: Using Register based invalidation\n",
2993 			iommu->name);
2994 	} else {
2995 		iommu->flush.flush_context = qi_flush_context;
2996 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2997 		pr_info("%s: Using Queued invalidation\n", iommu->name);
2998 	}
2999 }
3000 
3001 static int copy_context_table(struct intel_iommu *iommu,
3002 			      struct root_entry *old_re,
3003 			      struct context_entry **tbl,
3004 			      int bus, bool ext)
3005 {
3006 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3007 	struct context_entry *new_ce = NULL, ce;
3008 	struct context_entry *old_ce = NULL;
3009 	struct root_entry re;
3010 	phys_addr_t old_ce_phys;
3011 
3012 	tbl_idx = ext ? bus * 2 : bus;
3013 	memcpy(&re, old_re, sizeof(re));
3014 
3015 	for (devfn = 0; devfn < 256; devfn++) {
3016 		/* First calculate the correct index */
3017 		idx = (ext ? devfn * 2 : devfn) % 256;
3018 
3019 		if (idx == 0) {
3020 			/* First save what we may have and clean up */
3021 			if (new_ce) {
3022 				tbl[tbl_idx] = new_ce;
3023 				__iommu_flush_cache(iommu, new_ce,
3024 						    VTD_PAGE_SIZE);
3025 				pos = 1;
3026 			}
3027 
3028 			if (old_ce)
3029 				memunmap(old_ce);
3030 
3031 			ret = 0;
3032 			if (devfn < 0x80)
3033 				old_ce_phys = root_entry_lctp(&re);
3034 			else
3035 				old_ce_phys = root_entry_uctp(&re);
3036 
3037 			if (!old_ce_phys) {
3038 				if (ext && devfn == 0) {
3039 					/* No LCTP, try UCTP */
3040 					devfn = 0x7f;
3041 					continue;
3042 				} else {
3043 					goto out;
3044 				}
3045 			}
3046 
3047 			ret = -ENOMEM;
3048 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
3049 					MEMREMAP_WB);
3050 			if (!old_ce)
3051 				goto out;
3052 
3053 			new_ce = alloc_pgtable_page(iommu->node);
3054 			if (!new_ce)
3055 				goto out_unmap;
3056 
3057 			ret = 0;
3058 		}
3059 
3060 		/* Now copy the context entry */
3061 		memcpy(&ce, old_ce + idx, sizeof(ce));
3062 
3063 		if (!__context_present(&ce))
3064 			continue;
3065 
3066 		did = context_domain_id(&ce);
3067 		if (did >= 0 && did < cap_ndoms(iommu->cap))
3068 			set_bit(did, iommu->domain_ids);
3069 
3070 		/*
3071 		 * We need a marker for copied context entries. This
3072 		 * marker needs to work for the old format as well as
3073 		 * for extended context entries.
3074 		 *
3075 		 * Bit 67 of the context entry is used. In the old
3076 		 * format this bit is available to software, in the
3077 		 * extended format it is the PGE bit, but PGE is ignored
3078 		 * by HW if PASIDs are disabled (and thus still
3079 		 * available).
3080 		 *
3081 		 * So disable PASIDs first and then mark the entry
3082 		 * copied. This means that we don't copy PASID
3083 		 * translations from the old kernel, but this is fine as
3084 		 * faults there are not fatal.
3085 		 */
3086 		context_clear_pasid_enable(&ce);
3087 		context_set_copied(&ce);
3088 
3089 		new_ce[idx] = ce;
3090 	}
3091 
3092 	tbl[tbl_idx + pos] = new_ce;
3093 
3094 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3095 
3096 out_unmap:
3097 	memunmap(old_ce);
3098 
3099 out:
3100 	return ret;
3101 }
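
/*
 * Layout reminder for the copy above: extended context entries are twice
 * the size of legacy ones, so each bus needs two 4KiB context tables
 * (devfn 0x00-0x7f behind the lower context-table pointer, 0x80-0xff
 * behind the upper one). That is why tbl_idx is bus * 2 and idx wraps
 * as (devfn * 2) % 256 in the extended case.
 */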
3102 
3103 static int copy_translation_tables(struct intel_iommu *iommu)
3104 {
3105 	struct context_entry **ctxt_tbls;
3106 	struct root_entry *old_rt;
3107 	phys_addr_t old_rt_phys;
3108 	int ctxt_table_entries;
3109 	unsigned long flags;
3110 	u64 rtaddr_reg;
3111 	int bus, ret;
3112 	bool new_ext, ext;
3113 
3114 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3115 	ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3116 	new_ext    = !!ecap_ecs(iommu->ecap);
3117 
3118 	/*
3119 	 * The RTT bit can only be changed when translation is disabled,
3120 	 * but disabling translation would open a window for data
3121 	 * corruption. So bail out and don't copy anything if we would
3122 	 * have to change the bit.
3123 	 */
3124 	if (new_ext != ext)
3125 		return -EINVAL;
3126 
3127 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3128 	if (!old_rt_phys)
3129 		return -EINVAL;
3130 
3131 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3132 	if (!old_rt)
3133 		return -ENOMEM;
3134 
3135 	/* This is too big for the stack - allocate it from slab */
3136 	ctxt_table_entries = ext ? 512 : 256;
3137 	ret = -ENOMEM;
3138 	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3139 	if (!ctxt_tbls)
3140 		goto out_unmap;
3141 
3142 	for (bus = 0; bus < 256; bus++) {
3143 		ret = copy_context_table(iommu, &old_rt[bus],
3144 					 ctxt_tbls, bus, ext);
3145 		if (ret) {
3146 			pr_err("%s: Failed to copy context table for bus %d\n",
3147 				iommu->name, bus);
3148 			continue;
3149 		}
3150 	}
3151 
3152 	spin_lock_irqsave(&iommu->lock, flags);
3153 
3154 	/* Context tables are copied, now write them to the root_entry table */
3155 	for (bus = 0; bus < 256; bus++) {
3156 		int idx = ext ? bus * 2 : bus;
3157 		u64 val;
3158 
3159 		if (ctxt_tbls[idx]) {
3160 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
3161 			iommu->root_entry[bus].lo = val;
3162 		}
3163 
3164 		if (!ext || !ctxt_tbls[idx + 1])
3165 			continue;
3166 
3167 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3168 		iommu->root_entry[bus].hi = val;
3169 	}
3170 
3171 	spin_unlock_irqrestore(&iommu->lock, flags);
3172 
3173 	kfree(ctxt_tbls);
3174 
3175 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3176 
3177 	ret = 0;
3178 
3179 out_unmap:
3180 	memunmap(old_rt);
3181 
3182 	return ret;
3183 }
3184 
3185 static int __init init_dmars(void)
3186 {
3187 	struct dmar_drhd_unit *drhd;
3188 	struct intel_iommu *iommu;
3189 	int ret;
3190 
3191 	/*
3192 	 * for each drhd
3193 	 *    allocate root
3194 	 *    initialize and program root entry to not present
3195 	 * endfor
3196 	 */
3197 	for_each_drhd_unit(drhd) {
3198 		/*
3199 		 * Lock not needed as this is only incremented in the
3200 		 * single-threaded kernel __init code path; all other
3201 		 * accesses are read-only.
3202 		 */
3203 		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3204 			g_num_of_iommus++;
3205 			continue;
3206 		}
3207 		pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3208 	}
3209 
3210 	/* Preallocate enough resources for IOMMU hot-addition */
3211 	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3212 		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3213 
3214 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3215 			GFP_KERNEL);
3216 	if (!g_iommus) {
3217 		pr_err("Allocating global iommu array failed\n");
3218 		ret = -ENOMEM;
3219 		goto error;
3220 	}
3221 
3222 	for_each_iommu(iommu, drhd) {
3223 		if (drhd->ignored) {
3224 			iommu_disable_translation(iommu);
3225 			continue;
3226 		}
3227 
3228 		/*
3229 		 * Find the max PASID size of all IOMMUs in the system.
3230 		 * We need to ensure the system PASID table is no bigger
3231 		 * than the smallest supported.
3232 		 */
3233 		if (pasid_supported(iommu)) {
3234 			u32 temp = 2 << ecap_pss(iommu->ecap);
3235 
3236 			intel_pasid_max_id = min_t(u32, temp,
3237 						   intel_pasid_max_id);
3238 		}
3239 
3240 		g_iommus[iommu->seq_id] = iommu;
3241 
3242 		intel_iommu_init_qi(iommu);
3243 
3244 		ret = iommu_init_domains(iommu);
3245 		if (ret)
3246 			goto free_iommu;
3247 
3248 		init_translation_status(iommu);
3249 
3250 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3251 			iommu_disable_translation(iommu);
3252 			clear_translation_pre_enabled(iommu);
3253 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3254 				iommu->name);
3255 		}
3256 
3257 		/*
3258 		 * TBD:
3259 		 * we could share the same root & context tables
3260 		 * among all IOMMUs. Need to split this out later.
3261 		 */
3262 		ret = iommu_alloc_root_entry(iommu);
3263 		if (ret)
3264 			goto free_iommu;
3265 
3266 		if (translation_pre_enabled(iommu)) {
3267 			pr_info("Translation already enabled - trying to copy translation structures\n");
3268 
3269 			ret = copy_translation_tables(iommu);
3270 			if (ret) {
3271 				/*
3272 				 * We found the IOMMU with translation
3273 				 * enabled - but failed to copy over the
3274 				 * old root-entry table. Try to proceed
3275 				 * by disabling translation now and
3276 				 * allocating a clean root-entry table.
3277 				 * This might cause DMAR faults, but
3278 				 * probably the dump will still succeed.
3279 				 */
3280 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
3281 				       iommu->name);
3282 				iommu_disable_translation(iommu);
3283 				clear_translation_pre_enabled(iommu);
3284 			} else {
3285 				pr_info("Copied translation tables from previous kernel for %s\n",
3286 					iommu->name);
3287 			}
3288 		}
3289 
3290 		if (!ecap_pass_through(iommu->ecap))
3291 			hw_pass_through = 0;
3292 #ifdef CONFIG_INTEL_IOMMU_SVM
3293 		if (pasid_supported(iommu))
3294 			intel_svm_init(iommu);
3295 #endif
3296 	}
3297 
3298 	/*
3299 	 * Now that qi is enabled on all iommus, set the root entry and flush
3300 	 * caches. This is required on some Intel X58 chipsets, otherwise the
3301 	 * flush_context function will loop forever and the boot hangs.
3302 	 */
3303 	for_each_active_iommu(iommu, drhd) {
3304 		iommu_flush_write_buffer(iommu);
3305 		iommu_set_root_entry(iommu);
3306 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3307 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3308 	}
3309 
3310 	if (iommu_default_passthrough())
3311 		iommu_identity_mapping |= IDENTMAP_ALL;
3312 
3313 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3314 	dmar_map_gfx = 0;
3315 #endif
3316 
3317 	if (!dmar_map_gfx)
3318 		iommu_identity_mapping |= IDENTMAP_GFX;
3319 
3320 	check_tylersburg_isoch();
3321 
3322 	ret = si_domain_init(hw_pass_through);
3323 	if (ret)
3324 		goto free_iommu;
3325 
3326 	/*
3327 	 * for each drhd
3328 	 *   enable fault log
3329 	 *   global invalidate context cache
3330 	 *   global invalidate iotlb
3331 	 *   enable translation
3332 	 */
3333 	for_each_iommu(iommu, drhd) {
3334 		if (drhd->ignored) {
3335 			/*
3336 			 * we always have to disable PMRs or DMA may fail on
3337 			 * this device
3338 			 */
3339 			if (force_on)
3340 				iommu_disable_protect_mem_regions(iommu);
3341 			continue;
3342 		}
3343 
3344 		iommu_flush_write_buffer(iommu);
3345 
3346 #ifdef CONFIG_INTEL_IOMMU_SVM
3347 		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3348 			/*
3349 			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3350 			 * could cause a lock race condition, so drop it here.
3351 			 */
3352 			up_write(&dmar_global_lock);
3353 			ret = intel_svm_enable_prq(iommu);
3354 			down_write(&dmar_global_lock);
3355 			if (ret)
3356 				goto free_iommu;
3357 		}
3358 #endif
3359 		ret = dmar_set_interrupt(iommu);
3360 		if (ret)
3361 			goto free_iommu;
3362 	}
3363 
3364 	return 0;
3365 
3366 free_iommu:
3367 	for_each_active_iommu(iommu, drhd) {
3368 		disable_dmar_iommu(iommu);
3369 		free_dmar_iommu(iommu);
3370 	}
3371 
3372 	kfree(g_iommus);
3373 
3374 error:
3375 	return ret;
3376 }
3377 
3378 /* This takes a number of _MM_ pages, not VTD pages */
3379 static unsigned long intel_alloc_iova(struct device *dev,
3380 				     struct dmar_domain *domain,
3381 				     unsigned long nrpages, uint64_t dma_mask)
3382 {
3383 	unsigned long iova_pfn;
3384 
3385 	/* Restrict dma_mask to the width that the iommu can handle */
3386 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3387 	/* Ensure we reserve the whole size-aligned region */
3388 	nrpages = __roundup_pow_of_two(nrpages);
3389 
3390 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3391 		/*
3392 		 * First try to allocate an io virtual address in
3393 		 * DMA_BIT_MASK(32) and if that fails then try allocating
3394 		 * from higher range
3395 		 */
3396 		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3397 					   IOVA_PFN(DMA_BIT_MASK(32)), false);
3398 		if (iova_pfn)
3399 			return iova_pfn;
3400 	}
3401 	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3402 				   IOVA_PFN(dma_mask), true);
3403 	if (unlikely(!iova_pfn)) {
3404 		dev_err(dev, "Allocating %ld-page iova failed", nrpages);
3405 		return 0;
3406 	}
3407 
3408 	return iova_pfn;
3409 }
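
/*
 * Note on the rounding above: __roundup_pow_of_two() keeps the reserved
 * region size-aligned, e.g. a 5-page request reserves 8 IOVA pages. The
 * first alloc_iova_fast() call is a best-effort attempt to stay below
 * 4GiB for devices advertising a larger dma_mask; only if that fails
 * (or forcedac is set) is the full mask used.
 */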
3410 
3411 static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
3412 {
3413 	struct dmar_domain *domain, *tmp;
3414 	struct dmar_rmrr_unit *rmrr;
3415 	struct device *i_dev;
3416 	int i, ret;
3417 
3418 	/* Device shouldn't be attached by any domains. */
3419 	domain = find_domain(dev);
3420 	if (domain)
3421 		return NULL;
3422 
3423 	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3424 	if (!domain)
3425 		goto out;
3426 
3427 	/* We have a new domain - setup possible RMRRs for the device */
3428 	rcu_read_lock();
3429 	for_each_rmrr_units(rmrr) {
3430 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3431 					  i, i_dev) {
3432 			if (i_dev != dev)
3433 				continue;
3434 
3435 			ret = domain_prepare_identity_map(dev, domain,
3436 							  rmrr->base_address,
3437 							  rmrr->end_address);
3438 			if (ret)
3439 				dev_err(dev, "Mapping reserved region failed\n");
3440 		}
3441 	}
3442 	rcu_read_unlock();
3443 
3444 	tmp = set_domain_for_dev(dev, domain);
3445 	if (!tmp || domain != tmp) {
3446 		domain_exit(domain);
3447 		domain = tmp;
3448 	}
3449 
3450 out:
3451 	if (!domain)
3452 		dev_err(dev, "Allocating domain failed\n");
3453 	else
3454 		domain->domain.type = IOMMU_DOMAIN_DMA;
3455 
3456 	return domain;
3457 }
3458 
3459 /* Check if the dev needs to go through the non-identity map and unmap process. */
3460 static bool iommu_need_mapping(struct device *dev)
3461 {
3462 	int ret;
3463 
3464 	if (iommu_dummy(dev))
3465 		return false;
3466 
3467 	ret = identity_mapping(dev);
3468 	if (ret) {
3469 		u64 dma_mask = *dev->dma_mask;
3470 
3471 		if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3472 			dma_mask = dev->coherent_dma_mask;
3473 
3474 		if (dma_mask >= dma_direct_get_required_mask(dev))
3475 			return false;
3476 
3477 		/*
3478 		 * A 32-bit DMA capable device is removed from si_domain and falls
3479 		 * back to a non-identity mapping.
3480 		 */
3481 		dmar_remove_one_dev_info(dev);
3482 		ret = iommu_request_dma_domain_for_dev(dev);
3483 		if (ret) {
3484 			struct iommu_domain *domain;
3485 			struct dmar_domain *dmar_domain;
3486 
3487 			domain = iommu_get_domain_for_dev(dev);
3488 			if (domain) {
3489 				dmar_domain = to_dmar_domain(domain);
3490 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3491 			}
3492 			dmar_remove_one_dev_info(dev);
3493 			get_private_domain_for_dev(dev);
3494 		}
3495 
3496 		dev_info(dev, "32bit DMA uses non-identity mapping\n");
3497 	}
3498 
3499 	return true;
3500 }
3501 
3502 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3503 				     size_t size, int dir, u64 dma_mask)
3504 {
3505 	struct dmar_domain *domain;
3506 	phys_addr_t start_paddr;
3507 	unsigned long iova_pfn;
3508 	int prot = 0;
3509 	int ret;
3510 	struct intel_iommu *iommu;
3511 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3512 
3513 	BUG_ON(dir == DMA_NONE);
3514 
3515 	domain = find_domain(dev);
3516 	if (!domain)
3517 		return DMA_MAPPING_ERROR;
3518 
3519 	iommu = domain_get_iommu(domain);
3520 	size = aligned_nrpages(paddr, size);
3521 
3522 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3523 	if (!iova_pfn)
3524 		goto error;
3525 
3526 	/*
3527 	 * Check if DMAR supports zero-length reads on write only
3528 	 * mappings.
3529 	 */
3530 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3531 			!cap_zlr(iommu->cap))
3532 		prot |= DMA_PTE_READ;
3533 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3534 		prot |= DMA_PTE_WRITE;
3535 	/*
3536 	 * The range paddr .. paddr + size might span a partial page, so we
3537 	 * should map the whole page.  Note: if two parts of one page are mapped
3538 	 * separately, we might end up with two guest addresses mapping to the
3539 	 * same host paddr, but this is not a big problem.
3540 	 */
3541 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3542 				 mm_to_dma_pfn(paddr_pfn), size, prot);
3543 	if (ret)
3544 		goto error;
3545 
3546 	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3547 	start_paddr += paddr & ~PAGE_MASK;
3548 
3549 	trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3550 
3551 	return start_paddr;
3552 
3553 error:
3554 	if (iova_pfn)
3555 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3556 	dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3557 		size, (unsigned long long)paddr, dir);
3558 	return DMA_MAPPING_ERROR;
3559 }
3560 
3561 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3562 				 unsigned long offset, size_t size,
3563 				 enum dma_data_direction dir,
3564 				 unsigned long attrs)
3565 {
3566 	if (iommu_need_mapping(dev))
3567 		return __intel_map_single(dev, page_to_phys(page) + offset,
3568 				size, dir, *dev->dma_mask);
3569 	return dma_direct_map_page(dev, page, offset, size, dir, attrs);
3570 }
3571 
3572 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3573 				     size_t size, enum dma_data_direction dir,
3574 				     unsigned long attrs)
3575 {
3576 	if (iommu_need_mapping(dev))
3577 		return __intel_map_single(dev, phys_addr, size, dir,
3578 				*dev->dma_mask);
3579 	return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
3580 }
3581 
3582 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3583 {
3584 	struct dmar_domain *domain;
3585 	unsigned long start_pfn, last_pfn;
3586 	unsigned long nrpages;
3587 	unsigned long iova_pfn;
3588 	struct intel_iommu *iommu;
3589 	struct page *freelist;
3590 	struct pci_dev *pdev = NULL;
3591 
3592 	domain = find_domain(dev);
3593 	BUG_ON(!domain);
3594 
3595 	iommu = domain_get_iommu(domain);
3596 
3597 	iova_pfn = IOVA_PFN(dev_addr);
3598 
3599 	nrpages = aligned_nrpages(dev_addr, size);
3600 	start_pfn = mm_to_dma_pfn(iova_pfn);
3601 	last_pfn = start_pfn + nrpages - 1;
3602 
3603 	if (dev_is_pci(dev))
3604 		pdev = to_pci_dev(dev);
3605 
3606 	freelist = domain_unmap(domain, start_pfn, last_pfn);
3607 	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3608 			!has_iova_flush_queue(&domain->iovad)) {
3609 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3610 				      nrpages, !freelist, 0);
3611 		/* free iova */
3612 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3613 		dma_free_pagelist(freelist);
3614 	} else {
3615 		queue_iova(&domain->iovad, iova_pfn, nrpages,
3616 			   (unsigned long)freelist);
3617 		/*
3618 		 * Queue up the release of the unmap to save roughly the 1/6th of
3619 		 * the CPU time otherwise spent on the iotlb flush operation.
3620 		 */
3621 	}
3622 
3623 	trace_unmap_single(dev, dev_addr, size);
3624 }
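
/*
 * Editor's note (illustrative, not authoritative): the strict path above
 * flushes the IOTLB synchronously before the IOVA range can be reused, while
 * the other path defers the flush through the IOVA flush queue.  Strict
 * behaviour can be requested at boot, e.g.:
 *
 *	intel_iommu=on,strict
 *
 * which sets intel_iommu_strict and forces the synchronous branch for every
 * unmap.
 */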
3625 
3626 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3627 			     size_t size, enum dma_data_direction dir,
3628 			     unsigned long attrs)
3629 {
3630 	if (iommu_need_mapping(dev))
3631 		intel_unmap(dev, dev_addr, size);
3632 	else
3633 		dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3634 }
3635 
3636 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3637 		size_t size, enum dma_data_direction dir, unsigned long attrs)
3638 {
3639 	if (iommu_need_mapping(dev))
3640 		intel_unmap(dev, dev_addr, size);
3641 }
3642 
3643 static void *intel_alloc_coherent(struct device *dev, size_t size,
3644 				  dma_addr_t *dma_handle, gfp_t flags,
3645 				  unsigned long attrs)
3646 {
3647 	struct page *page = NULL;
3648 	int order;
3649 
3650 	if (!iommu_need_mapping(dev))
3651 		return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3652 
3653 	size = PAGE_ALIGN(size);
3654 	order = get_order(size);
3655 
3656 	if (gfpflags_allow_blocking(flags)) {
3657 		unsigned int count = size >> PAGE_SHIFT;
3658 
3659 		page = dma_alloc_from_contiguous(dev, count, order,
3660 						 flags & __GFP_NOWARN);
3661 	}
3662 
3663 	if (!page)
3664 		page = alloc_pages(flags, order);
3665 	if (!page)
3666 		return NULL;
3667 	memset(page_address(page), 0, size);
3668 
3669 	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3670 					 DMA_BIDIRECTIONAL,
3671 					 dev->coherent_dma_mask);
3672 	if (*dma_handle != DMA_MAPPING_ERROR)
3673 		return page_address(page);
3674 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3675 		__free_pages(page, order);
3676 
3677 	return NULL;
3678 }
3679 
3680 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3681 				dma_addr_t dma_handle, unsigned long attrs)
3682 {
3683 	int order;
3684 	struct page *page = virt_to_page(vaddr);
3685 
3686 	if (!iommu_need_mapping(dev))
3687 		return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3688 
3689 	size = PAGE_ALIGN(size);
3690 	order = get_order(size);
3691 
3692 	intel_unmap(dev, dma_handle, size);
3693 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3694 		__free_pages(page, order);
3695 }
3696 
3697 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3698 			   int nelems, enum dma_data_direction dir,
3699 			   unsigned long attrs)
3700 {
3701 	dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3702 	unsigned long nrpages = 0;
3703 	struct scatterlist *sg;
3704 	int i;
3705 
3706 	if (!iommu_need_mapping(dev))
3707 		return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3708 
3709 	for_each_sg(sglist, sg, nelems, i) {
3710 		nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3711 	}
3712 
3713 	intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3714 
3715 	trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3716 }
3717 
3718 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3719 			enum dma_data_direction dir, unsigned long attrs)
3720 {
3721 	int i;
3722 	struct dmar_domain *domain;
3723 	size_t size = 0;
3724 	int prot = 0;
3725 	unsigned long iova_pfn;
3726 	int ret;
3727 	struct scatterlist *sg;
3728 	unsigned long start_vpfn;
3729 	struct intel_iommu *iommu;
3730 
3731 	BUG_ON(dir == DMA_NONE);
3732 	if (!iommu_need_mapping(dev))
3733 		return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
3734 
3735 	domain = find_domain(dev);
3736 	if (!domain)
3737 		return 0;
3738 
3739 	iommu = domain_get_iommu(domain);
3740 
3741 	for_each_sg(sglist, sg, nelems, i)
3742 		size += aligned_nrpages(sg->offset, sg->length);
3743 
3744 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3745 				*dev->dma_mask);
3746 	if (!iova_pfn) {
3747 		sglist->dma_length = 0;
3748 		return 0;
3749 	}
3750 
3751 	/*
3752 	 * Check if DMAR supports zero-length reads on write only
3753 	 * mappings.
3754 	 */
3755 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3756 			!cap_zlr(iommu->cap))
3757 		prot |= DMA_PTE_READ;
3758 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3759 		prot |= DMA_PTE_WRITE;
3760 
3761 	start_vpfn = mm_to_dma_pfn(iova_pfn);
3762 
3763 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3764 	if (unlikely(ret)) {
3765 		dma_pte_free_pagetable(domain, start_vpfn,
3766 				       start_vpfn + size - 1,
3767 				       agaw_to_level(domain->agaw) + 1);
3768 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3769 		return 0;
3770 	}
3771 
3772 	trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
3773 		     sg_phys(sglist), size << VTD_PAGE_SHIFT);
3774 
3775 	return nelems;
3776 }
3777 
3778 static u64 intel_get_required_mask(struct device *dev)
3779 {
3780 	if (!iommu_need_mapping(dev))
3781 		return dma_direct_get_required_mask(dev);
3782 	return DMA_BIT_MASK(32);
3783 }
3784 
3785 static const struct dma_map_ops intel_dma_ops = {
3786 	.alloc = intel_alloc_coherent,
3787 	.free = intel_free_coherent,
3788 	.map_sg = intel_map_sg,
3789 	.unmap_sg = intel_unmap_sg,
3790 	.map_page = intel_map_page,
3791 	.unmap_page = intel_unmap_page,
3792 	.map_resource = intel_map_resource,
3793 	.unmap_resource = intel_unmap_resource,
3794 	.dma_supported = dma_direct_supported,
3795 	.mmap = dma_common_mmap,
3796 	.get_sgtable = dma_common_get_sgtable,
3797 	.get_required_mask = intel_get_required_mask,
3798 };
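
/*
 * Illustrative sketch (editor's addition): once the DMA API resolves a
 * device's ops to intel_dma_ops (the global dma_ops is set in
 * intel_iommu_init() below), an ordinary streaming mapping made by a driver,
 * e.g.
 *
 *	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	...
 *	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
 *
 * is dispatched by the DMA mapping core to intel_map_page()/intel_unmap_page()
 * above, which either fall back to dma-direct or go through the IOMMU page
 * tables depending on iommu_need_mapping().
 */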
3799 
3800 static void
3801 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3802 		   enum dma_data_direction dir, enum dma_sync_target target)
3803 {
3804 	struct dmar_domain *domain;
3805 	phys_addr_t tlb_addr;
3806 
3807 	domain = find_domain(dev);
3808 	if (WARN_ON(!domain))
3809 		return;
3810 
3811 	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3812 	if (is_swiotlb_buffer(tlb_addr))
3813 		swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3814 }
3815 
3816 static dma_addr_t
3817 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3818 		  enum dma_data_direction dir, unsigned long attrs,
3819 		  u64 dma_mask)
3820 {
3821 	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3822 	struct dmar_domain *domain;
3823 	struct intel_iommu *iommu;
3824 	unsigned long iova_pfn;
3825 	unsigned long nrpages;
3826 	phys_addr_t tlb_addr;
3827 	int prot = 0;
3828 	int ret;
3829 
3830 	domain = find_domain(dev);
3831 	if (WARN_ON(dir == DMA_NONE || !domain))
3832 		return DMA_MAPPING_ERROR;
3833 
3834 	iommu = domain_get_iommu(domain);
3835 	if (WARN_ON(!iommu))
3836 		return DMA_MAPPING_ERROR;
3837 
3838 	nrpages = aligned_nrpages(0, size);
3839 	iova_pfn = intel_alloc_iova(dev, domain,
3840 				    dma_to_mm_pfn(nrpages), dma_mask);
3841 	if (!iova_pfn)
3842 		return DMA_MAPPING_ERROR;
3843 
3844 	/*
3845 	 * Check if DMAR supports zero-length reads on write only
3846 	 * mappings.
3847 	 */
3848 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3849 			!cap_zlr(iommu->cap))
3850 		prot |= DMA_PTE_READ;
3851 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3852 		prot |= DMA_PTE_WRITE;
3853 
3854 	/*
3855 	 * If both the physical buffer start address and size are
3856 	 * page aligned, we don't need to use a bounce page.
3857 	 */
3858 	if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3859 		tlb_addr = swiotlb_tbl_map_single(dev,
3860 				__phys_to_dma(dev, io_tlb_start),
3861 				paddr, size, aligned_size, dir, attrs);
3862 		if (tlb_addr == DMA_MAPPING_ERROR) {
3863 			goto swiotlb_error;
3864 		} else {
3865 			/* Cleanup the padding area. */
3866 			void *padding_start = phys_to_virt(tlb_addr);
3867 			size_t padding_size = aligned_size;
3868 
3869 			if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3870 			    (dir == DMA_TO_DEVICE ||
3871 			     dir == DMA_BIDIRECTIONAL)) {
3872 				padding_start += size;
3873 				padding_size -= size;
3874 			}
3875 
3876 			memset(padding_start, 0, padding_size);
3877 		}
3878 	} else {
3879 		tlb_addr = paddr;
3880 	}
3881 
3882 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3883 				 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3884 	if (ret)
3885 		goto mapping_error;
3886 
3887 	trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3888 
3889 	return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3890 
3891 mapping_error:
3892 	if (is_swiotlb_buffer(tlb_addr))
3893 		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3894 					 aligned_size, dir, attrs);
3895 swiotlb_error:
3896 	free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3897 	dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3898 		size, (unsigned long long)paddr, dir);
3899 
3900 	return DMA_MAPPING_ERROR;
3901 }
3902 
3903 static void
3904 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3905 		    enum dma_data_direction dir, unsigned long attrs)
3906 {
3907 	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3908 	struct dmar_domain *domain;
3909 	phys_addr_t tlb_addr;
3910 
3911 	domain = find_domain(dev);
3912 	if (WARN_ON(!domain))
3913 		return;
3914 
3915 	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3916 	if (WARN_ON(!tlb_addr))
3917 		return;
3918 
3919 	intel_unmap(dev, dev_addr, size);
3920 	if (is_swiotlb_buffer(tlb_addr))
3921 		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3922 					 aligned_size, dir, attrs);
3923 
3924 	trace_bounce_unmap_single(dev, dev_addr, size);
3925 }
3926 
3927 static dma_addr_t
3928 bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3929 		size_t size, enum dma_data_direction dir, unsigned long attrs)
3930 {
3931 	return bounce_map_single(dev, page_to_phys(page) + offset,
3932 				 size, dir, attrs, *dev->dma_mask);
3933 }
3934 
3935 static dma_addr_t
3936 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3937 		    enum dma_data_direction dir, unsigned long attrs)
3938 {
3939 	return bounce_map_single(dev, phys_addr, size,
3940 				 dir, attrs, *dev->dma_mask);
3941 }
3942 
3943 static void
3944 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3945 		  enum dma_data_direction dir, unsigned long attrs)
3946 {
3947 	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3948 }
3949 
3950 static void
3951 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3952 		      enum dma_data_direction dir, unsigned long attrs)
3953 {
3954 	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3955 }
3956 
3957 static void
3958 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3959 		enum dma_data_direction dir, unsigned long attrs)
3960 {
3961 	struct scatterlist *sg;
3962 	int i;
3963 
3964 	for_each_sg(sglist, sg, nelems, i)
3965 		bounce_unmap_page(dev, sg->dma_address,
3966 				  sg_dma_len(sg), dir, attrs);
3967 }
3968 
3969 static int
3970 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3971 	      enum dma_data_direction dir, unsigned long attrs)
3972 {
3973 	int i;
3974 	struct scatterlist *sg;
3975 
3976 	for_each_sg(sglist, sg, nelems, i) {
3977 		sg->dma_address = bounce_map_page(dev, sg_page(sg),
3978 						  sg->offset, sg->length,
3979 						  dir, attrs);
3980 		if (sg->dma_address == DMA_MAPPING_ERROR)
3981 			goto out_unmap;
3982 		sg_dma_len(sg) = sg->length;
3983 	}
3984 
3985 	return nelems;
3986 
3987 out_unmap:
3988 	bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3989 	return 0;
3990 }
3991 
3992 static void
3993 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3994 			   size_t size, enum dma_data_direction dir)
3995 {
3996 	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3997 }
3998 
3999 static void
4000 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4001 			      size_t size, enum dma_data_direction dir)
4002 {
4003 	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4004 }
4005 
4006 static void
4007 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4008 		       int nelems, enum dma_data_direction dir)
4009 {
4010 	struct scatterlist *sg;
4011 	int i;
4012 
4013 	for_each_sg(sglist, sg, nelems, i)
4014 		bounce_sync_single(dev, sg_dma_address(sg),
4015 				   sg_dma_len(sg), dir, SYNC_FOR_CPU);
4016 }
4017 
4018 static void
4019 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4020 			  int nelems, enum dma_data_direction dir)
4021 {
4022 	struct scatterlist *sg;
4023 	int i;
4024 
4025 	for_each_sg(sglist, sg, nelems, i)
4026 		bounce_sync_single(dev, sg_dma_address(sg),
4027 				   sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4028 }
4029 
4030 static const struct dma_map_ops bounce_dma_ops = {
4031 	.alloc			= intel_alloc_coherent,
4032 	.free			= intel_free_coherent,
4033 	.map_sg			= bounce_map_sg,
4034 	.unmap_sg		= bounce_unmap_sg,
4035 	.map_page		= bounce_map_page,
4036 	.unmap_page		= bounce_unmap_page,
4037 	.sync_single_for_cpu	= bounce_sync_single_for_cpu,
4038 	.sync_single_for_device	= bounce_sync_single_for_device,
4039 	.sync_sg_for_cpu	= bounce_sync_sg_for_cpu,
4040 	.sync_sg_for_device	= bounce_sync_sg_for_device,
4041 	.map_resource		= bounce_map_resource,
4042 	.unmap_resource		= bounce_unmap_resource,
4043 	.dma_supported		= dma_direct_supported,
4044 };
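
/*
 * Editor's note (illustrative): unlike intel_dma_ops, the bounce ops provide
 * sync_* callbacks because the data may live in a swiotlb bounce slot and has
 * to be copied between the original pages and that slot.  For a device that
 * has been switched to these ops (an untrusted device whose buffers need
 * bouncing), a driver's
 *
 *	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
 *
 * ends up in bounce_sync_single_for_cpu() above.
 */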
4045 
4046 static inline int iommu_domain_cache_init(void)
4047 {
4048 	int ret = 0;
4049 
4050 	iommu_domain_cache = kmem_cache_create("iommu_domain",
4051 					 sizeof(struct dmar_domain),
4052 					 0,
4053 					 SLAB_HWCACHE_ALIGN,
4054 
4055 					 NULL);
4056 	if (!iommu_domain_cache) {
4057 		pr_err("Couldn't create iommu_domain cache\n");
4058 		ret = -ENOMEM;
4059 	}
4060 
4061 	return ret;
4062 }
4063 
4064 static inline int iommu_devinfo_cache_init(void)
4065 {
4066 	int ret = 0;
4067 
4068 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4069 					 sizeof(struct device_domain_info),
4070 					 0,
4071 					 SLAB_HWCACHE_ALIGN,
4072 					 NULL);
4073 	if (!iommu_devinfo_cache) {
4074 		pr_err("Couldn't create devinfo cache\n");
4075 		ret = -ENOMEM;
4076 	}
4077 
4078 	return ret;
4079 }
4080 
4081 static int __init iommu_init_mempool(void)
4082 {
4083 	int ret;
4084 	ret = iova_cache_get();
4085 	if (ret)
4086 		return ret;
4087 
4088 	ret = iommu_domain_cache_init();
4089 	if (ret)
4090 		goto domain_error;
4091 
4092 	ret = iommu_devinfo_cache_init();
4093 	if (!ret)
4094 		return ret;
4095 
4096 	kmem_cache_destroy(iommu_domain_cache);
4097 domain_error:
4098 	iova_cache_put();
4099 
4100 	return -ENOMEM;
4101 }
4102 
4103 static void __init iommu_exit_mempool(void)
4104 {
4105 	kmem_cache_destroy(iommu_devinfo_cache);
4106 	kmem_cache_destroy(iommu_domain_cache);
4107 	iova_cache_put();
4108 }
4109 
4110 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4111 {
4112 	struct dmar_drhd_unit *drhd;
4113 	u32 vtbar;
4114 	int rc;
4115 
4116 	/* We know that this device on this chipset has its own IOMMU.
4117 	 * If we find it under a different IOMMU, then the BIOS is lying
4118 	 * to us. Hope that the IOMMU for this device is actually
4119 	 * disabled, and it needs no translation...
4120 	 */
4121 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4122 	if (rc) {
4123 		/* "can't" happen */
4124 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4125 		return;
4126 	}
4127 	vtbar &= 0xffff0000;
4128 
4129 	/* we know that this iommu should be at offset 0xa000 from vtbar */
4130 	drhd = dmar_find_matched_drhd_unit(pdev);
4131 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4132 			    TAINT_FIRMWARE_WORKAROUND,
4133 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4134 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4135 }
4136 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
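
/*
 * Editor's note (illustrative): DECLARE_PCI_FIXUP_ENABLE() arranges for the
 * quirk above to run from pci_fixup_device(pci_fixup_enable, pdev) when a
 * driver calls pci_enable_device() on the QuickData (IOAT) function, i.e.
 * before any DMA mapping is attempted for it.
 */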
4137 
4138 static void __init init_no_remapping_devices(void)
4139 {
4140 	struct dmar_drhd_unit *drhd;
4141 	struct device *dev;
4142 	int i;
4143 
4144 	for_each_drhd_unit(drhd) {
4145 		if (!drhd->include_all) {
4146 			for_each_active_dev_scope(drhd->devices,
4147 						  drhd->devices_cnt, i, dev)
4148 				break;
4149 			/* ignore DMAR unit if no devices exist */
4150 			if (i == drhd->devices_cnt)
4151 				drhd->ignored = 1;
4152 		}
4153 	}
4154 
4155 	for_each_active_drhd_unit(drhd) {
4156 		if (drhd->include_all)
4157 			continue;
4158 
4159 		for_each_active_dev_scope(drhd->devices,
4160 					  drhd->devices_cnt, i, dev)
4161 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4162 				break;
4163 		if (i < drhd->devices_cnt)
4164 			continue;
4165 
4166 		/* This IOMMU has *only* gfx devices. Either bypass it or
4167 		   set the gfx_mapped flag, as appropriate */
4168 		if (!dmar_map_gfx) {
4169 			drhd->ignored = 1;
4170 			for_each_active_dev_scope(drhd->devices,
4171 						  drhd->devices_cnt, i, dev)
4172 				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4173 		}
4174 	}
4175 }
4176 
4177 #ifdef CONFIG_SUSPEND
4178 static int init_iommu_hw(void)
4179 {
4180 	struct dmar_drhd_unit *drhd;
4181 	struct intel_iommu *iommu = NULL;
4182 
4183 	for_each_active_iommu(iommu, drhd)
4184 		if (iommu->qi)
4185 			dmar_reenable_qi(iommu);
4186 
4187 	for_each_iommu(iommu, drhd) {
4188 		if (drhd->ignored) {
4189 			/*
4190 			 * we always have to disable PMRs or DMA may fail on
4191 			 * this device
4192 			 */
4193 			if (force_on)
4194 				iommu_disable_protect_mem_regions(iommu);
4195 			continue;
4196 		}
4197 
4198 		iommu_flush_write_buffer(iommu);
4199 
4200 		iommu_set_root_entry(iommu);
4201 
4202 		iommu->flush.flush_context(iommu, 0, 0, 0,
4203 					   DMA_CCMD_GLOBAL_INVL);
4204 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4205 		iommu_enable_translation(iommu);
4206 		iommu_disable_protect_mem_regions(iommu);
4207 	}
4208 
4209 	return 0;
4210 }
4211 
4212 static void iommu_flush_all(void)
4213 {
4214 	struct dmar_drhd_unit *drhd;
4215 	struct intel_iommu *iommu;
4216 
4217 	for_each_active_iommu(iommu, drhd) {
4218 		iommu->flush.flush_context(iommu, 0, 0, 0,
4219 					   DMA_CCMD_GLOBAL_INVL);
4220 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4221 					 DMA_TLB_GLOBAL_FLUSH);
4222 	}
4223 }
4224 
4225 static int iommu_suspend(void)
4226 {
4227 	struct dmar_drhd_unit *drhd;
4228 	struct intel_iommu *iommu = NULL;
4229 	unsigned long flag;
4230 
4231 	for_each_active_iommu(iommu, drhd) {
4232 		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4233 						 GFP_ATOMIC);
4234 		if (!iommu->iommu_state)
4235 			goto nomem;
4236 	}
4237 
4238 	iommu_flush_all();
4239 
4240 	for_each_active_iommu(iommu, drhd) {
4241 		iommu_disable_translation(iommu);
4242 
4243 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4244 
4245 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
4246 			readl(iommu->reg + DMAR_FECTL_REG);
4247 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4248 			readl(iommu->reg + DMAR_FEDATA_REG);
4249 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4250 			readl(iommu->reg + DMAR_FEADDR_REG);
4251 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4252 			readl(iommu->reg + DMAR_FEUADDR_REG);
4253 
4254 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4255 	}
4256 	return 0;
4257 
4258 nomem:
4259 	for_each_active_iommu(iommu, drhd)
4260 		kfree(iommu->iommu_state);
4261 
4262 	return -ENOMEM;
4263 }
4264 
4265 static void iommu_resume(void)
4266 {
4267 	struct dmar_drhd_unit *drhd;
4268 	struct intel_iommu *iommu = NULL;
4269 	unsigned long flag;
4270 
4271 	if (init_iommu_hw()) {
4272 		if (force_on)
4273 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4274 		else
4275 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4276 		return;
4277 	}
4278 
4279 	for_each_active_iommu(iommu, drhd) {
4280 
4281 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4282 
4283 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4284 			iommu->reg + DMAR_FECTL_REG);
4285 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4286 			iommu->reg + DMAR_FEDATA_REG);
4287 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4288 			iommu->reg + DMAR_FEADDR_REG);
4289 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4290 			iommu->reg + DMAR_FEUADDR_REG);
4291 
4292 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4293 	}
4294 
4295 	for_each_active_iommu(iommu, drhd)
4296 		kfree(iommu->iommu_state);
4297 }
4298 
4299 static struct syscore_ops iommu_syscore_ops = {
4300 	.resume		= iommu_resume,
4301 	.suspend	= iommu_suspend,
4302 };
4303 
4304 static void __init init_iommu_pm_ops(void)
4305 {
4306 	register_syscore_ops(&iommu_syscore_ops);
4307 }
4308 
4309 #else
4310 static inline void init_iommu_pm_ops(void) {}
4311 #endif	/* CONFIG_SUSPEND */
4312 
4313 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4314 {
4315 	struct acpi_dmar_reserved_memory *rmrr;
4316 	struct dmar_rmrr_unit *rmrru;
4317 
4318 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4319 	if (!rmrru)
4320 		goto out;
4321 
4322 	rmrru->hdr = header;
4323 	rmrr = (struct acpi_dmar_reserved_memory *)header;
4324 	rmrru->base_address = rmrr->base_address;
4325 	rmrru->end_address = rmrr->end_address;
4326 
4327 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4328 				((void *)rmrr) + rmrr->header.length,
4329 				&rmrru->devices_cnt);
4330 	if (rmrru->devices_cnt && rmrru->devices == NULL)
4331 		goto free_rmrru;
4332 
4333 	list_add(&rmrru->list, &dmar_rmrr_units);
4334 
4335 	return 0;
4336 free_rmrru:
4337 	kfree(rmrru);
4338 out:
4339 	return -ENOMEM;
4340 }
4341 
4342 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4343 {
4344 	struct dmar_atsr_unit *atsru;
4345 	struct acpi_dmar_atsr *tmp;
4346 
4347 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4348 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4349 		if (atsr->segment != tmp->segment)
4350 			continue;
4351 		if (atsr->header.length != tmp->header.length)
4352 			continue;
4353 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
4354 			return atsru;
4355 	}
4356 
4357 	return NULL;
4358 }
4359 
4360 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4361 {
4362 	struct acpi_dmar_atsr *atsr;
4363 	struct dmar_atsr_unit *atsru;
4364 
4365 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4366 		return 0;
4367 
4368 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4369 	atsru = dmar_find_atsr(atsr);
4370 	if (atsru)
4371 		return 0;
4372 
4373 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4374 	if (!atsru)
4375 		return -ENOMEM;
4376 
4377 	/*
4378 	 * If memory is allocated from slab by ACPI _DSM method, we need to
4379 	 * copy the memory content because the memory buffer will be freed
4380 	 * on return.
4381 	 */
4382 	atsru->hdr = (void *)(atsru + 1);
4383 	memcpy(atsru->hdr, hdr, hdr->length);
4384 	atsru->include_all = atsr->flags & 0x1;
4385 	if (!atsru->include_all) {
4386 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4387 				(void *)atsr + atsr->header.length,
4388 				&atsru->devices_cnt);
4389 		if (atsru->devices_cnt && atsru->devices == NULL) {
4390 			kfree(atsru);
4391 			return -ENOMEM;
4392 		}
4393 	}
4394 
4395 	list_add_rcu(&atsru->list, &dmar_atsr_units);
4396 
4397 	return 0;
4398 }
4399 
4400 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4401 {
4402 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4403 	kfree(atsru);
4404 }
4405 
4406 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4407 {
4408 	struct acpi_dmar_atsr *atsr;
4409 	struct dmar_atsr_unit *atsru;
4410 
4411 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4412 	atsru = dmar_find_atsr(atsr);
4413 	if (atsru) {
4414 		list_del_rcu(&atsru->list);
4415 		synchronize_rcu();
4416 		intel_iommu_free_atsr(atsru);
4417 	}
4418 
4419 	return 0;
4420 }
4421 
4422 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4423 {
4424 	int i;
4425 	struct device *dev;
4426 	struct acpi_dmar_atsr *atsr;
4427 	struct dmar_atsr_unit *atsru;
4428 
4429 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4430 	atsru = dmar_find_atsr(atsr);
4431 	if (!atsru)
4432 		return 0;
4433 
4434 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4435 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4436 					  i, dev)
4437 			return -EBUSY;
4438 	}
4439 
4440 	return 0;
4441 }
4442 
4443 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4444 {
4445 	int sp, ret;
4446 	struct intel_iommu *iommu = dmaru->iommu;
4447 
4448 	if (g_iommus[iommu->seq_id])
4449 		return 0;
4450 
4451 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4452 		pr_warn("%s: Doesn't support hardware pass through.\n",
4453 			iommu->name);
4454 		return -ENXIO;
4455 	}
4456 	if (!ecap_sc_support(iommu->ecap) &&
4457 	    domain_update_iommu_snooping(iommu)) {
4458 		pr_warn("%s: Doesn't support snooping.\n",
4459 			iommu->name);
4460 		return -ENXIO;
4461 	}
4462 	sp = domain_update_iommu_superpage(iommu) - 1;
4463 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4464 		pr_warn("%s: Doesn't support large page.\n",
4465 			iommu->name);
4466 		return -ENXIO;
4467 	}
4468 
4469 	/*
4470 	 * Disable translation if already enabled prior to OS handover.
4471 	 */
4472 	if (iommu->gcmd & DMA_GCMD_TE)
4473 		iommu_disable_translation(iommu);
4474 
4475 	g_iommus[iommu->seq_id] = iommu;
4476 	ret = iommu_init_domains(iommu);
4477 	if (ret == 0)
4478 		ret = iommu_alloc_root_entry(iommu);
4479 	if (ret)
4480 		goto out;
4481 
4482 #ifdef CONFIG_INTEL_IOMMU_SVM
4483 	if (pasid_supported(iommu))
4484 		intel_svm_init(iommu);
4485 #endif
4486 
4487 	if (dmaru->ignored) {
4488 		/*
4489 		 * we always have to disable PMRs or DMA may fail on this device
4490 		 */
4491 		if (force_on)
4492 			iommu_disable_protect_mem_regions(iommu);
4493 		return 0;
4494 	}
4495 
4496 	intel_iommu_init_qi(iommu);
4497 	iommu_flush_write_buffer(iommu);
4498 
4499 #ifdef CONFIG_INTEL_IOMMU_SVM
4500 	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4501 		ret = intel_svm_enable_prq(iommu);
4502 		if (ret)
4503 			goto disable_iommu;
4504 	}
4505 #endif
4506 	ret = dmar_set_interrupt(iommu);
4507 	if (ret)
4508 		goto disable_iommu;
4509 
4510 	iommu_set_root_entry(iommu);
4511 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4512 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4513 	iommu_enable_translation(iommu);
4514 
4515 	iommu_disable_protect_mem_regions(iommu);
4516 	return 0;
4517 
4518 disable_iommu:
4519 	disable_dmar_iommu(iommu);
4520 out:
4521 	free_dmar_iommu(iommu);
4522 	return ret;
4523 }
4524 
4525 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4526 {
4527 	int ret = 0;
4528 	struct intel_iommu *iommu = dmaru->iommu;
4529 
4530 	if (!intel_iommu_enabled)
4531 		return 0;
4532 	if (iommu == NULL)
4533 		return -EINVAL;
4534 
4535 	if (insert) {
4536 		ret = intel_iommu_add(dmaru);
4537 	} else {
4538 		disable_dmar_iommu(iommu);
4539 		free_dmar_iommu(iommu);
4540 	}
4541 
4542 	return ret;
4543 }
4544 
4545 static void intel_iommu_free_dmars(void)
4546 {
4547 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
4548 	struct dmar_atsr_unit *atsru, *atsr_n;
4549 
4550 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4551 		list_del(&rmrru->list);
4552 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4553 		kfree(rmrru);
4554 	}
4555 
4556 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4557 		list_del(&atsru->list);
4558 		intel_iommu_free_atsr(atsru);
4559 	}
4560 }
4561 
4562 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4563 {
4564 	int i, ret = 1;
4565 	struct pci_bus *bus;
4566 	struct pci_dev *bridge = NULL;
4567 	struct device *tmp;
4568 	struct acpi_dmar_atsr *atsr;
4569 	struct dmar_atsr_unit *atsru;
4570 
4571 	dev = pci_physfn(dev);
4572 	for (bus = dev->bus; bus; bus = bus->parent) {
4573 		bridge = bus->self;
4574 		/* If it's an integrated device, allow ATS */
4575 		if (!bridge)
4576 			return 1;
4577 		/* Connected via non-PCIe: no ATS */
4578 		if (!pci_is_pcie(bridge) ||
4579 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4580 			return 0;
4581 		/* If we found the root port, look it up in the ATSR */
4582 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4583 			break;
4584 	}
4585 
4586 	rcu_read_lock();
4587 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4588 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4589 		if (atsr->segment != pci_domain_nr(dev->bus))
4590 			continue;
4591 
4592 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4593 			if (tmp == &bridge->dev)
4594 				goto out;
4595 
4596 		if (atsru->include_all)
4597 			goto out;
4598 	}
4599 	ret = 0;
4600 out:
4601 	rcu_read_unlock();
4602 
4603 	return ret;
4604 }
4605 
4606 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4607 {
4608 	int ret;
4609 	struct dmar_rmrr_unit *rmrru;
4610 	struct dmar_atsr_unit *atsru;
4611 	struct acpi_dmar_atsr *atsr;
4612 	struct acpi_dmar_reserved_memory *rmrr;
4613 
4614 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4615 		return 0;
4616 
4617 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4618 		rmrr = container_of(rmrru->hdr,
4619 				    struct acpi_dmar_reserved_memory, header);
4620 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4621 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4622 				((void *)rmrr) + rmrr->header.length,
4623 				rmrr->segment, rmrru->devices,
4624 				rmrru->devices_cnt);
4625 			if (ret < 0)
4626 				return ret;
4627 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4628 			dmar_remove_dev_scope(info, rmrr->segment,
4629 				rmrru->devices, rmrru->devices_cnt);
4630 		}
4631 	}
4632 
4633 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
4634 		if (atsru->include_all)
4635 			continue;
4636 
4637 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4638 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4639 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4640 					(void *)atsr + atsr->header.length,
4641 					atsr->segment, atsru->devices,
4642 					atsru->devices_cnt);
4643 			if (ret > 0)
4644 				break;
4645 			else if (ret < 0)
4646 				return ret;
4647 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4648 			if (dmar_remove_dev_scope(info, atsr->segment,
4649 					atsru->devices, atsru->devices_cnt))
4650 				break;
4651 		}
4652 	}
4653 
4654 	return 0;
4655 }
4656 
4657 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4658 				       unsigned long val, void *v)
4659 {
4660 	struct memory_notify *mhp = v;
4661 	unsigned long long start, end;
4662 	unsigned long start_vpfn, last_vpfn;
4663 
4664 	switch (val) {
4665 	case MEM_GOING_ONLINE:
4666 		start = mhp->start_pfn << PAGE_SHIFT;
4667 		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4668 		if (iommu_domain_identity_map(si_domain, start, end)) {
4669 			pr_warn("Failed to build identity map for [%llx-%llx]\n",
4670 				start, end);
4671 			return NOTIFY_BAD;
4672 		}
4673 		break;
4674 
4675 	case MEM_OFFLINE:
4676 	case MEM_CANCEL_ONLINE:
4677 		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4678 		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4679 		while (start_vpfn <= last_vpfn) {
4680 			struct iova *iova;
4681 			struct dmar_drhd_unit *drhd;
4682 			struct intel_iommu *iommu;
4683 			struct page *freelist;
4684 
4685 			iova = find_iova(&si_domain->iovad, start_vpfn);
4686 			if (iova == NULL) {
4687 				pr_debug("Failed get IOVA for PFN %lx\n",
4688 					 start_vpfn);
4689 				break;
4690 			}
4691 
4692 			iova = split_and_remove_iova(&si_domain->iovad, iova,
4693 						     start_vpfn, last_vpfn);
4694 			if (iova == NULL) {
4695 				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4696 					start_vpfn, last_vpfn);
4697 				return NOTIFY_BAD;
4698 			}
4699 
4700 			freelist = domain_unmap(si_domain, iova->pfn_lo,
4701 					       iova->pfn_hi);
4702 
4703 			rcu_read_lock();
4704 			for_each_active_iommu(iommu, drhd)
4705 				iommu_flush_iotlb_psi(iommu, si_domain,
4706 					iova->pfn_lo, iova_size(iova),
4707 					!freelist, 0);
4708 			rcu_read_unlock();
4709 			dma_free_pagelist(freelist);
4710 
4711 			start_vpfn = iova->pfn_hi + 1;
4712 			free_iova_mem(iova);
4713 		}
4714 		break;
4715 	}
4716 
4717 	return NOTIFY_OK;
4718 }
4719 
4720 static struct notifier_block intel_iommu_memory_nb = {
4721 	.notifier_call = intel_iommu_memory_notifier,
4722 	.priority = 0
4723 };
4724 
4725 static void free_all_cpu_cached_iovas(unsigned int cpu)
4726 {
4727 	int i;
4728 
4729 	for (i = 0; i < g_num_of_iommus; i++) {
4730 		struct intel_iommu *iommu = g_iommus[i];
4731 		struct dmar_domain *domain;
4732 		int did;
4733 
4734 		if (!iommu)
4735 			continue;
4736 
4737 		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4738 			domain = get_iommu_domain(iommu, (u16)did);
4739 
4740 			if (!domain)
4741 				continue;
4742 			free_cpu_cached_iovas(cpu, &domain->iovad);
4743 		}
4744 	}
4745 }
4746 
4747 static int intel_iommu_cpu_dead(unsigned int cpu)
4748 {
4749 	free_all_cpu_cached_iovas(cpu);
4750 	return 0;
4751 }
4752 
4753 static void intel_disable_iommus(void)
4754 {
4755 	struct intel_iommu *iommu = NULL;
4756 	struct dmar_drhd_unit *drhd;
4757 
4758 	for_each_iommu(iommu, drhd)
4759 		iommu_disable_translation(iommu);
4760 }
4761 
4762 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4763 {
4764 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4765 
4766 	return container_of(iommu_dev, struct intel_iommu, iommu);
4767 }
4768 
4769 static ssize_t intel_iommu_show_version(struct device *dev,
4770 					struct device_attribute *attr,
4771 					char *buf)
4772 {
4773 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4774 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4775 	return sprintf(buf, "%d:%d\n",
4776 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4777 }
4778 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4779 
4780 static ssize_t intel_iommu_show_address(struct device *dev,
4781 					struct device_attribute *attr,
4782 					char *buf)
4783 {
4784 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4785 	return sprintf(buf, "%llx\n", iommu->reg_phys);
4786 }
4787 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4788 
4789 static ssize_t intel_iommu_show_cap(struct device *dev,
4790 				    struct device_attribute *attr,
4791 				    char *buf)
4792 {
4793 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4794 	return sprintf(buf, "%llx\n", iommu->cap);
4795 }
4796 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4797 
4798 static ssize_t intel_iommu_show_ecap(struct device *dev,
4799 				    struct device_attribute *attr,
4800 				    char *buf)
4801 {
4802 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4803 	return sprintf(buf, "%llx\n", iommu->ecap);
4804 }
4805 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4806 
4807 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4808 				      struct device_attribute *attr,
4809 				      char *buf)
4810 {
4811 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4812 	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4813 }
4814 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4815 
4816 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4817 					   struct device_attribute *attr,
4818 					   char *buf)
4819 {
4820 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4821 	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4822 						  cap_ndoms(iommu->cap)));
4823 }
4824 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4825 
4826 static struct attribute *intel_iommu_attrs[] = {
4827 	&dev_attr_version.attr,
4828 	&dev_attr_address.attr,
4829 	&dev_attr_cap.attr,
4830 	&dev_attr_ecap.attr,
4831 	&dev_attr_domains_supported.attr,
4832 	&dev_attr_domains_used.attr,
4833 	NULL,
4834 };
4835 
4836 static struct attribute_group intel_iommu_group = {
4837 	.name = "intel-iommu",
4838 	.attrs = intel_iommu_attrs,
4839 };
4840 
4841 const struct attribute_group *intel_iommu_groups[] = {
4842 	&intel_iommu_group,
4843 	NULL,
4844 };
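
/*
 * Editor's note (illustrative): with iommu_device_sysfs_add() being passed
 * intel_iommu_groups (see intel_iommu_init() below), each remapping unit is
 * expected to expose these attributes under sysfs, roughly:
 *
 *	/sys/class/iommu/dmar0/intel-iommu/version
 *	/sys/class/iommu/dmar0/intel-iommu/address
 *	/sys/class/iommu/dmar0/intel-iommu/cap
 *	/sys/class/iommu/dmar0/intel-iommu/ecap
 *	/sys/class/iommu/dmar0/intel-iommu/domains_supported
 *	/sys/class/iommu/dmar0/intel-iommu/domains_used
 *
 * ("dmar0" is just an example unit name.)
 */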
4845 
4846 static inline bool has_untrusted_dev(void)
4847 {
4848 	struct pci_dev *pdev = NULL;
4849 
4850 	for_each_pci_dev(pdev)
4851 		if (pdev->untrusted)
4852 			return true;
4853 
4854 	return false;
4855 }
4856 
4857 static int __init platform_optin_force_iommu(void)
4858 {
4859 	if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4860 		return 0;
4861 
4862 	if (no_iommu || dmar_disabled)
4863 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4864 
4865 	/*
4866 	 * If Intel-IOMMU is disabled by default, we will apply identity
4867 	 * map for all devices except those marked as being untrusted.
4868 	 */
4869 	if (dmar_disabled)
4870 		iommu_identity_mapping |= IDENTMAP_ALL;
4871 
4872 	dmar_disabled = 0;
4873 	no_iommu = 0;
4874 
4875 	return 1;
4876 }
4877 
4878 static int __init probe_acpi_namespace_devices(void)
4879 {
4880 	struct dmar_drhd_unit *drhd;
4881 	/* To avoid a -Wunused-but-set-variable warning. */
4882 	struct intel_iommu *iommu __maybe_unused;
4883 	struct device *dev;
4884 	int i, ret = 0;
4885 
4886 	for_each_active_iommu(iommu, drhd) {
4887 		for_each_active_dev_scope(drhd->devices,
4888 					  drhd->devices_cnt, i, dev) {
4889 			struct acpi_device_physical_node *pn;
4890 			struct iommu_group *group;
4891 			struct acpi_device *adev;
4892 
4893 			if (dev->bus != &acpi_bus_type)
4894 				continue;
4895 
4896 			adev = to_acpi_device(dev);
4897 			mutex_lock(&adev->physical_node_lock);
4898 			list_for_each_entry(pn,
4899 					    &adev->physical_node_list, node) {
4900 				group = iommu_group_get(pn->dev);
4901 				if (group) {
4902 					iommu_group_put(group);
4903 					continue;
4904 				}
4905 
4906 				pn->dev->bus->iommu_ops = &intel_iommu_ops;
4907 				ret = iommu_probe_device(pn->dev);
4908 				if (ret)
4909 					break;
4910 			}
4911 			mutex_unlock(&adev->physical_node_lock);
4912 
4913 			if (ret)
4914 				return ret;
4915 		}
4916 	}
4917 
4918 	return 0;
4919 }
4920 
4921 int __init intel_iommu_init(void)
4922 {
4923 	int ret = -ENODEV;
4924 	struct dmar_drhd_unit *drhd;
4925 	struct intel_iommu *iommu;
4926 
4927 	/*
4928 	 * Intel IOMMU is required for a TXT/tboot launch or platform
4929 	 * opt in, so enforce that.
4930 	 */
4931 	force_on = tboot_force_iommu() || platform_optin_force_iommu();
4932 
4933 	if (iommu_init_mempool()) {
4934 		if (force_on)
4935 			panic("tboot: Failed to initialize iommu memory\n");
4936 		return -ENOMEM;
4937 	}
4938 
4939 	down_write(&dmar_global_lock);
4940 	if (dmar_table_init()) {
4941 		if (force_on)
4942 			panic("tboot: Failed to initialize DMAR table\n");
4943 		goto out_free_dmar;
4944 	}
4945 
4946 	if (dmar_dev_scope_init() < 0) {
4947 		if (force_on)
4948 			panic("tboot: Failed to initialize DMAR device scope\n");
4949 		goto out_free_dmar;
4950 	}
4951 
4952 	up_write(&dmar_global_lock);
4953 
4954 	/*
4955 	 * The bus notifier takes the dmar_global_lock, so lockdep will
4956 	 * complain later when we register it under the lock.
4957 	 */
4958 	dmar_register_bus_notifier();
4959 
4960 	down_write(&dmar_global_lock);
4961 
4962 	if (no_iommu || dmar_disabled) {
4963 		/*
4964 		 * We exit the function here to ensure the IOMMU's remapping and
4965 		 * mempool aren't set up, which means that the IOMMU's PMRs
4966 		 * won't be disabled via the call to init_dmars(). So disable
4967 		 * them explicitly here. The PMRs were set up by tboot prior to
4968 		 * calling SENTER, but the kernel is expected to reset/tear
4969 		 * down the PMRs.
4970 		 */
4971 		if (intel_iommu_tboot_noforce) {
4972 			for_each_iommu(iommu, drhd)
4973 				iommu_disable_protect_mem_regions(iommu);
4974 		}
4975 
4976 		/*
4977 		 * Make sure the IOMMUs are switched off, even when we
4978 		 * boot into a kexec kernel and the previous kernel left
4979 		 * them enabled
4980 		 */
4981 		intel_disable_iommus();
4982 		goto out_free_dmar;
4983 	}
4984 
4985 	if (list_empty(&dmar_rmrr_units))
4986 		pr_info("No RMRR found\n");
4987 
4988 	if (list_empty(&dmar_atsr_units))
4989 		pr_info("No ATSR found\n");
4990 
4991 	if (dmar_init_reserved_ranges()) {
4992 		if (force_on)
4993 			panic("tboot: Failed to reserve iommu ranges\n");
4994 		goto out_free_reserved_range;
4995 	}
4996 
4997 	if (dmar_map_gfx)
4998 		intel_iommu_gfx_mapped = 1;
4999 
5000 	init_no_remapping_devices();
5001 
5002 	ret = init_dmars();
5003 	if (ret) {
5004 		if (force_on)
5005 			panic("tboot: Failed to initialize DMARs\n");
5006 		pr_err("Initialization failed\n");
5007 		goto out_free_reserved_range;
5008 	}
5009 	up_write(&dmar_global_lock);
5010 
5011 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
5012 	/*
5013 	 * If the system has no untrusted device, or the user has decided
5014 	 * to disable the bounce page mechanism, we don't need swiotlb.
5015 	 * Mark this, so that the pre-allocated bounce pages are released
5016 	 * later.
5017 	 */
5018 	if (!has_untrusted_dev() || intel_no_bounce)
5019 		swiotlb = 0;
5020 #endif
5021 	dma_ops = &intel_dma_ops;
5022 
5023 	init_iommu_pm_ops();
5024 
5025 	for_each_active_iommu(iommu, drhd) {
5026 		iommu_device_sysfs_add(&iommu->iommu, NULL,
5027 				       intel_iommu_groups,
5028 				       "%s", iommu->name);
5029 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5030 		iommu_device_register(&iommu->iommu);
5031 	}
5032 
5033 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
5034 	if (si_domain && !hw_pass_through)
5035 		register_memory_notifier(&intel_iommu_memory_nb);
5036 	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5037 			  intel_iommu_cpu_dead);
5038 
5039 	down_read(&dmar_global_lock);
5040 	if (probe_acpi_namespace_devices())
5041 		pr_warn("ACPI name space devices didn't probe correctly\n");
5042 	up_read(&dmar_global_lock);
5043 
5044 	/* Finally, we enable the DMA remapping hardware. */
5045 	for_each_iommu(iommu, drhd) {
5046 		if (!drhd->ignored && !translation_pre_enabled(iommu))
5047 			iommu_enable_translation(iommu);
5048 
5049 		iommu_disable_protect_mem_regions(iommu);
5050 	}
5051 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5052 
5053 	intel_iommu_enabled = 1;
5054 	intel_iommu_debugfs_init();
5055 
5056 	return 0;
5057 
5058 out_free_reserved_range:
5059 	put_iova_domain(&reserved_iova_list);
5060 out_free_dmar:
5061 	intel_iommu_free_dmars();
5062 	up_write(&dmar_global_lock);
5063 	iommu_exit_mempool();
5064 	return ret;
5065 }
5066 
5067 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5068 {
5069 	struct intel_iommu *iommu = opaque;
5070 
5071 	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5072 	return 0;
5073 }
5074 
5075 /*
5076  * NB - intel-iommu lacks any sort of reference counting for the users of
5077  * dependent devices.  If multiple endpoints have intersecting dependent
5078  * devices, unbinding the driver from any one of them will possibly leave
5079  * the others unable to operate.
5080  */
5081 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5082 {
5083 	if (!iommu || !dev || !dev_is_pci(dev))
5084 		return;
5085 
5086 	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5087 }
5088 
5089 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
5090 {
5091 	struct dmar_domain *domain;
5092 	struct intel_iommu *iommu;
5093 	unsigned long flags;
5094 
5095 	assert_spin_locked(&device_domain_lock);
5096 
5097 	if (WARN_ON(!info))
5098 		return;
5099 
5100 	iommu = info->iommu;
5101 	domain = info->domain;
5102 
5103 	if (info->dev) {
5104 		if (dev_is_pci(info->dev) && sm_supported(iommu))
5105 			intel_pasid_tear_down_entry(iommu, info->dev,
5106 					PASID_RID2PASID);
5107 
5108 		iommu_disable_dev_iotlb(info);
5109 		domain_context_clear(iommu, info->dev);
5110 		intel_pasid_free_table(info->dev);
5111 	}
5112 
5113 	unlink_domain_info(info);
5114 
5115 	spin_lock_irqsave(&iommu->lock, flags);
5116 	domain_detach_iommu(domain, iommu);
5117 	spin_unlock_irqrestore(&iommu->lock, flags);
5118 
5119 	/* free the private domain */
5120 	if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5121 	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5122 	    list_empty(&domain->devices))
5123 		domain_exit(info->domain);
5124 
5125 	free_devinfo_mem(info);
5126 }
5127 
5128 static void dmar_remove_one_dev_info(struct device *dev)
5129 {
5130 	struct device_domain_info *info;
5131 	unsigned long flags;
5132 
5133 	spin_lock_irqsave(&device_domain_lock, flags);
5134 	info = dev->archdata.iommu;
5135 	if (info)
5136 		__dmar_remove_one_dev_info(info);
5137 	spin_unlock_irqrestore(&device_domain_lock, flags);
5138 }
5139 
5140 static int md_domain_init(struct dmar_domain *domain, int guest_width)
5141 {
5142 	int adjust_width;
5143 
5144 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5145 	domain_reserve_special_ranges(domain);
5146 
5147 	/* calculate AGAW */
5148 	domain->gaw = guest_width;
5149 	adjust_width = guestwidth_to_adjustwidth(guest_width);
5150 	domain->agaw = width_to_agaw(adjust_width);
5151 
5152 	domain->iommu_coherency = 0;
5153 	domain->iommu_snooping = 0;
5154 	domain->iommu_superpage = 0;
5155 	domain->max_addr = 0;
5156 
5157 	/* always allocate the top pgd */
5158 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5159 	if (!domain->pgd)
5160 		return -ENOMEM;
5161 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5162 	return 0;
5163 }
5164 
5165 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5166 {
5167 	struct dmar_domain *dmar_domain;
5168 	struct iommu_domain *domain;
5169 
5170 	switch (type) {
5171 	case IOMMU_DOMAIN_DMA:
5172 	/* fallthrough */
5173 	case IOMMU_DOMAIN_UNMANAGED:
5174 		dmar_domain = alloc_domain(0);
5175 		if (!dmar_domain) {
5176 			pr_err("Can't allocate dmar_domain\n");
5177 			return NULL;
5178 		}
5179 		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5180 			pr_err("Domain initialization failed\n");
5181 			domain_exit(dmar_domain);
5182 			return NULL;
5183 		}
5184 
5185 		if (type == IOMMU_DOMAIN_DMA &&
5186 		    init_iova_flush_queue(&dmar_domain->iovad,
5187 					  iommu_flush_iova, iova_entry_free)) {
5188 			pr_warn("iova flush queue initialization failed\n");
5189 			intel_iommu_strict = 1;
5190 		}
5191 
5192 		domain_update_iommu_cap(dmar_domain);
5193 
5194 		domain = &dmar_domain->domain;
5195 		domain->geometry.aperture_start = 0;
5196 		domain->geometry.aperture_end   =
5197 				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
5198 		domain->geometry.force_aperture = true;
5199 
5200 		return domain;
5201 	case IOMMU_DOMAIN_IDENTITY:
5202 		return &si_domain->domain;
5203 	default:
5204 		return NULL;
5205 	}
5206 
5207 	return NULL;
5208 }
5209 
5210 static void intel_iommu_domain_free(struct iommu_domain *domain)
5211 {
5212 	if (domain != &si_domain->domain)
5213 		domain_exit(to_dmar_domain(domain));
5214 }
5215 
5216 /*
5217  * Check whether a @domain could be attached to the @dev through the
5218  * aux-domain attach/detach APIs.
5219  */
5220 static inline bool
5221 is_aux_domain(struct device *dev, struct iommu_domain *domain)
5222 {
5223 	struct device_domain_info *info = dev->archdata.iommu;
5224 
5225 	return info && info->auxd_enabled &&
5226 			domain->type == IOMMU_DOMAIN_UNMANAGED;
5227 }
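/*
 * Rough sketch of how a (hypothetical) caller, e.g. an mdev-style driver,
 * reaches the aux-domain helpers below through the generic IOMMU API,
 * assuming IOMMU_DEV_FEAT_AUX has already been enabled for the device;
 * error handling omitted:
 *
 *	domain = iommu_domain_alloc(dev->bus);
 *	if (!iommu_aux_attach_device(domain, dev))
 *		pasid = iommu_aux_get_pasid(domain, dev);
 *	...
 *	iommu_aux_detach_device(domain, dev);
 */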
5228 
5229 static void auxiliary_link_device(struct dmar_domain *domain,
5230 				  struct device *dev)
5231 {
5232 	struct device_domain_info *info = dev->archdata.iommu;
5233 
5234 	assert_spin_locked(&device_domain_lock);
5235 	if (WARN_ON(!info))
5236 		return;
5237 
5238 	domain->auxd_refcnt++;
5239 	list_add(&domain->auxd, &info->auxiliary_domains);
5240 }
5241 
5242 static void auxiliary_unlink_device(struct dmar_domain *domain,
5243 				    struct device *dev)
5244 {
5245 	struct device_domain_info *info = dev->archdata.iommu;
5246 
5247 	assert_spin_locked(&device_domain_lock);
5248 	if (WARN_ON(!info))
5249 		return;
5250 
5251 	list_del(&domain->auxd);
5252 	domain->auxd_refcnt--;
5253 
5254 	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5255 		intel_pasid_free_id(domain->default_pasid);
5256 }
5257 
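/*
 * Attach @domain to @dev as an auxiliary domain: allocate a default
 * PASID for the domain if it does not yet have one, attach the domain
 * to the device's IOMMU, install a second-level PASID entry and link
 * the domain into the device's list of auxiliary domains.
 */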
5258 static int aux_domain_add_dev(struct dmar_domain *domain,
5259 			      struct device *dev)
5260 {
5261 	int ret;
5262 	u8 bus, devfn;
5263 	unsigned long flags;
5264 	struct intel_iommu *iommu;
5265 
5266 	iommu = device_to_iommu(dev, &bus, &devfn);
5267 	if (!iommu)
5268 		return -ENODEV;
5269 
5270 	if (domain->default_pasid <= 0) {
5271 		int pasid;
5272 
5273 		pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5274 					     pci_max_pasids(to_pci_dev(dev)),
5275 					     GFP_KERNEL);
5276 		if (pasid <= 0) {
5277 			pr_err("Can't allocate default pasid\n");
5278 			return -ENODEV;
5279 		}
5280 		domain->default_pasid = pasid;
5281 	}
5282 
5283 	spin_lock_irqsave(&device_domain_lock, flags);
5284 	/*
5285 	 * iommu->lock must be held to attach domain to iommu and setup the
5286 	 * pasid entry for second level translation.
5287 	 */
5288 	spin_lock(&iommu->lock);
5289 	ret = domain_attach_iommu(domain, iommu);
5290 	if (ret)
5291 		goto attach_failed;
5292 
5293 	/* Setup the PASID entry for mediated devices: */
5294 	ret = intel_pasid_setup_second_level(iommu, domain, dev,
5295 					     domain->default_pasid);
5296 	if (ret)
5297 		goto table_failed;
5298 	spin_unlock(&iommu->lock);
5299 
5300 	auxiliary_link_device(domain, dev);
5301 
5302 	spin_unlock_irqrestore(&device_domain_lock, flags);
5303 
5304 	return 0;
5305 
5306 table_failed:
5307 	domain_detach_iommu(domain, iommu);
5308 attach_failed:
5309 	spin_unlock(&iommu->lock);
5310 	spin_unlock_irqrestore(&device_domain_lock, flags);
5311 	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5312 		intel_pasid_free_id(domain->default_pasid);
5313 
5314 	return ret;
5315 }
5316 
5317 static void aux_domain_remove_dev(struct dmar_domain *domain,
5318 				  struct device *dev)
5319 {
5320 	struct device_domain_info *info;
5321 	struct intel_iommu *iommu;
5322 	unsigned long flags;
5323 
5324 	if (!is_aux_domain(dev, &domain->domain))
5325 		return;
5326 
5327 	spin_lock_irqsave(&device_domain_lock, flags);
5328 	info = dev->archdata.iommu;
5329 	iommu = info->iommu;
5330 
5331 	auxiliary_unlink_device(domain, dev);
5332 
5333 	spin_lock(&iommu->lock);
5334 	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5335 	domain_detach_iommu(domain, iommu);
5336 	spin_unlock(&iommu->lock);
5337 
5338 	spin_unlock_irqrestore(&device_domain_lock, flags);
5339 }
5340 
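/*
 * Common checks before attaching @dev to @domain: make sure the IOMMU's
 * address width covers the domain's highest mapped address, then trim
 * superfluous page-table levels until the domain's AGAW fits this IOMMU.
 */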
5341 static int prepare_domain_attach_device(struct iommu_domain *domain,
5342 					struct device *dev)
5343 {
5344 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5345 	struct intel_iommu *iommu;
5346 	int addr_width;
5347 	u8 bus, devfn;
5348 
5349 	iommu = device_to_iommu(dev, &bus, &devfn);
5350 	if (!iommu)
5351 		return -ENODEV;
5352 
5353 	/* check if this iommu agaw is sufficient for max mapped address */
5354 	addr_width = agaw_to_width(iommu->agaw);
5355 	if (addr_width > cap_mgaw(iommu->cap))
5356 		addr_width = cap_mgaw(iommu->cap);
5357 
5358 	if (dmar_domain->max_addr > (1LL << addr_width)) {
5359 		dev_err(dev, "%s: iommu width (%d) is not "
5360 		        "sufficient for the mapped address (%llx)\n",
5361 		        __func__, addr_width, dmar_domain->max_addr);
5362 		return -EFAULT;
5363 	}
5364 	dmar_domain->gaw = addr_width;
5365 
5366 	/*
5367 	 * Knock out extra levels of page tables if necessary
5368 	 */
5369 	while (iommu->agaw < dmar_domain->agaw) {
5370 		struct dma_pte *pte;
5371 
5372 		pte = dmar_domain->pgd;
5373 		if (dma_pte_present(pte)) {
5374 			dmar_domain->pgd = (struct dma_pte *)
5375 				phys_to_virt(dma_pte_addr(pte));
5376 			free_pgtable_page(pte);
5377 		}
5378 		dmar_domain->agaw--;
5379 	}
5380 
5381 	return 0;
5382 }
5383 
5384 static int intel_iommu_attach_device(struct iommu_domain *domain,
5385 				     struct device *dev)
5386 {
5387 	int ret;
5388 
5389 	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5390 	    device_is_rmrr_locked(dev)) {
5391 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
5392 		return -EPERM;
5393 	}
5394 
5395 	if (is_aux_domain(dev, domain))
5396 		return -EPERM;
5397 
5398 	/* normally dev is not mapped */
5399 	if (unlikely(domain_context_mapped(dev))) {
5400 		struct dmar_domain *old_domain;
5401 
5402 		old_domain = find_domain(dev);
5403 		if (old_domain)
5404 			dmar_remove_one_dev_info(dev);
5405 	}
5406 
5407 	ret = prepare_domain_attach_device(domain, dev);
5408 	if (ret)
5409 		return ret;
5410 
5411 	return domain_add_dev_info(to_dmar_domain(domain), dev);
5412 }
5413 
5414 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5415 					 struct device *dev)
5416 {
5417 	int ret;
5418 
5419 	if (!is_aux_domain(dev, domain))
5420 		return -EPERM;
5421 
5422 	ret = prepare_domain_attach_device(domain, dev);
5423 	if (ret)
5424 		return ret;
5425 
5426 	return aux_domain_add_dev(to_dmar_domain(domain), dev);
5427 }
5428 
5429 static void intel_iommu_detach_device(struct iommu_domain *domain,
5430 				      struct device *dev)
5431 {
5432 	dmar_remove_one_dev_info(dev);
5433 }
5434 
5435 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5436 					  struct device *dev)
5437 {
5438 	aux_domain_remove_dev(to_dmar_domain(domain), dev);
5439 }
5440 
5441 static int intel_iommu_map(struct iommu_domain *domain,
5442 			   unsigned long iova, phys_addr_t hpa,
5443 			   size_t size, int iommu_prot)
5444 {
5445 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5446 	u64 max_addr;
5447 	int prot = 0;
5448 	int ret;
5449 
5450 	if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5451 		return -EINVAL;
5452 
5453 	if (iommu_prot & IOMMU_READ)
5454 		prot |= DMA_PTE_READ;
5455 	if (iommu_prot & IOMMU_WRITE)
5456 		prot |= DMA_PTE_WRITE;
5457 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5458 		prot |= DMA_PTE_SNP;
5459 
5460 	max_addr = iova + size;
5461 	if (dmar_domain->max_addr < max_addr) {
5462 		u64 end;
5463 
5464 		/* check if minimum agaw is sufficient for mapped address */
5465 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5466 		if (end < max_addr) {
5467 			pr_err("%s: iommu width (%d) is not "
5468 			       "sufficient for the mapped address (%llx)\n",
5469 			       __func__, dmar_domain->gaw, max_addr);
5470 			return -EFAULT;
5471 		}
5472 		dmar_domain->max_addr = max_addr;
5473 	}
5474 	/* Round up size to next multiple of PAGE_SIZE, if it and
5475 	   the low bits of hpa would take us onto the next page */
5476 	size = aligned_nrpages(hpa, size);
5477 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5478 				 hpa >> VTD_PAGE_SHIFT, size, prot);
5479 	return ret;
5480 }
5481 
5482 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5483 				unsigned long iova, size_t size,
5484 				struct iommu_iotlb_gather *gather)
5485 {
5486 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5487 	struct page *freelist = NULL;
5488 	unsigned long start_pfn, last_pfn;
5489 	unsigned int npages;
5490 	int iommu_id, level = 0;
5491 
5492 	/* Cope with horrid API which requires us to unmap more than the
5493 	   size argument if it happens to be a large-page mapping. */
5494 	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5495 	if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5496 		return 0;
5497 
5498 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5499 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5500 
5501 	start_pfn = iova >> VTD_PAGE_SHIFT;
5502 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5503 
5504 	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5505 
5506 	npages = last_pfn - start_pfn + 1;
5507 
5508 	for_each_domain_iommu(iommu_id, dmar_domain)
5509 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5510 				      start_pfn, npages, !freelist, 0);
5511 
5512 	dma_free_pagelist(freelist);
5513 
5514 	if (dmar_domain->max_addr == iova + size)
5515 		dmar_domain->max_addr = iova;
5516 
5517 	return size;
5518 }
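/*
 * Both callbacks above are reached through the generic IOMMU API; a
 * minimal (hypothetical) caller would look roughly like:
 *
 *	iommu_map(domain, iova, paddr, SZ_4K, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(domain, iova, SZ_4K);
 *
 * As noted above, unmap may cover more than the requested size when the
 * IOVA happens to sit inside a large-page mapping.
 */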
5519 
5520 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5521 					    dma_addr_t iova)
5522 {
5523 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5524 	struct dma_pte *pte;
5525 	int level = 0;
5526 	u64 phys = 0;
5527 
5528 	if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5529 		return 0;
5530 
5531 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5532 	if (pte)
5533 		phys = dma_pte_addr(pte);
5534 
5535 	return phys;
5536 }
5537 
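/*
 * scalable_mode_support()/iommu_pasid_support(): true only if every
 * active IOMMU in the system supports scalable mode or PASIDs,
 * respectively; a single unit lacking the capability disables the
 * feature globally.
 */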
5538 static inline bool scalable_mode_support(void)
5539 {
5540 	struct dmar_drhd_unit *drhd;
5541 	struct intel_iommu *iommu;
5542 	bool ret = true;
5543 
5544 	rcu_read_lock();
5545 	for_each_active_iommu(iommu, drhd) {
5546 		if (!sm_supported(iommu)) {
5547 			ret = false;
5548 			break;
5549 		}
5550 	}
5551 	rcu_read_unlock();
5552 
5553 	return ret;
5554 }
5555 
5556 static inline bool iommu_pasid_support(void)
5557 {
5558 	struct dmar_drhd_unit *drhd;
5559 	struct intel_iommu *iommu;
5560 	bool ret = true;
5561 
5562 	rcu_read_lock();
5563 	for_each_active_iommu(iommu, drhd) {
5564 		if (!pasid_supported(iommu)) {
5565 			ret = false;
5566 			break;
5567 		}
5568 	}
5569 	rcu_read_unlock();
5570 
5571 	return ret;
5572 }
5573 
5574 static bool intel_iommu_capable(enum iommu_cap cap)
5575 {
5576 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
5577 		return domain_update_iommu_snooping(NULL) == 1;
5578 	if (cap == IOMMU_CAP_INTR_REMAP)
5579 		return irq_remapping_enabled == 1;
5580 
5581 	return false;
5582 }
5583 
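/*
 * add_device callback: link the device to its IOMMU in sysfs, defer the
 * domain attachment when translation was already enabled before the
 * kernel took over, join an IOMMU group, and move the device between
 * the identity and DMA default domains according to
 * device_def_domain_type(). Devices that need it are also switched to
 * the bounce-buffer DMA ops.
 */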
5584 static int intel_iommu_add_device(struct device *dev)
5585 {
5586 	struct dmar_domain *dmar_domain;
5587 	struct iommu_domain *domain;
5588 	struct intel_iommu *iommu;
5589 	struct iommu_group *group;
5590 	u8 bus, devfn;
5591 	int ret;
5592 
5593 	iommu = device_to_iommu(dev, &bus, &devfn);
5594 	if (!iommu)
5595 		return -ENODEV;
5596 
5597 	iommu_device_link(&iommu->iommu, dev);
5598 
5599 	if (translation_pre_enabled(iommu))
5600 		dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5601 
5602 	group = iommu_group_get_for_dev(dev);
5603 
5604 	if (IS_ERR(group))
5605 		return PTR_ERR(group);
5606 
5607 	iommu_group_put(group);
5608 
5609 	domain = iommu_get_domain_for_dev(dev);
5610 	dmar_domain = to_dmar_domain(domain);
5611 	if (domain->type == IOMMU_DOMAIN_DMA) {
5612 		if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
5613 			ret = iommu_request_dm_for_dev(dev);
5614 			if (ret) {
5615 				dmar_remove_one_dev_info(dev);
5616 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5617 				domain_add_dev_info(si_domain, dev);
5618 				dev_info(dev,
5619 					 "Device uses a private identity domain.\n");
5620 			}
5621 		}
5622 	} else {
5623 		if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
5624 			ret = iommu_request_dma_domain_for_dev(dev);
5625 			if (ret) {
5626 				dmar_remove_one_dev_info(dev);
5627 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5628 				if (!get_private_domain_for_dev(dev)) {
5629 					dev_warn(dev,
5630 						 "Failed to get a private domain.\n");
5631 					return -ENOMEM;
5632 				}
5633 
5634 				dev_info(dev,
5635 					 "Device uses a private dma domain.\n");
5636 			}
5637 		}
5638 	}
5639 
5640 	if (device_needs_bounce(dev)) {
5641 		dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5642 		set_dma_ops(dev, &bounce_dma_ops);
5643 	}
5644 
5645 	return 0;
5646 }
5647 
5648 static void intel_iommu_remove_device(struct device *dev)
5649 {
5650 	struct intel_iommu *iommu;
5651 	u8 bus, devfn;
5652 
5653 	iommu = device_to_iommu(dev, &bus, &devfn);
5654 	if (!iommu)
5655 		return;
5656 
5657 	dmar_remove_one_dev_info(dev);
5658 
5659 	iommu_group_remove_device(dev);
5660 
5661 	iommu_device_unlink(&iommu->iommu, dev);
5662 
5663 	if (device_needs_bounce(dev))
5664 		set_dma_ops(dev, NULL);
5665 }
5666 
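/*
 * Report this device's reserved regions: every RMRR that targets the
 * device (directly or through a PCI bridge) becomes a direct-mapped
 * region (relaxable where permitted), the ISA bridge gets the legacy
 * 16MiB floppy workaround range when configured, and the IOAPIC range
 * is exposed as an MSI region.
 */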
5667 static void intel_iommu_get_resv_regions(struct device *device,
5668 					 struct list_head *head)
5669 {
5670 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5671 	struct iommu_resv_region *reg;
5672 	struct dmar_rmrr_unit *rmrr;
5673 	struct device *i_dev;
5674 	int i;
5675 
5676 	down_read(&dmar_global_lock);
5677 	for_each_rmrr_units(rmrr) {
5678 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5679 					  i, i_dev) {
5680 			struct iommu_resv_region *resv;
5681 			enum iommu_resv_type type;
5682 			size_t length;
5683 
5684 			if (i_dev != device &&
5685 			    !is_downstream_to_pci_bridge(device, i_dev))
5686 				continue;
5687 
5688 			length = rmrr->end_address - rmrr->base_address + 1;
5689 
5690 			type = device_rmrr_is_relaxable(device) ?
5691 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5692 
5693 			resv = iommu_alloc_resv_region(rmrr->base_address,
5694 						       length, prot, type);
5695 			if (!resv)
5696 				break;
5697 
5698 			list_add_tail(&resv->list, head);
5699 		}
5700 	}
5701 	up_read(&dmar_global_lock);
5702 
5703 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5704 	if (dev_is_pci(device)) {
5705 		struct pci_dev *pdev = to_pci_dev(device);
5706 
5707 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5708 			reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5709 						      IOMMU_RESV_DIRECT);
5710 			if (reg)
5711 				list_add_tail(&reg->list, head);
5712 		}
5713 	}
5714 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5715 
5716 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5717 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5718 				      0, IOMMU_RESV_MSI);
5719 	if (!reg)
5720 		return;
5721 	list_add_tail(&reg->list, head);
5722 }
5723 
5724 static void intel_iommu_put_resv_regions(struct device *dev,
5725 					 struct list_head *head)
5726 {
5727 	struct iommu_resv_region *entry, *next;
5728 
5729 	list_for_each_entry_safe(entry, next, head, list)
5730 		kfree(entry);
5731 }
5732 
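/*
 * Enable PASID support for @dev behind @iommu: set the PASID-enable bit
 * in the device's context entry (flushing the context cache when the bit
 * was not already set) and enable the device's PASID/dev-IOTLB features
 * if needed. Callers such as intel_iommu_enable_auxd() below rely on
 * this before setting up per-PASID translations.
 */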
5733 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5734 {
5735 	struct device_domain_info *info;
5736 	struct context_entry *context;
5737 	struct dmar_domain *domain;
5738 	unsigned long flags;
5739 	u64 ctx_lo;
5740 	int ret;
5741 
5742 	domain = find_domain(dev);
5743 	if (!domain)
5744 		return -EINVAL;
5745 
5746 	spin_lock_irqsave(&device_domain_lock, flags);
5747 	spin_lock(&iommu->lock);
5748 
5749 	ret = -EINVAL;
5750 	info = dev->archdata.iommu;
5751 	if (!info || !info->pasid_supported)
5752 		goto out;
5753 
5754 	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5755 	if (WARN_ON(!context))
5756 		goto out;
5757 
5758 	ctx_lo = context[0].lo;
5759 
5760 	if (!(ctx_lo & CONTEXT_PASIDE)) {
5761 		ctx_lo |= CONTEXT_PASIDE;
5762 		context[0].lo = ctx_lo;
5763 		wmb();
5764 		iommu->flush.flush_context(iommu,
5765 					   domain->iommu_did[iommu->seq_id],
5766 					   PCI_DEVID(info->bus, info->devfn),
5767 					   DMA_CCMD_MASK_NOBIT,
5768 					   DMA_CCMD_DEVICE_INVL);
5769 	}
5770 
5771 	/* Enable PASID support in the device, if it wasn't already */
5772 	if (!info->pasid_enabled)
5773 		iommu_enable_dev_iotlb(info);
5774 
5775 	ret = 0;
5776 
5777  out:
5778 	spin_unlock(&iommu->lock);
5779 	spin_unlock_irqrestore(&device_domain_lock, flags);
5780 
5781 	return ret;
5782 }
5783 
5784 static void intel_iommu_apply_resv_region(struct device *dev,
5785 					  struct iommu_domain *domain,
5786 					  struct iommu_resv_region *region)
5787 {
5788 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5789 	unsigned long start, end;
5790 
5791 	start = IOVA_PFN(region->start);
5792 	end   = IOVA_PFN(region->start + region->length - 1);
5793 
5794 	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5795 }
5796 
5797 #ifdef CONFIG_INTEL_IOMMU_SVM
5798 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5799 {
5800 	struct intel_iommu *iommu;
5801 	u8 bus, devfn;
5802 
5803 	if (iommu_dummy(dev)) {
5804 		dev_warn(dev,
5805 			 "No IOMMU translation for device; cannot enable SVM\n");
5806 		return NULL;
5807 	}
5808 
5809 	iommu = device_to_iommu(dev, &bus, &devfn);
5810 	if (!iommu) {
5811 		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5812 		return NULL;
5813 	}
5814 
5815 	return iommu;
5816 }
5817 #endif /* CONFIG_INTEL_IOMMU_SVM */
5818 
5819 static int intel_iommu_enable_auxd(struct device *dev)
5820 {
5821 	struct device_domain_info *info;
5822 	struct intel_iommu *iommu;
5823 	unsigned long flags;
5824 	u8 bus, devfn;
5825 	int ret;
5826 
5827 	iommu = device_to_iommu(dev, &bus, &devfn);
5828 	if (!iommu || dmar_disabled)
5829 		return -EINVAL;
5830 
5831 	if (!sm_supported(iommu) || !pasid_supported(iommu))
5832 		return -EINVAL;
5833 
5834 	ret = intel_iommu_enable_pasid(iommu, dev);
5835 	if (ret)
5836 		return -ENODEV;
5837 
5838 	spin_lock_irqsave(&device_domain_lock, flags);
5839 	info = dev->archdata.iommu;
5840 	info->auxd_enabled = 1;
5841 	spin_unlock_irqrestore(&device_domain_lock, flags);
5842 
5843 	return 0;
5844 }
5845 
5846 static int intel_iommu_disable_auxd(struct device *dev)
5847 {
5848 	struct device_domain_info *info;
5849 	unsigned long flags;
5850 
5851 	spin_lock_irqsave(&device_domain_lock, flags);
5852 	info = dev->archdata.iommu;
5853 	if (!WARN_ON(!info))
5854 		info->auxd_enabled = 0;
5855 	spin_unlock_irqrestore(&device_domain_lock, flags);
5856 
5857 	return 0;
5858 }
5859 
5860 /*
5861  * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5862  * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5863  * spec so that system software and tools can detect endpoint devices that
5864  * support Intel Scalable I/O Virtualization without a host driver dependency.
5865  *
5866  * Returns the address of the matching extended capability structure within
5867  * the device's PCI configuration space or 0 if the device does not support
5868  * it.
5869  */
5870 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5871 {
5872 	int pos;
5873 	u16 vendor, id;
5874 
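	/*
	 * 0x23 is the DVSEC extended capability ID. Within each DVSEC
	 * instance the vendor ID is in the word at offset 4 and the
	 * DVSEC ID in the word at offset 8; per the spec referenced
	 * above, Intel uses DVSEC ID 5 for Scalable IOV.
	 */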
5875 	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5876 	while (pos) {
5877 		pci_read_config_word(pdev, pos + 4, &vendor);
5878 		pci_read_config_word(pdev, pos + 8, &id);
5879 		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5880 			return pos;
5881 
5882 		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5883 	}
5884 
5885 	return 0;
5886 }
5887 
5888 static bool
5889 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5890 {
5891 	if (feat == IOMMU_DEV_FEAT_AUX) {
5892 		int ret;
5893 
5894 		if (!dev_is_pci(dev) || dmar_disabled ||
5895 		    !scalable_mode_support() || !iommu_pasid_support())
5896 			return false;
5897 
5898 		ret = pci_pasid_features(to_pci_dev(dev));
5899 		if (ret < 0)
5900 			return false;
5901 
5902 		return !!siov_find_pci_dvsec(to_pci_dev(dev));
5903 	}
5904 
5905 	return false;
5906 }
5907 
5908 static int
5909 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5910 {
5911 	if (feat == IOMMU_DEV_FEAT_AUX)
5912 		return intel_iommu_enable_auxd(dev);
5913 
5914 	return -ENODEV;
5915 }
5916 
5917 static int
5918 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5919 {
5920 	if (feat == IOMMU_DEV_FEAT_AUX)
5921 		return intel_iommu_disable_auxd(dev);
5922 
5923 	return -ENODEV;
5924 }
5925 
5926 static bool
5927 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5928 {
5929 	struct device_domain_info *info = dev->archdata.iommu;
5930 
5931 	if (feat == IOMMU_DEV_FEAT_AUX)
5932 		return scalable_mode_support() && info && info->auxd_enabled;
5933 
5934 	return false;
5935 }
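/*
 * The feature handlers above are reached through wrappers such as
 * iommu_dev_enable_feature() in the IOMMU core; a (hypothetical) driver
 * probing for auxiliary domain support would do something along the
 * lines of:
 *
 *	if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) &&
 *	    !iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX))
 *		... aux domains can now be attached to dev ...
 */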
5936 
5937 static int
5938 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5939 {
5940 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5941 
5942 	return dmar_domain->default_pasid > 0 ?
5943 			dmar_domain->default_pasid : -EINVAL;
5944 }
5945 
5946 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5947 					   struct device *dev)
5948 {
5949 	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5950 }
5951 
5952 const struct iommu_ops intel_iommu_ops = {
5953 	.capable		= intel_iommu_capable,
5954 	.domain_alloc		= intel_iommu_domain_alloc,
5955 	.domain_free		= intel_iommu_domain_free,
5956 	.attach_dev		= intel_iommu_attach_device,
5957 	.detach_dev		= intel_iommu_detach_device,
5958 	.aux_attach_dev		= intel_iommu_aux_attach_device,
5959 	.aux_detach_dev		= intel_iommu_aux_detach_device,
5960 	.aux_get_pasid		= intel_iommu_aux_get_pasid,
5961 	.map			= intel_iommu_map,
5962 	.unmap			= intel_iommu_unmap,
5963 	.iova_to_phys		= intel_iommu_iova_to_phys,
5964 	.add_device		= intel_iommu_add_device,
5965 	.remove_device		= intel_iommu_remove_device,
5966 	.get_resv_regions	= intel_iommu_get_resv_regions,
5967 	.put_resv_regions	= intel_iommu_put_resv_regions,
5968 	.apply_resv_region	= intel_iommu_apply_resv_region,
5969 	.device_group		= pci_device_group,
5970 	.dev_has_feat		= intel_iommu_dev_has_feat,
5971 	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
5972 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
5973 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
5974 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
5975 	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
5976 };
5977 
5978 static void quirk_iommu_igfx(struct pci_dev *dev)
5979 {
5980 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5981 	dmar_map_gfx = 0;
5982 }
5983 
5984 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5985 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5986 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5987 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5988 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5989 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5990 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5991 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5992 
5993 /* Broadwell igfx malfunctions with dmar */
5994 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5995 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5996 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5997 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5998 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5999 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6000 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6001 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6002 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6003 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6004 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6005 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6006 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6007 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6008 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6009 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6010 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6011 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6012 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6013 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6014 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6015 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6016 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6017 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
6018 
6019 static void quirk_iommu_rwbf(struct pci_dev *dev)
6020 {
6021 	/*
6022 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
6023 	 * but needs it. Same seems to hold for the desktop versions.
6024 	 */
6025 	pci_info(dev, "Forcing write-buffer flush capability\n");
6026 	rwbf_quirk = 1;
6027 }
6028 
6029 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6030 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6031 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6032 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6033 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6034 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6035 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6036 
6037 #define GGC 0x52
6038 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
6039 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
6040 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
6041 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
6042 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
6043 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
6044 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
6045 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
6046 
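/*
 * On Calpella/Ironlake integrated graphics the BIOS has to set aside a
 * shadow GTT for VT-d use (GGC_MEMORY_VT_ENABLED). If it did not,
 * graphics translation is disabled altogether; if it did, batched IOTLB
 * flushing is still turned off because the graphics device has to be
 * idle before its mappings are flushed.
 */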
6047 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6048 {
6049 	unsigned short ggc;
6050 
6051 	if (pci_read_config_word(dev, GGC, &ggc))
6052 		return;
6053 
6054 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
6055 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6056 		dmar_map_gfx = 0;
6057 	} else if (dmar_map_gfx) {
6058 		/* we have to ensure the gfx device is idle before we flush */
6059 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6060 		intel_iommu_strict = 1;
6061 	}
6062 }
6063 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6064 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6065 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6066 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6067 
6068 /* On Tylersburg chipsets, some BIOSes have been known to enable the
6069    ISOCH DMAR unit for the Azalia sound device, but not give it any
6070    TLB entries, which causes it to deadlock. Check for that.  We do
6071    this in a function called from init_dmars(), instead of in a PCI
6072    quirk, because we don't want to print the obnoxious "BIOS broken"
6073    message if VT-d is actually disabled.
6074 */
6075 static void __init check_tylersburg_isoch(void)
6076 {
6077 	struct pci_dev *pdev;
6078 	uint32_t vtisochctrl;
6079 
6080 	/* If there's no Azalia in the system anyway, forget it. */
6081 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6082 	if (!pdev)
6083 		return;
6084 	pci_dev_put(pdev);
6085 
6086 	/* System Management Registers. Might be hidden, in which case
6087 	   we can't do the sanity check. But that's OK, because the
6088 	   known-broken BIOSes _don't_ actually hide it, so far. */
6089 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6090 	if (!pdev)
6091 		return;
6092 
6093 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6094 		pci_dev_put(pdev);
6095 		return;
6096 	}
6097 
6098 	pci_dev_put(pdev);
6099 
6100 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6101 	if (vtisochctrl & 1)
6102 		return;
6103 
6104 	/* Drop all bits other than the number of TLB entries */
6105 	vtisochctrl &= 0x1c;
6106 
6107 	/* If we have the recommended number of TLB entries (16), fine. */
6108 	if (vtisochctrl == 0x10)
6109 		return;
6110 
6111 	/* Zero TLB entries? You get to ride the short bus to school. */
6112 	if (!vtisochctrl) {
6113 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6114 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6115 		     dmi_get_system_info(DMI_BIOS_VENDOR),
6116 		     dmi_get_system_info(DMI_BIOS_VERSION),
6117 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
6118 		iommu_identity_mapping |= IDENTMAP_AZALIA;
6119 		return;
6120 	}
6121 
6122 	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
6123 	       vtisochctrl);
6124 }
6125