1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22 
23 #include <linux/acpi.h>
24 #include <linux/acpi_iort.h>
25 #include <linux/bitfield.h>
26 #include <linux/bitops.h>
27 #include <linux/crash_dump.h>
28 #include <linux/delay.h>
29 #include <linux/dma-iommu.h>
30 #include <linux/err.h>
31 #include <linux/interrupt.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/module.h>
35 #include <linux/msi.h>
36 #include <linux/of.h>
37 #include <linux/of_address.h>
38 #include <linux/of_iommu.h>
39 #include <linux/of_platform.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
42 
43 #include <linux/amba/bus.h>
44 
45 #include "io-pgtable.h"
46 
47 /* MMIO registers */
48 #define ARM_SMMU_IDR0			0x0
49 #define IDR0_ST_LVL			GENMASK(28, 27)
50 #define IDR0_ST_LVL_2LVL		1
51 #define IDR0_STALL_MODEL		GENMASK(25, 24)
52 #define IDR0_STALL_MODEL_STALL		0
53 #define IDR0_STALL_MODEL_FORCE		2
54 #define IDR0_TTENDIAN			GENMASK(22, 21)
55 #define IDR0_TTENDIAN_MIXED		0
56 #define IDR0_TTENDIAN_LE		2
57 #define IDR0_TTENDIAN_BE		3
58 #define IDR0_CD2L			(1 << 19)
59 #define IDR0_VMID16			(1 << 18)
60 #define IDR0_PRI			(1 << 16)
61 #define IDR0_SEV			(1 << 14)
62 #define IDR0_MSI			(1 << 13)
63 #define IDR0_ASID16			(1 << 12)
64 #define IDR0_ATS			(1 << 10)
65 #define IDR0_HYP			(1 << 9)
66 #define IDR0_COHACC			(1 << 4)
67 #define IDR0_TTF			GENMASK(3, 2)
68 #define IDR0_TTF_AARCH64		2
69 #define IDR0_TTF_AARCH32_64		3
70 #define IDR0_S1P			(1 << 1)
71 #define IDR0_S2P			(1 << 0)
72 
73 #define ARM_SMMU_IDR1			0x4
74 #define IDR1_TABLES_PRESET		(1 << 30)
75 #define IDR1_QUEUES_PRESET		(1 << 29)
76 #define IDR1_REL			(1 << 28)
77 #define IDR1_CMDQS			GENMASK(25, 21)
78 #define IDR1_EVTQS			GENMASK(20, 16)
79 #define IDR1_PRIQS			GENMASK(15, 11)
80 #define IDR1_SSIDSIZE			GENMASK(10, 6)
81 #define IDR1_SIDSIZE			GENMASK(5, 0)
82 
83 #define ARM_SMMU_IDR5			0x14
84 #define IDR5_STALL_MAX			GENMASK(31, 16)
85 #define IDR5_GRAN64K			(1 << 6)
86 #define IDR5_GRAN16K			(1 << 5)
87 #define IDR5_GRAN4K			(1 << 4)
88 #define IDR5_OAS			GENMASK(2, 0)
89 #define IDR5_OAS_32_BIT			0
90 #define IDR5_OAS_36_BIT			1
91 #define IDR5_OAS_40_BIT			2
92 #define IDR5_OAS_42_BIT			3
93 #define IDR5_OAS_44_BIT			4
94 #define IDR5_OAS_48_BIT			5
95 #define IDR5_OAS_52_BIT			6
96 #define IDR5_VAX			GENMASK(11, 10)
97 #define IDR5_VAX_52_BIT			1
98 
99 #define ARM_SMMU_CR0			0x20
100 #define CR0_CMDQEN			(1 << 3)
101 #define CR0_EVTQEN			(1 << 2)
102 #define CR0_PRIQEN			(1 << 1)
103 #define CR0_SMMUEN			(1 << 0)
104 
105 #define ARM_SMMU_CR0ACK			0x24
106 
107 #define ARM_SMMU_CR1			0x28
108 #define CR1_TABLE_SH			GENMASK(11, 10)
109 #define CR1_TABLE_OC			GENMASK(9, 8)
110 #define CR1_TABLE_IC			GENMASK(7, 6)
111 #define CR1_QUEUE_SH			GENMASK(5, 4)
112 #define CR1_QUEUE_OC			GENMASK(3, 2)
113 #define CR1_QUEUE_IC			GENMASK(1, 0)
114 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
115 #define CR1_CACHE_NC			0
116 #define CR1_CACHE_WB			1
117 #define CR1_CACHE_WT			2
118 
119 #define ARM_SMMU_CR2			0x2c
120 #define CR2_PTM				(1 << 2)
121 #define CR2_RECINVSID			(1 << 1)
122 #define CR2_E2H				(1 << 0)
123 
124 #define ARM_SMMU_GBPA			0x44
125 #define GBPA_UPDATE			(1 << 31)
126 #define GBPA_ABORT			(1 << 20)
127 
128 #define ARM_SMMU_IRQ_CTRL		0x50
129 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
130 #define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
131 #define IRQ_CTRL_GERROR_IRQEN		(1 << 0)
132 
133 #define ARM_SMMU_IRQ_CTRLACK		0x54
134 
135 #define ARM_SMMU_GERROR			0x60
136 #define GERROR_SFM_ERR			(1 << 8)
137 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
138 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
139 #define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
140 #define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
141 #define GERROR_PRIQ_ABT_ERR		(1 << 3)
142 #define GERROR_EVTQ_ABT_ERR		(1 << 2)
143 #define GERROR_CMDQ_ERR			(1 << 0)
144 #define GERROR_ERR_MASK			0xfd
145 
146 #define ARM_SMMU_GERRORN		0x64
147 
148 #define ARM_SMMU_GERROR_IRQ_CFG0	0x68
149 #define ARM_SMMU_GERROR_IRQ_CFG1	0x70
150 #define ARM_SMMU_GERROR_IRQ_CFG2	0x74
151 
152 #define ARM_SMMU_STRTAB_BASE		0x80
153 #define STRTAB_BASE_RA			(1UL << 62)
154 #define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
155 
156 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
157 #define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
158 #define STRTAB_BASE_CFG_FMT_LINEAR	0
159 #define STRTAB_BASE_CFG_FMT_2LVL	1
160 #define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
161 #define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)
162 
163 #define ARM_SMMU_CMDQ_BASE		0x90
164 #define ARM_SMMU_CMDQ_PROD		0x98
165 #define ARM_SMMU_CMDQ_CONS		0x9c
166 
167 #define ARM_SMMU_EVTQ_BASE		0xa0
168 #define ARM_SMMU_EVTQ_PROD		0x100a8
169 #define ARM_SMMU_EVTQ_CONS		0x100ac
170 #define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
171 #define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
172 #define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc
173 
174 #define ARM_SMMU_PRIQ_BASE		0xc0
175 #define ARM_SMMU_PRIQ_PROD		0x100c8
176 #define ARM_SMMU_PRIQ_CONS		0x100cc
177 #define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
178 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
179 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
180 
181 /* Common MSI config fields */
182 #define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
183 #define MSI_CFG2_SH			GENMASK(5, 4)
184 #define MSI_CFG2_MEMATTR		GENMASK(3, 0)
185 
186 /* Common memory attribute values */
187 #define ARM_SMMU_SH_NSH			0
188 #define ARM_SMMU_SH_OSH			2
189 #define ARM_SMMU_SH_ISH			3
190 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
191 #define ARM_SMMU_MEMATTR_OIWB		0xf
192 
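/*
 * Queue pointers pack three fields into one 32-bit value: the entry index
 * in the low max_n_shift bits, a wrap bit immediately above it that toggles
 * each time the index rolls over, and an overflow flag in bit 31 used to
 * detect lost entries (see queue_sync_prod()). For example, a 256-entry
 * queue (max_n_shift == 8) keeps its index in bits [7:0] and its wrap bit
 * in bit 8.
 */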
193 #define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
194 #define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
195 #define Q_OVERFLOW_FLAG			(1 << 31)
196 #define Q_OVF(q, p)			((p) & Q_OVERFLOW_FLAG)
197 #define Q_ENT(q, p)			((q)->base +			\
198 					 Q_IDX(q, p) * (q)->ent_dwords)
199 
200 #define Q_BASE_RWA			(1UL << 62)
201 #define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
202 #define Q_BASE_LOG2SIZE			GENMASK(4, 0)
203 
204 /*
205  * Stream table.
206  *
207  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
208  * 2lvl: 128k L1 entries,
209  *       256 lazy entries per table (each table covers a PCI bus)
210  */
211 #define STRTAB_L1_SZ_SHIFT		20
212 #define STRTAB_SPLIT			8
213 
214 #define STRTAB_L1_DESC_DWORDS		1
215 #define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
216 #define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)
217 
218 #define STRTAB_STE_DWORDS		8
219 #define STRTAB_STE_0_V			(1UL << 0)
220 #define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
221 #define STRTAB_STE_0_CFG_ABORT		0
222 #define STRTAB_STE_0_CFG_BYPASS		4
223 #define STRTAB_STE_0_CFG_S1_TRANS	5
224 #define STRTAB_STE_0_CFG_S2_TRANS	6
225 
226 #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
227 #define STRTAB_STE_0_S1FMT_LINEAR	0
228 #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
229 #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
230 
231 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
232 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
233 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
234 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
235 #define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
236 #define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
237 #define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)
238 
239 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
240 
241 #define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
242 #define STRTAB_STE_1_EATS_ABT		0UL
243 #define STRTAB_STE_1_EATS_TRANS		1UL
244 #define STRTAB_STE_1_EATS_S1CHK		2UL
245 
246 #define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
247 #define STRTAB_STE_1_STRW_NSEL1		0UL
248 #define STRTAB_STE_1_STRW_EL2		2UL
249 
250 #define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
251 #define STRTAB_STE_1_SHCFG_INCOMING	1UL
252 
253 #define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
254 #define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
255 #define STRTAB_STE_2_S2AA64		(1UL << 51)
256 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
257 #define STRTAB_STE_2_S2PTW		(1UL << 54)
258 #define STRTAB_STE_2_S2R		(1UL << 58)
259 
260 #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
261 
262 /* Context descriptor (stage-1 only) */
263 #define CTXDESC_CD_DWORDS		8
264 #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
265 #define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
266 #define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
267 #define ARM64_TCR_TG0			GENMASK_ULL(15, 14)
268 #define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
269 #define ARM64_TCR_IRGN0			GENMASK_ULL(9, 8)
270 #define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
271 #define ARM64_TCR_ORGN0			GENMASK_ULL(11, 10)
272 #define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
273 #define ARM64_TCR_SH0			GENMASK_ULL(13, 12)
274 #define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
275 #define ARM64_TCR_EPD0			(1ULL << 7)
276 #define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)
277 #define ARM64_TCR_EPD1			(1ULL << 23)
278 
279 #define CTXDESC_CD_0_ENDI		(1UL << 15)
280 #define CTXDESC_CD_0_V			(1UL << 31)
281 
282 #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
283 #define ARM64_TCR_IPS			GENMASK_ULL(34, 32)
284 #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
285 #define ARM64_TCR_TBI0			(1ULL << 37)
286 
287 #define CTXDESC_CD_0_AA64		(1UL << 41)
288 #define CTXDESC_CD_0_S			(1UL << 44)
289 #define CTXDESC_CD_0_R			(1UL << 45)
290 #define CTXDESC_CD_0_A			(1UL << 46)
291 #define CTXDESC_CD_0_ASET		(1UL << 47)
292 #define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)
293 
294 #define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
295 
296 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
297 #define ARM_SMMU_TCR2CD(tcr, fld)	FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
298 					FIELD_GET(ARM64_TCR_##fld, tcr))
299 
300 /* Command queue */
301 #define CMDQ_ENT_DWORDS			2
302 #define CMDQ_MAX_SZ_SHIFT		8
303 
304 #define CMDQ_CONS_ERR			GENMASK(30, 24)
305 #define CMDQ_ERR_CERROR_NONE_IDX	0
306 #define CMDQ_ERR_CERROR_ILL_IDX		1
307 #define CMDQ_ERR_CERROR_ABT_IDX		2
308 
309 #define CMDQ_0_OP			GENMASK_ULL(7, 0)
310 #define CMDQ_0_SSV			(1UL << 11)
311 
312 #define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
313 #define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
314 #define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
315 
316 #define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
317 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
318 #define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
319 
320 #define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
321 #define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
322 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
323 #define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
324 #define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)
325 
326 #define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
327 #define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
328 #define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
329 #define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
330 
331 #define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
332 #define CMDQ_SYNC_0_CS_NONE		0
333 #define CMDQ_SYNC_0_CS_IRQ		1
334 #define CMDQ_SYNC_0_CS_SEV		2
335 #define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
336 #define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
337 #define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
338 #define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)
339 
340 /* Event queue */
341 #define EVTQ_ENT_DWORDS			4
342 #define EVTQ_MAX_SZ_SHIFT		7
343 
344 #define EVTQ_0_ID			GENMASK_ULL(7, 0)
345 
346 /* PRI queue */
347 #define PRIQ_ENT_DWORDS			2
348 #define PRIQ_MAX_SZ_SHIFT		8
349 
350 #define PRIQ_0_SID			GENMASK_ULL(31, 0)
351 #define PRIQ_0_SSID			GENMASK_ULL(51, 32)
352 #define PRIQ_0_PERM_PRIV		(1UL << 58)
353 #define PRIQ_0_PERM_EXEC		(1UL << 59)
354 #define PRIQ_0_PERM_READ		(1UL << 60)
355 #define PRIQ_0_PERM_WRITE		(1UL << 61)
356 #define PRIQ_0_PRG_LAST			(1UL << 62)
357 #define PRIQ_0_SSID_V			(1UL << 63)
358 
359 #define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
360 #define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)
361 
362 /* High-level queue structures */
363 #define ARM_SMMU_POLL_TIMEOUT_US	100
364 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US	1000000 /* 1s! */
365 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT	10
366 
367 #define MSI_IOVA_BASE			0x8000000
368 #define MSI_IOVA_LENGTH			0x100000
369 
370 static bool disable_bypass = 1;
371 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
372 MODULE_PARM_DESC(disable_bypass,
373 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
374 
375 enum pri_resp {
376 	PRI_RESP_DENY = 0,
377 	PRI_RESP_FAIL = 1,
378 	PRI_RESP_SUCC = 2,
379 };
380 
381 enum arm_smmu_msi_index {
382 	EVTQ_MSI_INDEX,
383 	GERROR_MSI_INDEX,
384 	PRIQ_MSI_INDEX,
385 	ARM_SMMU_MAX_MSIS,
386 };
387 
388 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
389 	[EVTQ_MSI_INDEX] = {
390 		ARM_SMMU_EVTQ_IRQ_CFG0,
391 		ARM_SMMU_EVTQ_IRQ_CFG1,
392 		ARM_SMMU_EVTQ_IRQ_CFG2,
393 	},
394 	[GERROR_MSI_INDEX] = {
395 		ARM_SMMU_GERROR_IRQ_CFG0,
396 		ARM_SMMU_GERROR_IRQ_CFG1,
397 		ARM_SMMU_GERROR_IRQ_CFG2,
398 	},
399 	[PRIQ_MSI_INDEX] = {
400 		ARM_SMMU_PRIQ_IRQ_CFG0,
401 		ARM_SMMU_PRIQ_IRQ_CFG1,
402 		ARM_SMMU_PRIQ_IRQ_CFG2,
403 	},
404 };
405 
406 struct arm_smmu_cmdq_ent {
407 	/* Common fields */
408 	u8				opcode;
409 	bool				substream_valid;
410 
411 	/* Command-specific fields */
412 	union {
413 		#define CMDQ_OP_PREFETCH_CFG	0x1
414 		struct {
415 			u32			sid;
416 			u8			size;
417 			u64			addr;
418 		} prefetch;
419 
420 		#define CMDQ_OP_CFGI_STE	0x3
421 		#define CMDQ_OP_CFGI_ALL	0x4
422 		struct {
423 			u32			sid;
424 			union {
425 				bool		leaf;
426 				u8		span;
427 			};
428 		} cfgi;
429 
430 		#define CMDQ_OP_TLBI_NH_ASID	0x11
431 		#define CMDQ_OP_TLBI_NH_VA	0x12
432 		#define CMDQ_OP_TLBI_EL2_ALL	0x20
433 		#define CMDQ_OP_TLBI_S12_VMALL	0x28
434 		#define CMDQ_OP_TLBI_S2_IPA	0x2a
435 		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
436 		struct {
437 			u16			asid;
438 			u16			vmid;
439 			bool			leaf;
440 			u64			addr;
441 		} tlbi;
442 
443 		#define CMDQ_OP_PRI_RESP	0x41
444 		struct {
445 			u32			sid;
446 			u32			ssid;
447 			u16			grpid;
448 			enum pri_resp		resp;
449 		} pri;
450 
451 		#define CMDQ_OP_CMD_SYNC	0x46
452 		struct {
453 			u32			msidata;
454 			u64			msiaddr;
455 		} sync;
456 	};
457 };
458 
459 struct arm_smmu_queue {
460 	int				irq; /* Wired interrupt */
461 
462 	__le64				*base;
463 	dma_addr_t			base_dma;
464 	u64				q_base;
465 
466 	size_t				ent_dwords;
467 	u32				max_n_shift;
468 	u32				prod;
469 	u32				cons;
470 
471 	u32 __iomem			*prod_reg;
472 	u32 __iomem			*cons_reg;
473 };
474 
475 struct arm_smmu_cmdq {
476 	struct arm_smmu_queue		q;
477 	spinlock_t			lock;
478 };
479 
480 struct arm_smmu_evtq {
481 	struct arm_smmu_queue		q;
482 	u32				max_stalls;
483 };
484 
485 struct arm_smmu_priq {
486 	struct arm_smmu_queue		q;
487 };
488 
489 /* High-level stream table and context descriptor structures */
490 struct arm_smmu_strtab_l1_desc {
491 	u8				span;
492 
493 	__le64				*l2ptr;
494 	dma_addr_t			l2ptr_dma;
495 };
496 
497 struct arm_smmu_s1_cfg {
498 	__le64				*cdptr;
499 	dma_addr_t			cdptr_dma;
500 
501 	struct arm_smmu_ctx_desc {
502 		u16	asid;
503 		u64	ttbr;
504 		u64	tcr;
505 		u64	mair;
506 	}				cd;
507 };
508 
509 struct arm_smmu_s2_cfg {
510 	u16				vmid;
511 	u64				vttbr;
512 	u64				vtcr;
513 };
514 
515 struct arm_smmu_strtab_ent {
516 	/*
517 	 * An STE is "assigned" if the master emitting the corresponding SID
518 	 * is attached to a domain. The behaviour of an unassigned STE is
519 	 * determined by the disable_bypass parameter, whereas an assigned
520 	 * STE behaves according to s1_cfg/s2_cfg, which themselves are
521 	 * configured according to the domain type.
522 	 */
523 	bool				assigned;
524 	struct arm_smmu_s1_cfg		*s1_cfg;
525 	struct arm_smmu_s2_cfg		*s2_cfg;
526 };
527 
528 struct arm_smmu_strtab_cfg {
529 	__le64				*strtab;
530 	dma_addr_t			strtab_dma;
531 	struct arm_smmu_strtab_l1_desc	*l1_desc;
532 	unsigned int			num_l1_ents;
533 
534 	u64				strtab_base;
535 	u32				strtab_base_cfg;
536 };
537 
538 /* An SMMUv3 instance */
539 struct arm_smmu_device {
540 	struct device			*dev;
541 	void __iomem			*base;
542 
543 #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
544 #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
545 #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
546 #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
547 #define ARM_SMMU_FEAT_PRI		(1 << 4)
548 #define ARM_SMMU_FEAT_ATS		(1 << 5)
549 #define ARM_SMMU_FEAT_SEV		(1 << 6)
550 #define ARM_SMMU_FEAT_MSI		(1 << 7)
551 #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
552 #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
553 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
554 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
555 #define ARM_SMMU_FEAT_HYP		(1 << 12)
556 #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
557 #define ARM_SMMU_FEAT_VAX		(1 << 14)
558 	u32				features;
559 
560 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
561 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
562 	u32				options;
563 
564 	struct arm_smmu_cmdq		cmdq;
565 	struct arm_smmu_evtq		evtq;
566 	struct arm_smmu_priq		priq;
567 
568 	int				gerr_irq;
569 	int				combined_irq;
570 	atomic_t			sync_nr;
571 
572 	unsigned long			ias; /* IPA */
573 	unsigned long			oas; /* PA */
574 	unsigned long			pgsize_bitmap;
575 
576 #define ARM_SMMU_MAX_ASIDS		(1 << 16)
577 	unsigned int			asid_bits;
578 	DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
579 
580 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
581 	unsigned int			vmid_bits;
582 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
583 
584 	unsigned int			ssid_bits;
585 	unsigned int			sid_bits;
586 
587 	struct arm_smmu_strtab_cfg	strtab_cfg;
588 
589 	u32				sync_count;
590 
591 	/* IOMMU core code handle */
592 	struct iommu_device		iommu;
593 };
594 
595 /* SMMU private data for each master */
596 struct arm_smmu_master_data {
597 	struct arm_smmu_device		*smmu;
598 	struct arm_smmu_strtab_ent	ste;
599 };
600 
601 /* SMMU private data for an IOMMU domain */
602 enum arm_smmu_domain_stage {
603 	ARM_SMMU_DOMAIN_S1 = 0,
604 	ARM_SMMU_DOMAIN_S2,
605 	ARM_SMMU_DOMAIN_NESTED,
606 	ARM_SMMU_DOMAIN_BYPASS,
607 };
608 
609 struct arm_smmu_domain {
610 	struct arm_smmu_device		*smmu;
611 	struct mutex			init_mutex; /* Protects smmu pointer */
612 
613 	struct io_pgtable_ops		*pgtbl_ops;
614 
615 	enum arm_smmu_domain_stage	stage;
616 	union {
617 		struct arm_smmu_s1_cfg	s1_cfg;
618 		struct arm_smmu_s2_cfg	s2_cfg;
619 	};
620 
621 	struct iommu_domain		domain;
622 };
623 
624 struct arm_smmu_option_prop {
625 	u32 opt;
626 	const char *prop;
627 };
628 
629 static struct arm_smmu_option_prop arm_smmu_options[] = {
630 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
631 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
632 	{ 0, NULL},
633 };
634 
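/*
 * Implementations flagged with ARM_SMMU_OPT_PAGE0_REGS_ONLY expose their
 * page 1 registers within page 0, so fold offsets beyond 64K back down
 * before forming the register address.
 */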
635 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
636 						 struct arm_smmu_device *smmu)
637 {
638 	if ((offset > SZ_64K) &&
639 	    (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
640 		offset -= SZ_64K;
641 
642 	return smmu->base + offset;
643 }
644 
645 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
646 {
647 	return container_of(dom, struct arm_smmu_domain, domain);
648 }
649 
650 static void parse_driver_options(struct arm_smmu_device *smmu)
651 {
652 	int i = 0;
653 
654 	do {
655 		if (of_property_read_bool(smmu->dev->of_node,
656 						arm_smmu_options[i].prop)) {
657 			smmu->options |= arm_smmu_options[i].opt;
658 			dev_notice(smmu->dev, "option %s\n",
659 				arm_smmu_options[i].prop);
660 		}
661 	} while (arm_smmu_options[++i].opt);
662 }
663 
664 /* Low-level queue manipulation functions */
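/*
 * The queue is full when the producer and consumer indices match but their
 * wrap bits differ, and empty when both the indices and wrap bits match.
 */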
665 static bool queue_full(struct arm_smmu_queue *q)
666 {
667 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
668 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
669 }
670 
671 static bool queue_empty(struct arm_smmu_queue *q)
672 {
673 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
674 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
675 }
676 
677 static void queue_sync_cons(struct arm_smmu_queue *q)
678 {
679 	q->cons = readl_relaxed(q->cons_reg);
680 }
681 
682 static void queue_inc_cons(struct arm_smmu_queue *q)
683 {
684 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
685 
686 	q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
687 	writel(q->cons, q->cons_reg);
688 }
689 
690 static int queue_sync_prod(struct arm_smmu_queue *q)
691 {
692 	int ret = 0;
693 	u32 prod = readl_relaxed(q->prod_reg);
694 
695 	if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
696 		ret = -EOVERFLOW;
697 
698 	q->prod = prod;
699 	return ret;
700 }
701 
702 static void queue_inc_prod(struct arm_smmu_queue *q)
703 {
704 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
705 
706 	q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
707 	writel(q->prod, q->prod_reg);
708 }
709 
710 /*
711  * Wait for the SMMU to consume items. If sync is true, wait until the queue
712  * is empty. Otherwise, wait until there is at least one free slot.
713  */
714 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
715 {
716 	ktime_t timeout;
717 	unsigned int delay = 1, spin_cnt = 0;
718 
719 	/* Wait longer if it's a CMD_SYNC */
720 	timeout = ktime_add_us(ktime_get(), sync ?
721 					    ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
722 					    ARM_SMMU_POLL_TIMEOUT_US);
723 
724 	while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
725 		if (ktime_compare(ktime_get(), timeout) > 0)
726 			return -ETIMEDOUT;
727 
728 		if (wfe) {
729 			wfe();
730 		} else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
731 			cpu_relax();
732 			continue;
733 		} else {
734 			udelay(delay);
735 			delay *= 2;
736 			spin_cnt = 0;
737 		}
738 	}
739 
740 	return 0;
741 }
742 
743 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
744 {
745 	int i;
746 
747 	for (i = 0; i < n_dwords; ++i)
748 		*dst++ = cpu_to_le64(*src++);
749 }
750 
751 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
752 {
753 	if (queue_full(q))
754 		return -ENOSPC;
755 
756 	queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
757 	queue_inc_prod(q);
758 	return 0;
759 }
760 
761 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
762 {
763 	int i;
764 
765 	for (i = 0; i < n_dwords; ++i)
766 		*dst++ = le64_to_cpu(*src++);
767 }
768 
769 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
770 {
771 	if (queue_empty(q))
772 		return -EAGAIN;
773 
774 	queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
775 	queue_inc_cons(q);
776 	return 0;
777 }
778 
779 /* High-level queue accessors */
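/* Encode a driver-level command descriptor into the 16-byte queue format. */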
780 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
781 {
782 	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
783 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
784 
785 	switch (ent->opcode) {
786 	case CMDQ_OP_TLBI_EL2_ALL:
787 	case CMDQ_OP_TLBI_NSNH_ALL:
788 		break;
789 	case CMDQ_OP_PREFETCH_CFG:
790 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
791 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
792 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
793 		break;
794 	case CMDQ_OP_CFGI_STE:
795 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
796 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
797 		break;
798 	case CMDQ_OP_CFGI_ALL:
799 		/* Cover the entire SID range */
800 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
801 		break;
802 	case CMDQ_OP_TLBI_NH_VA:
803 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
804 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
805 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
806 		break;
807 	case CMDQ_OP_TLBI_S2_IPA:
808 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
809 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
810 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
811 		break;
812 	case CMDQ_OP_TLBI_NH_ASID:
813 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
814 		/* Fallthrough */
815 	case CMDQ_OP_TLBI_S12_VMALL:
816 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
817 		break;
818 	case CMDQ_OP_PRI_RESP:
819 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
820 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
821 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
822 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
823 		switch (ent->pri.resp) {
824 		case PRI_RESP_DENY:
825 		case PRI_RESP_FAIL:
826 		case PRI_RESP_SUCC:
827 			break;
828 		default:
829 			return -EINVAL;
830 		}
831 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
832 		break;
833 	case CMDQ_OP_CMD_SYNC:
834 		if (ent->sync.msiaddr)
835 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
836 		else
837 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
838 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
839 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
840 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
841 		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
842 		break;
843 	default:
844 		return -ENOENT;
845 	}
846 
847 	return 0;
848 }
849 
850 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
851 {
852 	static const char *cerror_str[] = {
853 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
854 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
855 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
856 	};
857 
858 	int i;
859 	u64 cmd[CMDQ_ENT_DWORDS];
860 	struct arm_smmu_queue *q = &smmu->cmdq.q;
861 	u32 cons = readl_relaxed(q->cons_reg);
862 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
863 	struct arm_smmu_cmdq_ent cmd_sync = {
864 		.opcode = CMDQ_OP_CMD_SYNC,
865 	};
866 
867 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
868 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
869 
870 	switch (idx) {
871 	case CMDQ_ERR_CERROR_ABT_IDX:
872 		dev_err(smmu->dev, "retrying command fetch\n");
873 	case CMDQ_ERR_CERROR_NONE_IDX:
874 		return;
875 	case CMDQ_ERR_CERROR_ILL_IDX:
876 		/* Fallthrough */
877 	default:
878 		break;
879 	}
880 
881 	/*
882 	 * We may have concurrent producers, so we need to be careful
883 	 * not to touch any of the shadow cmdq state.
884 	 */
885 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
886 	dev_err(smmu->dev, "skipping command in error state:\n");
887 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
888 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
889 
890 	/* Convert the erroneous command into a CMD_SYNC */
891 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
892 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
893 		return;
894 	}
895 
896 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
897 }
898 
899 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
900 {
901 	struct arm_smmu_queue *q = &smmu->cmdq.q;
902 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
903 
904 	while (queue_insert_raw(q, cmd) == -ENOSPC) {
905 		if (queue_poll_cons(q, false, wfe))
906 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
907 	}
908 }
909 
910 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
911 				    struct arm_smmu_cmdq_ent *ent)
912 {
913 	u64 cmd[CMDQ_ENT_DWORDS];
914 	unsigned long flags;
915 
916 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
917 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
918 			 ent->opcode);
919 		return;
920 	}
921 
922 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
923 	arm_smmu_cmdq_insert_cmd(smmu, cmd);
924 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
925 }
926 
927 /*
928  * The difference between val and sync_idx is bounded by the maximum size of
929  * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
930  */
931 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
932 {
933 	ktime_t timeout;
934 	u32 val;
935 
936 	timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
937 	val = smp_cond_load_acquire(&smmu->sync_count,
938 				    (int)(VAL - sync_idx) >= 0 ||
939 				    !ktime_before(ktime_get(), timeout));
940 
941 	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
942 }
943 
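/*
 * Issue a CMD_SYNC whose completion is signalled by an MSI write of a
 * sequence number to smmu->sync_count, then wait for that value (or a
 * newer one) to appear instead of polling the queue consumer pointer.
 */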
944 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
945 {
946 	u64 cmd[CMDQ_ENT_DWORDS];
947 	unsigned long flags;
948 	struct arm_smmu_cmdq_ent ent = {
949 		.opcode = CMDQ_OP_CMD_SYNC,
950 		.sync	= {
951 			.msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
952 			.msiaddr = virt_to_phys(&smmu->sync_count),
953 		},
954 	};
955 
956 	arm_smmu_cmdq_build_cmd(cmd, &ent);
957 
958 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
959 	arm_smmu_cmdq_insert_cmd(smmu, cmd);
960 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
961 
962 	return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
963 }
964 
965 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
966 {
967 	u64 cmd[CMDQ_ENT_DWORDS];
968 	unsigned long flags;
969 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
970 	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
971 	int ret;
972 
973 	arm_smmu_cmdq_build_cmd(cmd, &ent);
974 
975 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
976 	arm_smmu_cmdq_insert_cmd(smmu, cmd);
977 	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
978 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
979 
980 	return ret;
981 }
982 
983 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
984 {
985 	int ret;
986 	bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
987 		   (smmu->features & ARM_SMMU_FEAT_COHERENCY);
988 
989 	ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
990 		  : __arm_smmu_cmdq_issue_sync(smmu);
991 	if (ret)
992 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
993 }
994 
995 /* Context descriptor manipulation functions */
996 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
997 {
998 	u64 val = 0;
999 
1000 	/* Repack the TCR. Just care about TTBR0 for now */
1001 	val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1002 	val |= ARM_SMMU_TCR2CD(tcr, TG0);
1003 	val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1004 	val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1005 	val |= ARM_SMMU_TCR2CD(tcr, SH0);
1006 	val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1007 	val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1008 	val |= ARM_SMMU_TCR2CD(tcr, IPS);
1009 	val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1010 
1011 	return val;
1012 }
1013 
1014 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1015 				    struct arm_smmu_s1_cfg *cfg)
1016 {
1017 	u64 val;
1018 
1019 	/*
1020 	 * We don't need to issue any invalidation here, as we'll invalidate
1021 	 * the STE when installing the new entry anyway.
1022 	 */
1023 	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1024 #ifdef __BIG_ENDIAN
1025 	      CTXDESC_CD_0_ENDI |
1026 #endif
1027 	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1028 	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1029 	      CTXDESC_CD_0_V;
1030 
1031 	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1032 	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1033 		val |= CTXDESC_CD_0_S;
1034 
1035 	cfg->cdptr[0] = cpu_to_le64(val);
1036 
1037 	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1038 	cfg->cdptr[1] = cpu_to_le64(val);
1039 
1040 	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1041 }
1042 
1043 /* Stream table manipulation functions */
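/* An L1 descriptor encodes the span and physical address of its L2 table. */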
1044 static void
1045 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1046 {
1047 	u64 val = 0;
1048 
1049 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1050 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1051 
1052 	*dst = cpu_to_le64(val);
1053 }
1054 
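/* Invalidate any cached copy of the STE for @sid and wait for completion. */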
1055 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1056 {
1057 	struct arm_smmu_cmdq_ent cmd = {
1058 		.opcode	= CMDQ_OP_CFGI_STE,
1059 		.cfgi	= {
1060 			.sid	= sid,
1061 			.leaf	= true,
1062 		},
1063 	};
1064 
1065 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1066 	arm_smmu_cmdq_issue_sync(smmu);
1067 }
1068 
1069 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1070 				      __le64 *dst, struct arm_smmu_strtab_ent *ste)
1071 {
1072 	/*
1073 	 * This is hideously complicated, but we only really care about
1074 	 * three cases at the moment:
1075 	 *
1076 	 * 1. Invalid (all zero) -> bypass/fault (init)
1077 	 * 2. Bypass/fault -> translation/bypass (attach)
1078 	 * 3. Translation/bypass -> bypass/fault (detach)
1079 	 *
1080 	 * Given that we can't update the STE atomically and the SMMU
1081 	 * doesn't read the thing in a defined order, that leaves us
1082 	 * with the following maintenance requirements:
1083 	 *
1084 	 * 1. Update Config, return (init time STEs aren't live)
1085 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1086 	 * 3. Update Config, sync
1087 	 */
1088 	u64 val = le64_to_cpu(dst[0]);
1089 	bool ste_live = false;
1090 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1091 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1092 		.prefetch	= {
1093 			.sid	= sid,
1094 		},
1095 	};
1096 
1097 	if (val & STRTAB_STE_0_V) {
1098 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1099 		case STRTAB_STE_0_CFG_BYPASS:
1100 			break;
1101 		case STRTAB_STE_0_CFG_S1_TRANS:
1102 		case STRTAB_STE_0_CFG_S2_TRANS:
1103 			ste_live = true;
1104 			break;
1105 		case STRTAB_STE_0_CFG_ABORT:
1106 			if (disable_bypass)
1107 				break;
1108 		default:
1109 			BUG(); /* STE corruption */
1110 		}
1111 	}
1112 
1113 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1114 	val = STRTAB_STE_0_V;
1115 
1116 	/* Bypass/fault */
1117 	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1118 		if (!ste->assigned && disable_bypass)
1119 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1120 		else
1121 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1122 
1123 		dst[0] = cpu_to_le64(val);
1124 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1125 						STRTAB_STE_1_SHCFG_INCOMING));
1126 		dst[2] = 0; /* Nuke the VMID */
1127 		/*
1128 		 * The SMMU can perform negative caching, so we must sync
1129 		 * the STE regardless of whether the old value was live.
1130 		 */
1131 		if (smmu)
1132 			arm_smmu_sync_ste_for_sid(smmu, sid);
1133 		return;
1134 	}
1135 
1136 	if (ste->s1_cfg) {
1137 		BUG_ON(ste_live);
1138 		dst[1] = cpu_to_le64(
1139 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1140 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1141 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1142 #ifdef CONFIG_PCI_ATS
1143 			 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1144 #endif
1145 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1146 
1147 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1148 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1149 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1150 
1151 		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1152 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1153 	}
1154 
1155 	if (ste->s2_cfg) {
1156 		BUG_ON(ste_live);
1157 		dst[2] = cpu_to_le64(
1158 			 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1159 			 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1160 #ifdef __BIG_ENDIAN
1161 			 STRTAB_STE_2_S2ENDI |
1162 #endif
1163 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1164 			 STRTAB_STE_2_S2R);
1165 
1166 		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1167 
1168 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1169 	}
1170 
1171 	arm_smmu_sync_ste_for_sid(smmu, sid);
1172 	dst[0] = cpu_to_le64(val);
1173 	arm_smmu_sync_ste_for_sid(smmu, sid);
1174 
1175 	/* It's likely that we'll want to use the new STE soon */
1176 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1177 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1178 }
1179 
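/*
 * Mark a block of STEs as unassigned so that they resolve to either bypass
 * or abort, depending on the disable_bypass parameter.
 */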
1180 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1181 {
1182 	unsigned int i;
1183 	struct arm_smmu_strtab_ent ste = { .assigned = false };
1184 
1185 	for (i = 0; i < nent; ++i) {
1186 		arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1187 		strtab += STRTAB_STE_DWORDS;
1188 	}
1189 }
1190 
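/*
 * Lazily allocate the L2 table covering this SID's 2^STRTAB_SPLIT-entry
 * span, initialise its STEs to bypass/abort and install its L1 descriptor.
 */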
1191 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1192 {
1193 	size_t size;
1194 	void *strtab;
1195 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1196 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1197 
1198 	if (desc->l2ptr)
1199 		return 0;
1200 
1201 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1202 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1203 
1204 	desc->span = STRTAB_SPLIT + 1;
1205 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1206 					  GFP_KERNEL | __GFP_ZERO);
1207 	if (!desc->l2ptr) {
1208 		dev_err(smmu->dev,
1209 			"failed to allocate l2 stream table for SID %u\n",
1210 			sid);
1211 		return -ENOMEM;
1212 	}
1213 
1214 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1215 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1216 	return 0;
1217 }
1218 
1219 /* IRQ and event handlers */
1220 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1221 {
1222 	int i;
1223 	struct arm_smmu_device *smmu = dev;
1224 	struct arm_smmu_queue *q = &smmu->evtq.q;
1225 	u64 evt[EVTQ_ENT_DWORDS];
1226 
1227 	do {
1228 		while (!queue_remove_raw(q, evt)) {
1229 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1230 
1231 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1232 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1233 				dev_info(smmu->dev, "\t0x%016llx\n",
1234 					 (unsigned long long)evt[i]);
1235 
1236 		}
1237 
1238 		/*
1239 		 * Not much we can do on overflow, so scream and pretend we're
1240 		 * trying harder.
1241 		 */
1242 		if (queue_sync_prod(q) == -EOVERFLOW)
1243 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1244 	} while (!queue_empty(q));
1245 
1246 	/* Sync our overflow flag, as we believe we're up to speed */
1247 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1248 	return IRQ_HANDLED;
1249 }
1250 
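/*
 * PRI requests are not expected here; log the entry and, when it is the
 * last request in its group, send back a DENY response.
 */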
1251 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1252 {
1253 	u32 sid, ssid;
1254 	u16 grpid;
1255 	bool ssv, last;
1256 
1257 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1258 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1259 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1260 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1261 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1262 
1263 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1264 	dev_info(smmu->dev,
1265 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1266 		 sid, ssid, grpid, last ? "L" : "",
1267 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1268 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1269 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1270 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1271 		 evt[1] & PRIQ_1_ADDR_MASK);
1272 
1273 	if (last) {
1274 		struct arm_smmu_cmdq_ent cmd = {
1275 			.opcode			= CMDQ_OP_PRI_RESP,
1276 			.substream_valid	= ssv,
1277 			.pri			= {
1278 				.sid	= sid,
1279 				.ssid	= ssid,
1280 				.grpid	= grpid,
1281 				.resp	= PRI_RESP_DENY,
1282 			},
1283 		};
1284 
1285 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1286 	}
1287 }
1288 
1289 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1290 {
1291 	struct arm_smmu_device *smmu = dev;
1292 	struct arm_smmu_queue *q = &smmu->priq.q;
1293 	u64 evt[PRIQ_ENT_DWORDS];
1294 
1295 	do {
1296 		while (!queue_remove_raw(q, evt))
1297 			arm_smmu_handle_ppr(smmu, evt);
1298 
1299 		if (queue_sync_prod(q) == -EOVERFLOW)
1300 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1301 	} while (!queue_empty(q));
1302 
1303 	/* Sync our overflow flag, as we believe we're up to speed */
1304 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1305 	writel(q->cons, q->cons_reg);
1306 	return IRQ_HANDLED;
1307 }
1308 
1309 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1310 
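/*
 * Active global errors are the bits on which GERROR and GERRORN disagree;
 * writing the GERROR value back to GERRORN acknowledges them all.
 */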
1311 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1312 {
1313 	u32 gerror, gerrorn, active;
1314 	struct arm_smmu_device *smmu = dev;
1315 
1316 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1317 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1318 
1319 	active = gerror ^ gerrorn;
1320 	if (!(active & GERROR_ERR_MASK))
1321 		return IRQ_NONE; /* No errors pending */
1322 
1323 	dev_warn(smmu->dev,
1324 		 "unexpected global error reported (0x%08x), this could be serious\n",
1325 		 active);
1326 
1327 	if (active & GERROR_SFM_ERR) {
1328 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1329 		arm_smmu_device_disable(smmu);
1330 	}
1331 
1332 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1333 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1334 
1335 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1336 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1337 
1338 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1339 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1340 
1341 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1342 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1343 
1344 	if (active & GERROR_PRIQ_ABT_ERR)
1345 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1346 
1347 	if (active & GERROR_EVTQ_ABT_ERR)
1348 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1349 
1350 	if (active & GERROR_CMDQ_ERR)
1351 		arm_smmu_cmdq_skip_err(smmu);
1352 
1353 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1354 	return IRQ_HANDLED;
1355 }
1356 
1357 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1358 {
1359 	struct arm_smmu_device *smmu = dev;
1360 
1361 	arm_smmu_evtq_thread(irq, dev);
1362 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1363 		arm_smmu_priq_thread(irq, dev);
1364 
1365 	return IRQ_HANDLED;
1366 }
1367 
1368 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1369 {
1370 	arm_smmu_gerror_handler(irq, dev);
1371 	return IRQ_WAKE_THREAD;
1372 }
1373 
1374 /* IO_PGTABLE API */
1375 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1376 {
1377 	arm_smmu_cmdq_issue_sync(smmu);
1378 }
1379 
1380 static void arm_smmu_tlb_sync(void *cookie)
1381 {
1382 	struct arm_smmu_domain *smmu_domain = cookie;
1383 	__arm_smmu_tlb_sync(smmu_domain->smmu);
1384 }
1385 
1386 static void arm_smmu_tlb_inv_context(void *cookie)
1387 {
1388 	struct arm_smmu_domain *smmu_domain = cookie;
1389 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1390 	struct arm_smmu_cmdq_ent cmd;
1391 
1392 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1393 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
1394 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1395 		cmd.tlbi.vmid	= 0;
1396 	} else {
1397 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1398 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1399 	}
1400 
1401 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1402 	__arm_smmu_tlb_sync(smmu);
1403 }
1404 
1405 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1406 					  size_t granule, bool leaf, void *cookie)
1407 {
1408 	struct arm_smmu_domain *smmu_domain = cookie;
1409 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1410 	struct arm_smmu_cmdq_ent cmd = {
1411 		.tlbi = {
1412 			.leaf	= leaf,
1413 			.addr	= iova,
1414 		},
1415 	};
1416 
1417 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1418 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1419 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1420 	} else {
1421 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1422 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1423 	}
1424 
1425 	do {
1426 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1427 		cmd.tlbi.addr += granule;
1428 	} while (size -= granule);
1429 }
1430 
1431 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1432 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1433 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
1434 	.tlb_sync	= arm_smmu_tlb_sync,
1435 };
1436 
1437 /* IOMMU API */
1438 static bool arm_smmu_capable(enum iommu_cap cap)
1439 {
1440 	switch (cap) {
1441 	case IOMMU_CAP_CACHE_COHERENCY:
1442 		return true;
1443 	case IOMMU_CAP_NOEXEC:
1444 		return true;
1445 	default:
1446 		return false;
1447 	}
1448 }
1449 
1450 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1451 {
1452 	struct arm_smmu_domain *smmu_domain;
1453 
1454 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1455 	    type != IOMMU_DOMAIN_DMA &&
1456 	    type != IOMMU_DOMAIN_IDENTITY)
1457 		return NULL;
1458 
1459 	/*
1460 	 * Allocate the domain and initialise some of its data structures.
1461 	 * We can't really do anything meaningful until we've added a
1462 	 * master.
1463 	 */
1464 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1465 	if (!smmu_domain)
1466 		return NULL;
1467 
1468 	if (type == IOMMU_DOMAIN_DMA &&
1469 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1470 		kfree(smmu_domain);
1471 		return NULL;
1472 	}
1473 
1474 	mutex_init(&smmu_domain->init_mutex);
1475 	return &smmu_domain->domain;
1476 }
1477 
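/*
 * Allocate an ASID/VMID from a bitmap without holding a lock: retry the
 * search whenever test_and_set_bit() loses the race for a free bit.
 */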
1478 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1479 {
1480 	int idx, size = 1 << span;
1481 
1482 	do {
1483 		idx = find_first_zero_bit(map, size);
1484 		if (idx == size)
1485 			return -ENOSPC;
1486 	} while (test_and_set_bit(idx, map));
1487 
1488 	return idx;
1489 }
1490 
1491 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1492 {
1493 	clear_bit(idx, map);
1494 }
1495 
1496 static void arm_smmu_domain_free(struct iommu_domain *domain)
1497 {
1498 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1499 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1500 
1501 	iommu_put_dma_cookie(domain);
1502 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1503 
1504 	/* Free the CD and ASID, if we allocated them */
1505 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1506 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1507 
1508 		if (cfg->cdptr) {
1509 			dmam_free_coherent(smmu_domain->smmu->dev,
1510 					   CTXDESC_CD_DWORDS << 3,
1511 					   cfg->cdptr,
1512 					   cfg->cdptr_dma);
1513 
1514 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1515 		}
1516 	} else {
1517 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1518 		if (cfg->vmid)
1519 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1520 	}
1521 
1522 	kfree(smmu_domain);
1523 }
1524 
1525 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1526 				       struct io_pgtable_cfg *pgtbl_cfg)
1527 {
1528 	int ret;
1529 	int asid;
1530 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1531 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1532 
1533 	asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1534 	if (asid < 0)
1535 		return asid;
1536 
1537 	cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1538 					 &cfg->cdptr_dma,
1539 					 GFP_KERNEL | __GFP_ZERO);
1540 	if (!cfg->cdptr) {
1541 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1542 		ret = -ENOMEM;
1543 		goto out_free_asid;
1544 	}
1545 
1546 	cfg->cd.asid	= (u16)asid;
1547 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1548 	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1549 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1550 	return 0;
1551 
1552 out_free_asid:
1553 	arm_smmu_bitmap_free(smmu->asid_map, asid);
1554 	return ret;
1555 }
1556 
1557 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1558 				       struct io_pgtable_cfg *pgtbl_cfg)
1559 {
1560 	int vmid;
1561 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1562 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1563 
1564 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1565 	if (vmid < 0)
1566 		return vmid;
1567 
1568 	cfg->vmid	= (u16)vmid;
1569 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1570 	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1571 	return 0;
1572 }
1573 
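/*
 * Pick a translation stage the hardware actually supports, derive the
 * input/output address sizes for that stage and allocate the LPAE
 * io-pgtable that will back this domain.
 */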
1574 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1575 {
1576 	int ret;
1577 	unsigned long ias, oas;
1578 	enum io_pgtable_fmt fmt;
1579 	struct io_pgtable_cfg pgtbl_cfg;
1580 	struct io_pgtable_ops *pgtbl_ops;
1581 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1582 				 struct io_pgtable_cfg *);
1583 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1584 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1585 
1586 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1587 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1588 		return 0;
1589 	}
1590 
1591 	/* Restrict the stage to what we can actually support */
1592 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1593 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1594 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1595 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1596 
1597 	switch (smmu_domain->stage) {
1598 	case ARM_SMMU_DOMAIN_S1:
1599 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1600 		ias = min_t(unsigned long, ias, VA_BITS);
1601 		oas = smmu->ias;
1602 		fmt = ARM_64_LPAE_S1;
1603 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1604 		break;
1605 	case ARM_SMMU_DOMAIN_NESTED:
1606 	case ARM_SMMU_DOMAIN_S2:
1607 		ias = smmu->ias;
1608 		oas = smmu->oas;
1609 		fmt = ARM_64_LPAE_S2;
1610 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1611 		break;
1612 	default:
1613 		return -EINVAL;
1614 	}
1615 
1616 	pgtbl_cfg = (struct io_pgtable_cfg) {
1617 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1618 		.ias		= ias,
1619 		.oas		= oas,
1620 		.tlb		= &arm_smmu_gather_ops,
1621 		.iommu_dev	= smmu->dev,
1622 	};
1623 
1624 	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1625 		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1626 
1627 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1628 	if (!pgtbl_ops)
1629 		return -ENOMEM;
1630 
1631 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1632 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1633 	domain->geometry.force_aperture = true;
1634 
1635 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1636 	if (ret < 0) {
1637 		free_io_pgtable_ops(pgtbl_ops);
1638 		return ret;
1639 	}
1640 
1641 	smmu_domain->pgtbl_ops = pgtbl_ops;
1642 	return 0;
1643 }
1644 
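/*
 * Return a pointer to the STE for @sid. With a two-level table, the upper
 * SID bits select an L1 descriptor and the low STRTAB_SPLIT bits index the
 * corresponding L2 table: with STRTAB_SPLIT == 8, SID 0x1234 uses L1
 * descriptor 0x12 and STE 0x34 within its L2 table.
 */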
1645 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1646 {
1647 	__le64 *step;
1648 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1649 
1650 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1651 		struct arm_smmu_strtab_l1_desc *l1_desc;
1652 		int idx;
1653 
1654 		/* Two-level walk */
1655 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1656 		l1_desc = &cfg->l1_desc[idx];
1657 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1658 		step = &l1_desc->l2ptr[idx];
1659 	} else {
1660 		/* Simple linear lookup */
1661 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1662 	}
1663 
1664 	return step;
1665 }
1666 
1667 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1668 {
1669 	int i, j;
1670 	struct arm_smmu_master_data *master = fwspec->iommu_priv;
1671 	struct arm_smmu_device *smmu = master->smmu;
1672 
1673 	for (i = 0; i < fwspec->num_ids; ++i) {
1674 		u32 sid = fwspec->ids[i];
1675 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1676 
1677 		/* Bridged PCI devices may end up with duplicated IDs */
1678 		for (j = 0; j < i; j++)
1679 			if (fwspec->ids[j] == sid)
1680 				break;
1681 		if (j < i)
1682 			continue;
1683 
1684 		arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1685 	}
1686 }
1687 
1688 static void arm_smmu_detach_dev(struct device *dev)
1689 {
1690 	struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1691 
1692 	master->ste.assigned = false;
1693 	arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1694 }
1695 
1696 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1697 {
1698 	int ret = 0;
1699 	struct arm_smmu_device *smmu;
1700 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1701 	struct arm_smmu_master_data *master;
1702 	struct arm_smmu_strtab_ent *ste;
1703 
1704 	if (!dev->iommu_fwspec)
1705 		return -ENOENT;
1706 
1707 	master = dev->iommu_fwspec->iommu_priv;
1708 	smmu = master->smmu;
1709 	ste = &master->ste;
1710 
1711 	/* Already attached to a different domain? */
1712 	if (ste->assigned)
1713 		arm_smmu_detach_dev(dev);
1714 
1715 	mutex_lock(&smmu_domain->init_mutex);
1716 
1717 	if (!smmu_domain->smmu) {
1718 		smmu_domain->smmu = smmu;
1719 		ret = arm_smmu_domain_finalise(domain);
1720 		if (ret) {
1721 			smmu_domain->smmu = NULL;
1722 			goto out_unlock;
1723 		}
1724 	} else if (smmu_domain->smmu != smmu) {
1725 		dev_err(dev,
1726 			"cannot attach to SMMU %s (upstream of %s)\n",
1727 			dev_name(smmu_domain->smmu->dev),
1728 			dev_name(smmu->dev));
1729 		ret = -ENXIO;
1730 		goto out_unlock;
1731 	}
1732 
1733 	ste->assigned = true;
1734 
1735 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1736 		ste->s1_cfg = NULL;
1737 		ste->s2_cfg = NULL;
1738 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1739 		ste->s1_cfg = &smmu_domain->s1_cfg;
1740 		ste->s2_cfg = NULL;
1741 		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1742 	} else {
1743 		ste->s1_cfg = NULL;
1744 		ste->s2_cfg = &smmu_domain->s2_cfg;
1745 	}
1746 
1747 	arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1748 out_unlock:
1749 	mutex_unlock(&smmu_domain->init_mutex);
1750 	return ret;
1751 }
1752 
1753 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1754 			phys_addr_t paddr, size_t size, int prot)
1755 {
1756 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1757 
1758 	if (!ops)
1759 		return -ENODEV;
1760 
1761 	return ops->map(ops, iova, paddr, size, prot);
1762 }
1763 
1764 static size_t
1765 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1766 {
1767 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1768 
1769 	if (!ops)
1770 		return 0;
1771 
1772 	return ops->unmap(ops, iova, size);
1773 }
1774 
1775 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1776 {
1777 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1778 
1779 	if (smmu)
1780 		__arm_smmu_tlb_sync(smmu);
1781 }
1782 
1783 static phys_addr_t
1784 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1785 {
1786 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1787 
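	/* Identity domains map IOVAs 1:1, so return the input address */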
1788 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
1789 		return iova;
1790 
1791 	if (!ops)
1792 		return 0;
1793 
1794 	return ops->iova_to_phys(ops, iova);
1795 }
1796 
1797 static struct platform_driver arm_smmu_driver;
1798 
1799 static int arm_smmu_match_node(struct device *dev, void *data)
1800 {
1801 	return dev->fwnode == data;
1802 }
1803 
1804 static
1805 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1806 {
1807 	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1808 						fwnode, arm_smmu_match_node);
1809 	put_device(dev);
1810 	return dev ? dev_get_drvdata(dev) : NULL;
1811 }
1812 
1813 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1814 {
1815 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1816 
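	/* With a 2-level table, each L1 descriptor covers 2^STRTAB_SPLIT STEs */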
1817 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1818 		limit *= 1UL << STRTAB_SPLIT;
1819 
1820 	return sid < limit;
1821 }
1822 
1823 static struct iommu_ops arm_smmu_ops;
1824 
1825 static int arm_smmu_add_device(struct device *dev)
1826 {
1827 	int i, ret;
1828 	struct arm_smmu_device *smmu;
1829 	struct arm_smmu_master_data *master;
1830 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1831 	struct iommu_group *group;
1832 
1833 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1834 		return -ENODEV;
1835 	/*
1836 	 * We _can_ actually withstand dodgy bus code re-calling add_device()
1837 	 * without an intervening remove_device()/of_xlate() sequence, but
1838 	 * we're not going to do so quietly...
1839 	 */
1840 	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1841 		master = fwspec->iommu_priv;
1842 		smmu = master->smmu;
1843 	} else {
1844 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1845 		if (!smmu)
1846 			return -ENODEV;
1847 		master = kzalloc(sizeof(*master), GFP_KERNEL);
1848 		if (!master)
1849 			return -ENOMEM;
1850 
1851 		master->smmu = smmu;
1852 		fwspec->iommu_priv = master;
1853 	}
1854 
1855 	/* Check the SIDs are in range of the SMMU and our stream table */
1856 	for (i = 0; i < fwspec->num_ids; i++) {
1857 		u32 sid = fwspec->ids[i];
1858 
1859 		if (!arm_smmu_sid_in_range(smmu, sid))
1860 			return -ERANGE;
1861 
1862 		/* Ensure l2 strtab is initialised */
1863 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1864 			ret = arm_smmu_init_l2_strtab(smmu, sid);
1865 			if (ret)
1866 				return ret;
1867 		}
1868 	}
1869 
1870 	group = iommu_group_get_for_dev(dev);
1871 	if (!IS_ERR(group)) {
1872 		iommu_group_put(group);
1873 		iommu_device_link(&smmu->iommu, dev);
1874 	}
1875 
1876 	return PTR_ERR_OR_ZERO(group);
1877 }
1878 
1879 static void arm_smmu_remove_device(struct device *dev)
1880 {
1881 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1882 	struct arm_smmu_master_data *master;
1883 	struct arm_smmu_device *smmu;
1884 
1885 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1886 		return;
1887 
1888 	master = fwspec->iommu_priv;
1889 	smmu = master->smmu;
1890 	if (master->ste.assigned)
1891 		arm_smmu_detach_dev(dev);
1892 	iommu_group_remove_device(dev);
1893 	iommu_device_unlink(&smmu->iommu, dev);
1894 	kfree(master);
1895 	iommu_fwspec_free(dev);
1896 }
1897 
1898 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1899 {
1900 	struct iommu_group *group;
1901 
1902 	/*
1903 	 * We don't support devices sharing stream IDs other than PCI RID
1904 	 * aliases, since the necessary ID-to-device lookup becomes rather
1905 	 * impractical given a potential sparse 32-bit stream ID space.
1906 	 */
1907 	if (dev_is_pci(dev))
1908 		group = pci_device_group(dev);
1909 	else
1910 		group = generic_device_group(dev);
1911 
1912 	return group;
1913 }
1914 
1915 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1916 				    enum iommu_attr attr, void *data)
1917 {
1918 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1919 
1920 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1921 		return -EINVAL;
1922 
1923 	switch (attr) {
1924 	case DOMAIN_ATTR_NESTING:
1925 		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1926 		return 0;
1927 	default:
1928 		return -ENODEV;
1929 	}
1930 }
1931 
1932 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1933 				    enum iommu_attr attr, void *data)
1934 {
1935 	int ret = 0;
1936 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1937 
1938 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1939 		return -EINVAL;
1940 
1941 	mutex_lock(&smmu_domain->init_mutex);
1942 
1943 	switch (attr) {
1944 	case DOMAIN_ATTR_NESTING:
1945 		if (smmu_domain->smmu) {
1946 			ret = -EPERM;
1947 			goto out_unlock;
1948 		}
1949 
1950 		if (*(int *)data)
1951 			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1952 		else
1953 			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1954 
1955 		break;
1956 	default:
1957 		ret = -ENODEV;
1958 	}
1959 
1960 out_unlock:
1961 	mutex_unlock(&smmu_domain->init_mutex);
1962 	return ret;
1963 }
1964 
1965 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1966 {
1967 	return iommu_fwspec_add_ids(dev, args->args, 1);
1968 }
1969 
1970 static void arm_smmu_get_resv_regions(struct device *dev,
1971 				      struct list_head *head)
1972 {
1973 	struct iommu_resv_region *region;
1974 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1975 
1976 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1977 					 prot, IOMMU_RESV_SW_MSI);
1978 	if (!region)
1979 		return;
1980 
1981 	list_add_tail(&region->list, head);
1982 
1983 	iommu_dma_get_resv_regions(dev, head);
1984 }
1985 
1986 static void arm_smmu_put_resv_regions(struct device *dev,
1987 				      struct list_head *head)
1988 {
1989 	struct iommu_resv_region *entry, *next;
1990 
1991 	list_for_each_entry_safe(entry, next, head, list)
1992 		kfree(entry);
1993 }
1994 
1995 static struct iommu_ops arm_smmu_ops = {
1996 	.capable		= arm_smmu_capable,
1997 	.domain_alloc		= arm_smmu_domain_alloc,
1998 	.domain_free		= arm_smmu_domain_free,
1999 	.attach_dev		= arm_smmu_attach_dev,
2000 	.map			= arm_smmu_map,
2001 	.unmap			= arm_smmu_unmap,
2002 	.flush_iotlb_all	= arm_smmu_iotlb_sync,
2003 	.iotlb_sync		= arm_smmu_iotlb_sync,
2004 	.iova_to_phys		= arm_smmu_iova_to_phys,
2005 	.add_device		= arm_smmu_add_device,
2006 	.remove_device		= arm_smmu_remove_device,
2007 	.device_group		= arm_smmu_device_group,
2008 	.domain_get_attr	= arm_smmu_domain_get_attr,
2009 	.domain_set_attr	= arm_smmu_domain_set_attr,
2010 	.of_xlate		= arm_smmu_of_xlate,
2011 	.get_resv_regions	= arm_smmu_get_resv_regions,
2012 	.put_resv_regions	= arm_smmu_put_resv_regions,
2013 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2014 };
2015 
2016 /* Probing and initialisation functions */
2017 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2018 				   struct arm_smmu_queue *q,
2019 				   unsigned long prod_off,
2020 				   unsigned long cons_off,
2021 				   size_t dwords)
2022 {
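	/* Queue size in bytes: 2^max_n_shift entries of 'dwords' 64-bit words */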
2023 	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2024 
2025 	q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2026 	if (!q->base) {
2027 		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2028 			qsz);
2029 		return -ENOMEM;
2030 	}
2031 
2032 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2033 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2034 	q->ent_dwords	= dwords;
2035 
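	/* Compose the queue base register: RWA hint, DMA address and log2 size */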
2036 	q->q_base  = Q_BASE_RWA;
2037 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2038 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2039 
2040 	q->prod = q->cons = 0;
2041 	return 0;
2042 }
2043 
2044 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2045 {
2046 	int ret;
2047 
2048 	/* cmdq */
2049 	spin_lock_init(&smmu->cmdq.lock);
2050 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2051 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2052 	if (ret)
2053 		return ret;
2054 
2055 	/* evtq */
2056 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2057 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2058 	if (ret)
2059 		return ret;
2060 
2061 	/* priq */
2062 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2063 		return 0;
2064 
2065 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2066 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2067 }
2068 
2069 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2070 {
2071 	unsigned int i;
2072 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2073 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2074 	void *strtab = smmu->strtab_cfg.strtab;
2075 
2076 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2077 	if (!cfg->l1_desc) {
2078 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2079 		return -ENOMEM;
2080 	}
2081 
2082 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2083 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2084 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2085 	}
2086 
2087 	return 0;
2088 }
2089 
2090 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2091 {
2092 	void *strtab;
2093 	u64 reg;
2094 	u32 size, l1size;
2095 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2096 
2097 	/* Calculate the L1 size, capped to the SIDSIZE. */
2098 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2099 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2100 	cfg->num_l1_ents = 1 << size;
2101 
2102 	size += STRTAB_SPLIT;
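	/* 'size' now counts the total SID bits covered by the 2-level table */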
2103 	if (size < smmu->sid_bits)
2104 		dev_warn(smmu->dev,
2105 			 "2-level strtab only covers %u/%u bits of SID\n",
2106 			 size, smmu->sid_bits);
2107 
2108 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2109 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2110 				     GFP_KERNEL | __GFP_ZERO);
2111 	if (!strtab) {
2112 		dev_err(smmu->dev,
2113 			"failed to allocate l1 stream table (%u bytes)\n",
2114 			l1size);
2115 		return -ENOMEM;
2116 	}
2117 	cfg->strtab = strtab;
2118 
2119 	/* Configure strtab_base_cfg for 2 levels */
2120 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2121 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2122 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2123 	cfg->strtab_base_cfg = reg;
2124 
2125 	return arm_smmu_init_l1_strtab(smmu);
2126 }
2127 
2128 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2129 {
2130 	void *strtab;
2131 	u64 reg;
2132 	u32 size;
2133 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2134 
2135 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2136 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2137 				     GFP_KERNEL | __GFP_ZERO);
2138 	if (!strtab) {
2139 		dev_err(smmu->dev,
2140 			"failed to allocate linear stream table (%u bytes)\n",
2141 			size);
2142 		return -ENOMEM;
2143 	}
2144 	cfg->strtab = strtab;
2145 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2146 
2147 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2148 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2149 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2150 	cfg->strtab_base_cfg = reg;
2151 
2152 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2153 	return 0;
2154 }
2155 
2156 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2157 {
2158 	u64 reg;
2159 	int ret;
2160 
2161 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2162 		ret = arm_smmu_init_strtab_2lvl(smmu);
2163 	else
2164 		ret = arm_smmu_init_strtab_linear(smmu);
2165 
2166 	if (ret)
2167 		return ret;
2168 
2169 	/* Set the strtab base address */
2170 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2171 	reg |= STRTAB_BASE_RA;
2172 	smmu->strtab_cfg.strtab_base = reg;
2173 
2174 	/* Allocate the first VMID for stage-2 bypass STEs */
2175 	set_bit(0, smmu->vmid_map);
2176 	return 0;
2177 }
2178 
2179 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2180 {
2181 	int ret;
2182 
2183 	atomic_set(&smmu->sync_nr, 0);
2184 	ret = arm_smmu_init_queues(smmu);
2185 	if (ret)
2186 		return ret;
2187 
2188 	return arm_smmu_init_strtab(smmu);
2189 }
2190 
2191 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2192 				   unsigned int reg_off, unsigned int ack_off)
2193 {
2194 	u32 reg;
2195 
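	/* Write the register, then poll its ACK register until the new value is reflected */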
2196 	writel_relaxed(val, smmu->base + reg_off);
2197 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2198 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2199 }
2200 
2201 /* GBPA is "special" */
2202 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2203 {
2204 	int ret;
2205 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2206 
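	/*
	 * GBPA updates are handshaked via the UPDATE bit: wait for any pending
	 * update to complete, modify the fields, then write the new value with
	 * UPDATE set and wait for the bit to clear again.
	 */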
2207 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2208 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2209 	if (ret)
2210 		return ret;
2211 
2212 	reg &= ~clr;
2213 	reg |= set;
2214 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2215 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2216 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2217 
2218 	if (ret)
2219 		dev_err(smmu->dev, "GBPA not responding to update\n");
2220 	return ret;
2221 }
2222 
2223 static void arm_smmu_free_msis(void *data)
2224 {
2225 	struct device *dev = data;
2226 	platform_msi_domain_free_irqs(dev);
2227 }
2228 
2229 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2230 {
2231 	phys_addr_t doorbell;
2232 	struct device *dev = msi_desc_to_dev(desc);
2233 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2234 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2235 
2236 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2237 	doorbell &= MSI_CFG0_ADDR_MASK;
2238 
2239 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2240 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2241 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2242 }
2243 
2244 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2245 {
2246 	struct msi_desc *desc;
2247 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2248 	struct device *dev = smmu->dev;
2249 
2250 	/* Clear the MSI address regs */
2251 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2252 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2253 
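	/* Only clear the PRIQ doorbell when PRI is supported; otherwise allocate one fewer vector */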
2254 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2255 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2256 	else
2257 		nvec--;
2258 
2259 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2260 		return;
2261 
2262 	if (!dev->msi_domain) {
2263 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2264 		return;
2265 	}
2266 
2267 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2268 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2269 	if (ret) {
2270 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2271 		return;
2272 	}
2273 
2274 	for_each_msi_entry(desc, dev) {
2275 		switch (desc->platform.msi_index) {
2276 		case EVTQ_MSI_INDEX:
2277 			smmu->evtq.q.irq = desc->irq;
2278 			break;
2279 		case GERROR_MSI_INDEX:
2280 			smmu->gerr_irq = desc->irq;
2281 			break;
2282 		case PRIQ_MSI_INDEX:
2283 			smmu->priq.q.irq = desc->irq;
2284 			break;
2285 		default:	/* Unknown */
2286 			continue;
2287 		}
2288 	}
2289 
2290 	/* Add callback to free MSIs on teardown */
2291 	devm_add_action(dev, arm_smmu_free_msis, dev);
2292 }
2293 
2294 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2295 {
2296 	int irq, ret;
2297 
2298 	arm_smmu_setup_msis(smmu);
2299 
2300 	/* Request interrupt lines */
2301 	irq = smmu->evtq.q.irq;
2302 	if (irq) {
2303 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2304 						arm_smmu_evtq_thread,
2305 						IRQF_ONESHOT,
2306 						"arm-smmu-v3-evtq", smmu);
2307 		if (ret < 0)
2308 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2309 	} else {
2310 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2311 	}
2312 
2313 	irq = smmu->gerr_irq;
2314 	if (irq) {
2315 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2316 				       0, "arm-smmu-v3-gerror", smmu);
2317 		if (ret < 0)
2318 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2319 	} else {
2320 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2321 	}
2322 
2323 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2324 		irq = smmu->priq.q.irq;
2325 		if (irq) {
2326 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2327 							arm_smmu_priq_thread,
2328 							IRQF_ONESHOT,
2329 							"arm-smmu-v3-priq",
2330 							smmu);
2331 			if (ret < 0)
2332 				dev_warn(smmu->dev,
2333 					 "failed to enable priq irq\n");
2334 		} else {
2335 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2336 		}
2337 	}
2338 }
2339 
2340 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2341 {
2342 	int ret, irq;
2343 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2344 
2345 	/* Disable IRQs first */
2346 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2347 				      ARM_SMMU_IRQ_CTRLACK);
2348 	if (ret) {
2349 		dev_err(smmu->dev, "failed to disable irqs\n");
2350 		return ret;
2351 	}
2352 
2353 	irq = smmu->combined_irq;
2354 	if (irq) {
2355 		/*
2356 		 * Cavium ThunderX2 implementation doesn't support unique
2357 		 * irq lines. Use a single irq line for all the SMMUv3 interrupts.
2358 		 */
2359 		ret = devm_request_threaded_irq(smmu->dev, irq,
2360 					arm_smmu_combined_irq_handler,
2361 					arm_smmu_combined_irq_thread,
2362 					IRQF_ONESHOT,
2363 					"arm-smmu-v3-combined-irq", smmu);
2364 		if (ret < 0)
2365 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2366 	} else
2367 		arm_smmu_setup_unique_irqs(smmu);
2368 
2369 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2370 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2371 
2372 	/* Enable interrupt generation on the SMMU */
2373 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2374 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2375 	if (ret)
2376 		dev_warn(smmu->dev, "failed to enable irqs\n");
2377 
2378 	return 0;
2379 }
2380 
2381 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2382 {
2383 	int ret;
2384 
2385 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2386 	if (ret)
2387 		dev_err(smmu->dev, "failed to clear cr0\n");
2388 
2389 	return ret;
2390 }
2391 
2392 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2393 {
2394 	int ret;
2395 	u32 reg, enables;
2396 	struct arm_smmu_cmdq_ent cmd;
2397 
2398 	/* Clear CR0 and sync (disables SMMU and queue processing) */
2399 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2400 	if (reg & CR0_SMMUEN) {
2401 		if (is_kdump_kernel()) {
2402 			arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2403 			arm_smmu_device_disable(smmu);
2404 			return -EBUSY;
2405 		}
2406 
2407 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2408 	}
2409 
2410 	ret = arm_smmu_device_disable(smmu);
2411 	if (ret)
2412 		return ret;
2413 
2414 	/* CR1 (table and queue memory attributes) */
2415 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2416 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2417 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2418 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2419 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2420 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2421 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2422 
2423 	/* CR2 (random crap) */
2424 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2425 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2426 
2427 	/* Stream table */
2428 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
2429 		       smmu->base + ARM_SMMU_STRTAB_BASE);
2430 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2431 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2432 
2433 	/* Command queue */
2434 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2435 	writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2436 	writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2437 
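	/* Enable the command queue first; later queue/SMMU enables accumulate into CR0 */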
2438 	enables = CR0_CMDQEN;
2439 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2440 				      ARM_SMMU_CR0ACK);
2441 	if (ret) {
2442 		dev_err(smmu->dev, "failed to enable command queue\n");
2443 		return ret;
2444 	}
2445 
2446 	/* Invalidate any cached configuration */
2447 	cmd.opcode = CMDQ_OP_CFGI_ALL;
2448 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2449 	arm_smmu_cmdq_issue_sync(smmu);
2450 
2451 	/* Invalidate any stale TLB entries */
2452 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
2453 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2454 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2455 	}
2456 
2457 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2458 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2459 	arm_smmu_cmdq_issue_sync(smmu);
2460 
2461 	/* Event queue */
2462 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2463 	writel_relaxed(smmu->evtq.q.prod,
2464 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2465 	writel_relaxed(smmu->evtq.q.cons,
2466 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2467 
2468 	enables |= CR0_EVTQEN;
2469 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2470 				      ARM_SMMU_CR0ACK);
2471 	if (ret) {
2472 		dev_err(smmu->dev, "failed to enable event queue\n");
2473 		return ret;
2474 	}
2475 
2476 	/* PRI queue */
2477 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2478 		writeq_relaxed(smmu->priq.q.q_base,
2479 			       smmu->base + ARM_SMMU_PRIQ_BASE);
2480 		writel_relaxed(smmu->priq.q.prod,
2481 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2482 		writel_relaxed(smmu->priq.q.cons,
2483 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2484 
2485 		enables |= CR0_PRIQEN;
2486 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2487 					      ARM_SMMU_CR0ACK);
2488 		if (ret) {
2489 			dev_err(smmu->dev, "failed to enable PRI queue\n");
2490 			return ret;
2491 		}
2492 	}
2493 
2494 	ret = arm_smmu_setup_irqs(smmu);
2495 	if (ret) {
2496 		dev_err(smmu->dev, "failed to setup irqs\n");
2497 		return ret;
2498 	}
2499 
2500 
2501 	/* Enable the SMMU interface, or ensure bypass */
2502 	if (!bypass || disable_bypass) {
2503 		enables |= CR0_SMMUEN;
2504 	} else {
2505 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2506 		if (ret)
2507 			return ret;
2508 	}
2509 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2510 				      ARM_SMMU_CR0ACK);
2511 	if (ret) {
2512 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
2513 		return ret;
2514 	}
2515 
2516 	return 0;
2517 }
2518 
2519 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2520 {
2521 	u32 reg;
2522 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2523 
2524 	/* IDR0 */
2525 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2526 
2527 	/* 2-level structures */
2528 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2529 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2530 
2531 	if (reg & IDR0_CD2L)
2532 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2533 
2534 	/*
2535 	 * Translation table endianness.
2536 	 * We currently require the same endianness as the CPU, but this
2537 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2538 	 */
2539 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2540 	case IDR0_TTENDIAN_MIXED:
2541 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2542 		break;
2543 #ifdef __BIG_ENDIAN
2544 	case IDR0_TTENDIAN_BE:
2545 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
2546 		break;
2547 #else
2548 	case IDR0_TTENDIAN_LE:
2549 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
2550 		break;
2551 #endif
2552 	default:
2553 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2554 		return -ENXIO;
2555 	}
2556 
2557 	/* Boolean feature flags */
2558 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2559 		smmu->features |= ARM_SMMU_FEAT_PRI;
2560 
2561 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2562 		smmu->features |= ARM_SMMU_FEAT_ATS;
2563 
2564 	if (reg & IDR0_SEV)
2565 		smmu->features |= ARM_SMMU_FEAT_SEV;
2566 
2567 	if (reg & IDR0_MSI)
2568 		smmu->features |= ARM_SMMU_FEAT_MSI;
2569 
2570 	if (reg & IDR0_HYP)
2571 		smmu->features |= ARM_SMMU_FEAT_HYP;
2572 
2573 	/*
2574 	 * The coherency feature as set by FW is used in preference to the ID
2575 	 * register, but warn on mismatch.
2576 	 */
2577 	if (!!(reg & IDR0_COHACC) != coherent)
2578 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2579 			 coherent ? "true" : "false");
2580 
2581 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2582 	case IDR0_STALL_MODEL_FORCE:
2583 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2584 		/* Fallthrough */
2585 	case IDR0_STALL_MODEL_STALL:
2586 		smmu->features |= ARM_SMMU_FEAT_STALLS;
2587 	}
2588 
2589 	if (reg & IDR0_S1P)
2590 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2591 
2592 	if (reg & IDR0_S2P)
2593 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2594 
2595 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2596 		dev_err(smmu->dev, "no translation support!\n");
2597 		return -ENXIO;
2598 	}
2599 
2600 	/* We only support the AArch64 table format at present */
2601 	switch (FIELD_GET(IDR0_TTF, reg)) {
2602 	case IDR0_TTF_AARCH32_64:
2603 		smmu->ias = 40;
2604 		/* Fallthrough */
2605 	case IDR0_TTF_AARCH64:
2606 		break;
2607 	default:
2608 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
2609 		return -ENXIO;
2610 	}
2611 
2612 	/* ASID/VMID sizes */
2613 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2614 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2615 
2616 	/* IDR1 */
2617 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2618 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2619 		dev_err(smmu->dev, "embedded implementation not supported\n");
2620 		return -ENXIO;
2621 	}
2622 
2623 	/* Queue sizes, capped at 4k */
2624 	smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2625 					 FIELD_GET(IDR1_CMDQS, reg));
2626 	if (!smmu->cmdq.q.max_n_shift) {
2627 		/* Odd alignment restrictions on the base, so ignore for now */
2628 		dev_err(smmu->dev, "unit-length command queue not supported\n");
2629 		return -ENXIO;
2630 	}
2631 
2632 	smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2633 					 FIELD_GET(IDR1_EVTQS, reg));
2634 	smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2635 					 FIELD_GET(IDR1_PRIQS, reg));
2636 
2637 	/* SID/SSID sizes */
2638 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2639 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2640 
2641 	/*
2642 	 * If the SMMU supports fewer bits than would fill a single L2 stream
2643 	 * table, use a linear table instead.
2644 	 */
2645 	if (smmu->sid_bits <= STRTAB_SPLIT)
2646 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2647 
2648 	/* IDR5 */
2649 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2650 
2651 	/* Maximum number of outstanding stalls */
2652 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2653 
2654 	/* Page sizes */
2655 	if (reg & IDR5_GRAN64K)
2656 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2657 	if (reg & IDR5_GRAN16K)
2658 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2659 	if (reg & IDR5_GRAN4K)
2660 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2661 
2662 	/* Input address size */
2663 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2664 		smmu->features |= ARM_SMMU_FEAT_VAX;
2665 
2666 	/* Output address size */
2667 	switch (FIELD_GET(IDR5_OAS, reg)) {
2668 	case IDR5_OAS_32_BIT:
2669 		smmu->oas = 32;
2670 		break;
2671 	case IDR5_OAS_36_BIT:
2672 		smmu->oas = 36;
2673 		break;
2674 	case IDR5_OAS_40_BIT:
2675 		smmu->oas = 40;
2676 		break;
2677 	case IDR5_OAS_42_BIT:
2678 		smmu->oas = 42;
2679 		break;
2680 	case IDR5_OAS_44_BIT:
2681 		smmu->oas = 44;
2682 		break;
2683 	case IDR5_OAS_52_BIT:
2684 		smmu->oas = 52;
2685 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2686 		break;
2687 	default:
2688 		dev_info(smmu->dev,
2689 			"unknown output address size. Truncating to 48-bit\n");
2690 		/* Fallthrough */
2691 	case IDR5_OAS_48_BIT:
2692 		smmu->oas = 48;
2693 	}
2694 
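	/* The first probed SMMU sets the global page-size bitmap; later ones OR in their sizes */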
2695 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
2696 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2697 	else
2698 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2699 
2700 	/* Set the DMA mask for our table walker */
2701 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2702 		dev_warn(smmu->dev,
2703 			 "failed to set DMA mask for table walker\n");
2704 
2705 	smmu->ias = max(smmu->ias, smmu->oas);
2706 
2707 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2708 		 smmu->ias, smmu->oas, smmu->features);
2709 	return 0;
2710 }
2711 
2712 #ifdef CONFIG_ACPI
2713 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2714 {
2715 	switch (model) {
2716 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2717 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2718 		break;
2719 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2720 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2721 		break;
2722 	}
2723 
2724 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2725 }
2726 
2727 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2728 				      struct arm_smmu_device *smmu)
2729 {
2730 	struct acpi_iort_smmu_v3 *iort_smmu;
2731 	struct device *dev = smmu->dev;
2732 	struct acpi_iort_node *node;
2733 
2734 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2735 
2736 	/* Retrieve SMMUv3 specific data */
2737 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2738 
2739 	acpi_smmu_get_options(iort_smmu->model, smmu);
2740 
2741 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2742 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2743 
2744 	return 0;
2745 }
2746 #else
2747 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2748 					     struct arm_smmu_device *smmu)
2749 {
2750 	return -ENODEV;
2751 }
2752 #endif
2753 
2754 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2755 				    struct arm_smmu_device *smmu)
2756 {
2757 	struct device *dev = &pdev->dev;
2758 	u32 cells;
2759 	int ret = -EINVAL;
2760 
2761 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2762 		dev_err(dev, "missing #iommu-cells property\n");
2763 	else if (cells != 1)
2764 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2765 	else
2766 		ret = 0;
2767 
2768 	parse_driver_options(smmu);
2769 
2770 	if (of_dma_is_coherent(dev->of_node))
2771 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2772 
2773 	return ret;
2774 }
2775 
2776 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2777 {
2778 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2779 		return SZ_64K;
2780 	else
2781 		return SZ_128K;
2782 }
2783 
2784 static int arm_smmu_device_probe(struct platform_device *pdev)
2785 {
2786 	int irq, ret;
2787 	struct resource *res;
2788 	resource_size_t ioaddr;
2789 	struct arm_smmu_device *smmu;
2790 	struct device *dev = &pdev->dev;
2791 	bool bypass;
2792 
2793 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2794 	if (!smmu) {
2795 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2796 		return -ENOMEM;
2797 	}
2798 	smmu->dev = dev;
2799 
2800 	if (dev->of_node) {
2801 		ret = arm_smmu_device_dt_probe(pdev, smmu);
2802 	} else {
2803 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
2804 		if (ret == -ENODEV)
2805 			return ret;
2806 	}
2807 
2808 	/* Set bypass mode according to firmware probing result */
2809 	bypass = !!ret;
2810 
2811 	/* Base address */
2812 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2813 	if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2814 		dev_err(dev, "MMIO region too small (%pr)\n", res);
2815 		return -EINVAL;
2816 	}
2817 	ioaddr = res->start;
2818 
2819 	smmu->base = devm_ioremap_resource(dev, res);
2820 	if (IS_ERR(smmu->base))
2821 		return PTR_ERR(smmu->base);
2822 
2823 	/* Interrupt lines */
2824 
2825 	irq = platform_get_irq_byname(pdev, "combined");
2826 	if (irq > 0)
2827 		smmu->combined_irq = irq;
2828 	else {
2829 		irq = platform_get_irq_byname(pdev, "eventq");
2830 		if (irq > 0)
2831 			smmu->evtq.q.irq = irq;
2832 
2833 		irq = platform_get_irq_byname(pdev, "priq");
2834 		if (irq > 0)
2835 			smmu->priq.q.irq = irq;
2836 
2837 		irq = platform_get_irq_byname(pdev, "gerror");
2838 		if (irq > 0)
2839 			smmu->gerr_irq = irq;
2840 	}
2841 	/* Probe the h/w */
2842 	ret = arm_smmu_device_hw_probe(smmu);
2843 	if (ret)
2844 		return ret;
2845 
2846 	/* Initialise in-memory data structures */
2847 	ret = arm_smmu_init_structures(smmu);
2848 	if (ret)
2849 		return ret;
2850 
2851 	/* Record our private device structure */
2852 	platform_set_drvdata(pdev, smmu);
2853 
2854 	/* Reset the device */
2855 	ret = arm_smmu_device_reset(smmu, bypass);
2856 	if (ret)
2857 		return ret;
2858 
2859 	/* And we're up. Go go go! */
2860 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2861 				     "smmu3.%pa", &ioaddr);
2862 	if (ret)
2863 		return ret;
2864 
2865 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2866 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2867 
2868 	ret = iommu_device_register(&smmu->iommu);
2869 	if (ret) {
2870 		dev_err(dev, "Failed to register iommu\n");
2871 		return ret;
2872 	}
2873 
2874 #ifdef CONFIG_PCI
2875 	if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2876 		pci_request_acs();
2877 		ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2878 		if (ret)
2879 			return ret;
2880 	}
2881 #endif
2882 #ifdef CONFIG_ARM_AMBA
2883 	if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2884 		ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2885 		if (ret)
2886 			return ret;
2887 	}
2888 #endif
2889 	if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2890 		ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2891 		if (ret)
2892 			return ret;
2893 	}
2894 	return 0;
2895 }
2896 
2897 static int arm_smmu_device_remove(struct platform_device *pdev)
2898 {
2899 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2900 
2901 	arm_smmu_device_disable(smmu);
2902 
2903 	return 0;
2904 }
2905 
2906 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2907 {
2908 	arm_smmu_device_remove(pdev);
2909 }
2910 
2911 static const struct of_device_id arm_smmu_of_match[] = {
2912 	{ .compatible = "arm,smmu-v3", },
2913 	{ },
2914 };
2915 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2916 
2917 static struct platform_driver arm_smmu_driver = {
2918 	.driver	= {
2919 		.name		= "arm-smmu-v3",
2920 		.of_match_table	= of_match_ptr(arm_smmu_of_match),
2921 	},
2922 	.probe	= arm_smmu_device_probe,
2923 	.remove	= arm_smmu_device_remove,
2924 	.shutdown = arm_smmu_device_shutdown,
2925 };
2926 module_platform_driver(arm_smmu_driver);
2927 
2928 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2929 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2930 MODULE_LICENSE("GPL v2");
2931