1 /*
2 * IOMMU API for ARM architected SMMUv3 implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 * Copyright (C) 2015 ARM Limited
17 *
18 * Author: Will Deacon <will.deacon@arm.com>
19 *
20 * This driver is powered by bad coffee and bombay mix.
21 */
22
23 #include <linux/acpi.h>
24 #include <linux/acpi_iort.h>
25 #include <linux/bitfield.h>
26 #include <linux/bitops.h>
27 #include <linux/crash_dump.h>
28 #include <linux/delay.h>
29 #include <linux/dma-iommu.h>
30 #include <linux/err.h>
31 #include <linux/interrupt.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/module.h>
35 #include <linux/msi.h>
36 #include <linux/of.h>
37 #include <linux/of_address.h>
38 #include <linux/of_iommu.h>
39 #include <linux/of_platform.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
42
43 #include <linux/amba/bus.h>
44
45 #include "io-pgtable.h"
46
47 /* MMIO registers */
48 #define ARM_SMMU_IDR0 0x0
49 #define IDR0_ST_LVL GENMASK(28, 27)
50 #define IDR0_ST_LVL_2LVL 1
51 #define IDR0_STALL_MODEL GENMASK(25, 24)
52 #define IDR0_STALL_MODEL_STALL 0
53 #define IDR0_STALL_MODEL_FORCE 2
54 #define IDR0_TTENDIAN GENMASK(22, 21)
55 #define IDR0_TTENDIAN_MIXED 0
56 #define IDR0_TTENDIAN_LE 2
57 #define IDR0_TTENDIAN_BE 3
58 #define IDR0_CD2L (1 << 19)
59 #define IDR0_VMID16 (1 << 18)
60 #define IDR0_PRI (1 << 16)
61 #define IDR0_SEV (1 << 14)
62 #define IDR0_MSI (1 << 13)
63 #define IDR0_ASID16 (1 << 12)
64 #define IDR0_ATS (1 << 10)
65 #define IDR0_HYP (1 << 9)
66 #define IDR0_COHACC (1 << 4)
67 #define IDR0_TTF GENMASK(3, 2)
68 #define IDR0_TTF_AARCH64 2
69 #define IDR0_TTF_AARCH32_64 3
70 #define IDR0_S1P (1 << 1)
71 #define IDR0_S2P (1 << 0)
72
73 #define ARM_SMMU_IDR1 0x4
74 #define IDR1_TABLES_PRESET (1 << 30)
75 #define IDR1_QUEUES_PRESET (1 << 29)
76 #define IDR1_REL (1 << 28)
77 #define IDR1_CMDQS GENMASK(25, 21)
78 #define IDR1_EVTQS GENMASK(20, 16)
79 #define IDR1_PRIQS GENMASK(15, 11)
80 #define IDR1_SSIDSIZE GENMASK(10, 6)
81 #define IDR1_SIDSIZE GENMASK(5, 0)
82
83 #define ARM_SMMU_IDR5 0x14
84 #define IDR5_STALL_MAX GENMASK(31, 16)
85 #define IDR5_GRAN64K (1 << 6)
86 #define IDR5_GRAN16K (1 << 5)
87 #define IDR5_GRAN4K (1 << 4)
88 #define IDR5_OAS GENMASK(2, 0)
89 #define IDR5_OAS_32_BIT 0
90 #define IDR5_OAS_36_BIT 1
91 #define IDR5_OAS_40_BIT 2
92 #define IDR5_OAS_42_BIT 3
93 #define IDR5_OAS_44_BIT 4
94 #define IDR5_OAS_48_BIT 5
95 #define IDR5_OAS_52_BIT 6
96 #define IDR5_VAX GENMASK(11, 10)
97 #define IDR5_VAX_52_BIT 1
98
99 #define ARM_SMMU_CR0 0x20
100 #define CR0_CMDQEN (1 << 3)
101 #define CR0_EVTQEN (1 << 2)
102 #define CR0_PRIQEN (1 << 1)
103 #define CR0_SMMUEN (1 << 0)
104
105 #define ARM_SMMU_CR0ACK 0x24
106
107 #define ARM_SMMU_CR1 0x28
108 #define CR1_TABLE_SH GENMASK(11, 10)
109 #define CR1_TABLE_OC GENMASK(9, 8)
110 #define CR1_TABLE_IC GENMASK(7, 6)
111 #define CR1_QUEUE_SH GENMASK(5, 4)
112 #define CR1_QUEUE_OC GENMASK(3, 2)
113 #define CR1_QUEUE_IC GENMASK(1, 0)
114 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
115 #define CR1_CACHE_NC 0
116 #define CR1_CACHE_WB 1
117 #define CR1_CACHE_WT 2
118
119 #define ARM_SMMU_CR2 0x2c
120 #define CR2_PTM (1 << 2)
121 #define CR2_RECINVSID (1 << 1)
122 #define CR2_E2H (1 << 0)
123
124 #define ARM_SMMU_GBPA 0x44
125 #define GBPA_UPDATE (1 << 31)
126 #define GBPA_ABORT (1 << 20)
127
128 #define ARM_SMMU_IRQ_CTRL 0x50
129 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
130 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
131 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
132
133 #define ARM_SMMU_IRQ_CTRLACK 0x54
134
135 #define ARM_SMMU_GERROR 0x60
136 #define GERROR_SFM_ERR (1 << 8)
137 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
138 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
139 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
140 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
141 #define GERROR_PRIQ_ABT_ERR (1 << 3)
142 #define GERROR_EVTQ_ABT_ERR (1 << 2)
143 #define GERROR_CMDQ_ERR (1 << 0)
144 #define GERROR_ERR_MASK 0xfd
145
146 #define ARM_SMMU_GERRORN 0x64
147
148 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
149 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
150 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
151
152 #define ARM_SMMU_STRTAB_BASE 0x80
153 #define STRTAB_BASE_RA (1UL << 62)
154 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
155
156 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
157 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
158 #define STRTAB_BASE_CFG_FMT_LINEAR 0
159 #define STRTAB_BASE_CFG_FMT_2LVL 1
160 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
161 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
162
163 #define ARM_SMMU_CMDQ_BASE 0x90
164 #define ARM_SMMU_CMDQ_PROD 0x98
165 #define ARM_SMMU_CMDQ_CONS 0x9c
166
167 #define ARM_SMMU_EVTQ_BASE 0xa0
168 #define ARM_SMMU_EVTQ_PROD 0x100a8
169 #define ARM_SMMU_EVTQ_CONS 0x100ac
170 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
171 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
172 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
173
174 #define ARM_SMMU_PRIQ_BASE 0xc0
175 #define ARM_SMMU_PRIQ_PROD 0x100c8
176 #define ARM_SMMU_PRIQ_CONS 0x100cc
177 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
178 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
179 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
180
181 /* Common MSI config fields */
182 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
183 #define MSI_CFG2_SH GENMASK(5, 4)
184 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
185
186 /* Common memory attribute values */
187 #define ARM_SMMU_SH_NSH 0
188 #define ARM_SMMU_SH_OSH 2
189 #define ARM_SMMU_SH_ISH 3
190 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
191 #define ARM_SMMU_MEMATTR_OIWB 0xf
192
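/*
 * Queue producer/consumer pointers pack an index in the bottom max_n_shift
 * bits, a wrap bit immediately above it, and the overflow flag in bit 31,
 * mirroring the layout of the hardware PROD/CONS registers.
 */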
193 #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
194 #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
195 #define Q_OVERFLOW_FLAG (1 << 31)
196 #define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
197 #define Q_ENT(q, p) ((q)->base + \
198 Q_IDX(q, p) * (q)->ent_dwords)
199
200 #define Q_BASE_RWA (1UL << 62)
201 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
202 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
203
204 /*
205 * Stream table.
206 *
207 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
208 * 2lvl: 128k L1 entries,
209 * 256 lazy entries per table (each table covers a PCI bus)
210 */
211 #define STRTAB_L1_SZ_SHIFT 20
212 #define STRTAB_SPLIT 8
213
214 #define STRTAB_L1_DESC_DWORDS 1
215 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
216 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
217
218 #define STRTAB_STE_DWORDS 8
219 #define STRTAB_STE_0_V (1UL << 0)
220 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
221 #define STRTAB_STE_0_CFG_ABORT 0
222 #define STRTAB_STE_0_CFG_BYPASS 4
223 #define STRTAB_STE_0_CFG_S1_TRANS 5
224 #define STRTAB_STE_0_CFG_S2_TRANS 6
225
226 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
227 #define STRTAB_STE_0_S1FMT_LINEAR 0
228 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
229 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
230
231 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
232 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
233 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
234 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
235 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
236 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
237 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
238
239 #define STRTAB_STE_1_S1STALLD (1UL << 27)
240
241 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
242 #define STRTAB_STE_1_EATS_ABT 0UL
243 #define STRTAB_STE_1_EATS_TRANS 1UL
244 #define STRTAB_STE_1_EATS_S1CHK 2UL
245
246 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
247 #define STRTAB_STE_1_STRW_NSEL1 0UL
248 #define STRTAB_STE_1_STRW_EL2 2UL
249
250 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
251 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
252
253 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
254 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
255 #define STRTAB_STE_2_S2AA64 (1UL << 51)
256 #define STRTAB_STE_2_S2ENDI (1UL << 52)
257 #define STRTAB_STE_2_S2PTW (1UL << 54)
258 #define STRTAB_STE_2_S2R (1UL << 58)
259
260 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
261
262 /* Context descriptor (stage-1 only) */
263 #define CTXDESC_CD_DWORDS 8
264 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
265 #define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
266 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
267 #define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
268 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
269 #define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
270 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
271 #define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
272 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
273 #define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
274 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
275 #define ARM64_TCR_EPD0 (1ULL << 7)
276 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
277 #define ARM64_TCR_EPD1 (1ULL << 23)
278
279 #define CTXDESC_CD_0_ENDI (1UL << 15)
280 #define CTXDESC_CD_0_V (1UL << 31)
281
282 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
283 #define ARM64_TCR_IPS GENMASK_ULL(34, 32)
284 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
285 #define ARM64_TCR_TBI0 (1ULL << 37)
286
287 #define CTXDESC_CD_0_AA64 (1UL << 41)
288 #define CTXDESC_CD_0_S (1UL << 44)
289 #define CTXDESC_CD_0_R (1UL << 45)
290 #define CTXDESC_CD_0_A (1UL << 46)
291 #define CTXDESC_CD_0_ASET (1UL << 47)
292 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
293
294 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
295
296 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
297 #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
298 FIELD_GET(ARM64_TCR_##fld, tcr))
299
300 /* Command queue */
301 #define CMDQ_ENT_DWORDS 2
302 #define CMDQ_MAX_SZ_SHIFT 8
303
304 #define CMDQ_CONS_ERR GENMASK(30, 24)
305 #define CMDQ_ERR_CERROR_NONE_IDX 0
306 #define CMDQ_ERR_CERROR_ILL_IDX 1
307 #define CMDQ_ERR_CERROR_ABT_IDX 2
308
309 #define CMDQ_0_OP GENMASK_ULL(7, 0)
310 #define CMDQ_0_SSV (1UL << 11)
311
312 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
313 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
314 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
315
316 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
317 #define CMDQ_CFGI_1_LEAF (1UL << 0)
318 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
319
320 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
321 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
322 #define CMDQ_TLBI_1_LEAF (1UL << 0)
323 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
324 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
325
326 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
327 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
328 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
329 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
330
331 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
332 #define CMDQ_SYNC_0_CS_NONE 0
333 #define CMDQ_SYNC_0_CS_IRQ 1
334 #define CMDQ_SYNC_0_CS_SEV 2
335 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
336 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
337 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
338 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
339
340 /* Event queue */
341 #define EVTQ_ENT_DWORDS 4
342 #define EVTQ_MAX_SZ_SHIFT 7
343
344 #define EVTQ_0_ID GENMASK_ULL(7, 0)
345
346 /* PRI queue */
347 #define PRIQ_ENT_DWORDS 2
348 #define PRIQ_MAX_SZ_SHIFT 8
349
350 #define PRIQ_0_SID GENMASK_ULL(31, 0)
351 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
352 #define PRIQ_0_PERM_PRIV (1UL << 58)
353 #define PRIQ_0_PERM_EXEC (1UL << 59)
354 #define PRIQ_0_PERM_READ (1UL << 60)
355 #define PRIQ_0_PERM_WRITE (1UL << 61)
356 #define PRIQ_0_PRG_LAST (1UL << 62)
357 #define PRIQ_0_SSID_V (1UL << 63)
358
359 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
360 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
361
362 /* High-level queue structures */
363 #define ARM_SMMU_POLL_TIMEOUT_US 100
364 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
365 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
366
367 #define MSI_IOVA_BASE 0x8000000
368 #define MSI_IOVA_LENGTH 0x100000
369
370 static bool disable_bypass = true;
371 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
372 MODULE_PARM_DESC(disable_bypass,
373 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
374
375 enum pri_resp {
376 PRI_RESP_DENY = 0,
377 PRI_RESP_FAIL = 1,
378 PRI_RESP_SUCC = 2,
379 };
380
381 enum arm_smmu_msi_index {
382 EVTQ_MSI_INDEX,
383 GERROR_MSI_INDEX,
384 PRIQ_MSI_INDEX,
385 ARM_SMMU_MAX_MSIS,
386 };
387
388 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
389 [EVTQ_MSI_INDEX] = {
390 ARM_SMMU_EVTQ_IRQ_CFG0,
391 ARM_SMMU_EVTQ_IRQ_CFG1,
392 ARM_SMMU_EVTQ_IRQ_CFG2,
393 },
394 [GERROR_MSI_INDEX] = {
395 ARM_SMMU_GERROR_IRQ_CFG0,
396 ARM_SMMU_GERROR_IRQ_CFG1,
397 ARM_SMMU_GERROR_IRQ_CFG2,
398 },
399 [PRIQ_MSI_INDEX] = {
400 ARM_SMMU_PRIQ_IRQ_CFG0,
401 ARM_SMMU_PRIQ_IRQ_CFG1,
402 ARM_SMMU_PRIQ_IRQ_CFG2,
403 },
404 };
405
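/*
 * A command queue entry in its unpacked, CPU-friendly form. It is packed
 * into the 16-byte wire format by arm_smmu_cmdq_build_cmd().
 */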
406 struct arm_smmu_cmdq_ent {
407 /* Common fields */
408 u8 opcode;
409 bool substream_valid;
410
411 /* Command-specific fields */
412 union {
413 #define CMDQ_OP_PREFETCH_CFG 0x1
414 struct {
415 u32 sid;
416 u8 size;
417 u64 addr;
418 } prefetch;
419
420 #define CMDQ_OP_CFGI_STE 0x3
421 #define CMDQ_OP_CFGI_ALL 0x4
422 struct {
423 u32 sid;
424 union {
425 bool leaf;
426 u8 span;
427 };
428 } cfgi;
429
430 #define CMDQ_OP_TLBI_NH_ASID 0x11
431 #define CMDQ_OP_TLBI_NH_VA 0x12
432 #define CMDQ_OP_TLBI_EL2_ALL 0x20
433 #define CMDQ_OP_TLBI_S12_VMALL 0x28
434 #define CMDQ_OP_TLBI_S2_IPA 0x2a
435 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
436 struct {
437 u16 asid;
438 u16 vmid;
439 bool leaf;
440 u64 addr;
441 } tlbi;
442
443 #define CMDQ_OP_PRI_RESP 0x41
444 struct {
445 u32 sid;
446 u32 ssid;
447 u16 grpid;
448 enum pri_resp resp;
449 } pri;
450
451 #define CMDQ_OP_CMD_SYNC 0x46
452 struct {
453 u32 msidata;
454 u64 msiaddr;
455 } sync;
456 };
457 };
458
459 struct arm_smmu_queue {
460 int irq; /* Wired interrupt */
461
462 __le64 *base;
463 dma_addr_t base_dma;
464 u64 q_base;
465
466 size_t ent_dwords;
467 u32 max_n_shift;
468 u32 prod;
469 u32 cons;
470
471 u32 __iomem *prod_reg;
472 u32 __iomem *cons_reg;
473 };
474
475 struct arm_smmu_cmdq {
476 struct arm_smmu_queue q;
477 spinlock_t lock;
478 };
479
480 struct arm_smmu_evtq {
481 struct arm_smmu_queue q;
482 u32 max_stalls;
483 };
484
485 struct arm_smmu_priq {
486 struct arm_smmu_queue q;
487 };
488
489 /* High-level stream table and context descriptor structures */
490 struct arm_smmu_strtab_l1_desc {
491 u8 span;
492
493 __le64 *l2ptr;
494 dma_addr_t l2ptr_dma;
495 };
496
497 struct arm_smmu_s1_cfg {
498 __le64 *cdptr;
499 dma_addr_t cdptr_dma;
500
501 struct arm_smmu_ctx_desc {
502 u16 asid;
503 u64 ttbr;
504 u64 tcr;
505 u64 mair;
506 } cd;
507 };
508
509 struct arm_smmu_s2_cfg {
510 u16 vmid;
511 u64 vttbr;
512 u64 vtcr;
513 };
514
515 struct arm_smmu_strtab_ent {
516 /*
517 * An STE is "assigned" if the master emitting the corresponding SID
518 * is attached to a domain. The behaviour of an unassigned STE is
519 * determined by the disable_bypass parameter, whereas an assigned
520 * STE behaves according to s1_cfg/s2_cfg, which themselves are
521 * configured according to the domain type.
522 */
523 bool assigned;
524 struct arm_smmu_s1_cfg *s1_cfg;
525 struct arm_smmu_s2_cfg *s2_cfg;
526 };
527
528 struct arm_smmu_strtab_cfg {
529 __le64 *strtab;
530 dma_addr_t strtab_dma;
531 struct arm_smmu_strtab_l1_desc *l1_desc;
532 unsigned int num_l1_ents;
533
534 u64 strtab_base;
535 u32 strtab_base_cfg;
536 };
537
538 /* An SMMUv3 instance */
539 struct arm_smmu_device {
540 struct device *dev;
541 void __iomem *base;
542
543 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
544 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
545 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
546 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
547 #define ARM_SMMU_FEAT_PRI (1 << 4)
548 #define ARM_SMMU_FEAT_ATS (1 << 5)
549 #define ARM_SMMU_FEAT_SEV (1 << 6)
550 #define ARM_SMMU_FEAT_MSI (1 << 7)
551 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
552 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
553 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
554 #define ARM_SMMU_FEAT_STALLS (1 << 11)
555 #define ARM_SMMU_FEAT_HYP (1 << 12)
556 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
557 #define ARM_SMMU_FEAT_VAX (1 << 14)
558 u32 features;
559
560 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
561 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
562 u32 options;
563
564 struct arm_smmu_cmdq cmdq;
565 struct arm_smmu_evtq evtq;
566 struct arm_smmu_priq priq;
567
568 int gerr_irq;
569 int combined_irq;
570 atomic_t sync_nr;
571
572 unsigned long ias; /* IPA */
573 unsigned long oas; /* PA */
574 unsigned long pgsize_bitmap;
575
576 #define ARM_SMMU_MAX_ASIDS (1 << 16)
577 unsigned int asid_bits;
578 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
579
580 #define ARM_SMMU_MAX_VMIDS (1 << 16)
581 unsigned int vmid_bits;
582 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
583
584 unsigned int ssid_bits;
585 unsigned int sid_bits;
586
587 struct arm_smmu_strtab_cfg strtab_cfg;
588
589 u32 sync_count;
590
591 /* IOMMU core code handle */
592 struct iommu_device iommu;
593 };
594
595 /* SMMU private data for each master */
596 struct arm_smmu_master_data {
597 struct arm_smmu_device *smmu;
598 struct arm_smmu_strtab_ent ste;
599 };
600
601 /* SMMU private data for an IOMMU domain */
602 enum arm_smmu_domain_stage {
603 ARM_SMMU_DOMAIN_S1 = 0,
604 ARM_SMMU_DOMAIN_S2,
605 ARM_SMMU_DOMAIN_NESTED,
606 ARM_SMMU_DOMAIN_BYPASS,
607 };
608
609 struct arm_smmu_domain {
610 struct arm_smmu_device *smmu;
611 struct mutex init_mutex; /* Protects smmu pointer */
612
613 struct io_pgtable_ops *pgtbl_ops;
614
615 enum arm_smmu_domain_stage stage;
616 union {
617 struct arm_smmu_s1_cfg s1_cfg;
618 struct arm_smmu_s2_cfg s2_cfg;
619 };
620
621 struct iommu_domain domain;
622 };
623
624 struct arm_smmu_option_prop {
625 u32 opt;
626 const char *prop;
627 };
628
629 static struct arm_smmu_option_prop arm_smmu_options[] = {
630 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
631 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
632 { 0, NULL},
633 };
634
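/*
 * On implementations with the PAGE0_REGS_ONLY quirk (broken Page 1 register
 * space), the registers normally found above 64K are accessed via Page 0
 * instead, so fold such offsets back down.
 */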
635 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
636 struct arm_smmu_device *smmu)
637 {
638 if ((offset > SZ_64K) &&
639 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
640 offset -= SZ_64K;
641
642 return smmu->base + offset;
643 }
644
645 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
646 {
647 return container_of(dom, struct arm_smmu_domain, domain);
648 }
649
650 static void parse_driver_options(struct arm_smmu_device *smmu)
651 {
652 int i = 0;
653
654 do {
655 if (of_property_read_bool(smmu->dev->of_node,
656 arm_smmu_options[i].prop)) {
657 smmu->options |= arm_smmu_options[i].opt;
658 dev_notice(smmu->dev, "option %s\n",
659 arm_smmu_options[i].prop);
660 }
661 } while (arm_smmu_options[++i].opt);
662 }
663
664 /* Low-level queue manipulation functions */
665 static bool queue_full(struct arm_smmu_queue *q)
666 {
667 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
668 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
669 }
670
671 static bool queue_empty(struct arm_smmu_queue *q)
672 {
673 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
674 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
675 }
676
677 static void queue_sync_cons(struct arm_smmu_queue *q)
678 {
679 q->cons = readl_relaxed(q->cons_reg);
680 }
681
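/*
 * Advance the consumer pointer, preserving the overflow flag and letting the
 * increment naturally toggle the wrap bit when the index rolls over.
 */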
682 static void queue_inc_cons(struct arm_smmu_queue *q)
683 {
684 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
685
686 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
687 writel(q->cons, q->cons_reg);
688 }
689
690 static int queue_sync_prod(struct arm_smmu_queue *q)
691 {
692 int ret = 0;
693 u32 prod = readl_relaxed(q->prod_reg);
694
695 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
696 ret = -EOVERFLOW;
697
698 q->prod = prod;
699 return ret;
700 }
701
702 static void queue_inc_prod(struct arm_smmu_queue *q)
703 {
704 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
705
706 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
707 writel(q->prod, q->prod_reg);
708 }
709
710 /*
711 * Wait for the SMMU to consume items. If sync is true, wait until the queue
712 * is empty. Otherwise, wait until there is at least one free slot.
713 */
714 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
715 {
716 ktime_t timeout;
717 unsigned int delay = 1, spin_cnt = 0;
718
719 /* Wait longer if it's a CMD_SYNC */
720 timeout = ktime_add_us(ktime_get(), sync ?
721 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
722 ARM_SMMU_POLL_TIMEOUT_US);
723
724 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
725 if (ktime_compare(ktime_get(), timeout) > 0)
726 return -ETIMEDOUT;
727
728 if (wfe) {
729 wfe();
730 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
731 cpu_relax();
732 continue;
733 } else {
734 udelay(delay);
735 delay *= 2;
736 spin_cnt = 0;
737 }
738 }
739
740 return 0;
741 }
742
743 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
744 {
745 int i;
746
747 for (i = 0; i < n_dwords; ++i)
748 *dst++ = cpu_to_le64(*src++);
749 }
750
751 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
752 {
753 if (queue_full(q))
754 return -ENOSPC;
755
756 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
757 queue_inc_prod(q);
758 return 0;
759 }
760
761 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
762 {
763 int i;
764
765 for (i = 0; i < n_dwords; ++i)
766 *dst++ = le64_to_cpu(*src++);
767 }
768
769 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
770 {
771 if (queue_empty(q))
772 return -EAGAIN;
773
774 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
775 queue_inc_cons(q);
776 return 0;
777 }
778
779 /* High-level queue accessors */
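/* Pack a CPU-friendly command descriptor into the 2-dword CMDQ wire format. */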
780 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
781 {
782 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
783 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
784
785 switch (ent->opcode) {
786 case CMDQ_OP_TLBI_EL2_ALL:
787 case CMDQ_OP_TLBI_NSNH_ALL:
788 break;
789 case CMDQ_OP_PREFETCH_CFG:
790 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
791 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
792 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
793 break;
794 case CMDQ_OP_CFGI_STE:
795 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
796 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
797 break;
798 case CMDQ_OP_CFGI_ALL:
799 /* Cover the entire SID range */
800 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
801 break;
802 case CMDQ_OP_TLBI_NH_VA:
803 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
804 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
805 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
806 break;
807 case CMDQ_OP_TLBI_S2_IPA:
808 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
809 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
810 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
811 break;
812 case CMDQ_OP_TLBI_NH_ASID:
813 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
814 /* Fallthrough */
815 case CMDQ_OP_TLBI_S12_VMALL:
816 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
817 break;
818 case CMDQ_OP_PRI_RESP:
819 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
820 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
821 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
822 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
823 switch (ent->pri.resp) {
824 case PRI_RESP_DENY:
825 case PRI_RESP_FAIL:
826 case PRI_RESP_SUCC:
827 break;
828 default:
829 return -EINVAL;
830 }
831 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
832 break;
833 case CMDQ_OP_CMD_SYNC:
834 if (ent->sync.msiaddr)
835 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
836 else
837 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
838 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
839 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
840 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
841 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
842 break;
843 default:
844 return -ENOENT;
845 }
846
847 return 0;
848 }
849
850 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
851 {
852 static const char *cerror_str[] = {
853 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
854 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
855 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
856 };
857
858 int i;
859 u64 cmd[CMDQ_ENT_DWORDS];
860 struct arm_smmu_queue *q = &smmu->cmdq.q;
861 u32 cons = readl_relaxed(q->cons_reg);
862 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
863 struct arm_smmu_cmdq_ent cmd_sync = {
864 .opcode = CMDQ_OP_CMD_SYNC,
865 };
866
867 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
868 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
869
870 switch (idx) {
871 case CMDQ_ERR_CERROR_ABT_IDX:
872 dev_err(smmu->dev, "retrying command fetch\n");
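/* Fall through */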
873 case CMDQ_ERR_CERROR_NONE_IDX:
874 return;
875 case CMDQ_ERR_CERROR_ILL_IDX:
876 /* Fallthrough */
877 default:
878 break;
879 }
880
881 /*
882 * We may have concurrent producers, so we need to be careful
883 * not to touch any of the shadow cmdq state.
884 */
885 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
886 dev_err(smmu->dev, "skipping command in error state:\n");
887 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
888 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
889
890 /* Convert the erroneous command into a CMD_SYNC */
891 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
892 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
893 return;
894 }
895
896 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
897 }
898
899 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
900 {
901 struct arm_smmu_queue *q = &smmu->cmdq.q;
902 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
903
904 while (queue_insert_raw(q, cmd) == -ENOSPC) {
905 if (queue_poll_cons(q, false, wfe))
906 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
907 }
908 }
909
910 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
911 struct arm_smmu_cmdq_ent *ent)
912 {
913 u64 cmd[CMDQ_ENT_DWORDS];
914 unsigned long flags;
915
916 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
917 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
918 ent->opcode);
919 return;
920 }
921
922 spin_lock_irqsave(&smmu->cmdq.lock, flags);
923 arm_smmu_cmdq_insert_cmd(smmu, cmd);
924 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
925 }
926
927 /*
928 * The difference between val and sync_idx is bounded by the maximum size of
929 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
930 */
931 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
932 {
933 ktime_t timeout;
934 u32 val;
935
936 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
937 val = smp_cond_load_acquire(&smmu->sync_count,
938 (int)(VAL - sync_idx) >= 0 ||
939 !ktime_before(ktime_get(), timeout));
940
941 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
942 }
943
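/*
 * Issue a CMD_SYNC that signals completion by having the SMMU write a fresh
 * sequence number (msidata) to smmu->sync_count, which we then poll for.
 */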
944 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
945 {
946 u64 cmd[CMDQ_ENT_DWORDS];
947 unsigned long flags;
948 struct arm_smmu_cmdq_ent ent = {
949 .opcode = CMDQ_OP_CMD_SYNC,
950 .sync = {
951 .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
952 .msiaddr = virt_to_phys(&smmu->sync_count),
953 },
954 };
955
956 arm_smmu_cmdq_build_cmd(cmd, &ent);
957
958 spin_lock_irqsave(&smmu->cmdq.lock, flags);
959 arm_smmu_cmdq_insert_cmd(smmu, cmd);
960 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
961
962 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
963 }
964
965 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
966 {
967 u64 cmd[CMDQ_ENT_DWORDS];
968 unsigned long flags;
969 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
970 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
971 int ret;
972
973 arm_smmu_cmdq_build_cmd(cmd, &ent);
974
975 spin_lock_irqsave(&smmu->cmdq.lock, flags);
976 arm_smmu_cmdq_insert_cmd(smmu, cmd);
977 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
978 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
979
980 return ret;
981 }
982
983 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
984 {
985 int ret;
986 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
987 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
988
989 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
990 : __arm_smmu_cmdq_issue_sync(smmu);
991 if (ret)
992 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
993 }
994
995 /* Context descriptor manipulation functions */
996 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
997 {
998 u64 val = 0;
999
1000 /* Repack the TCR. Just care about TTBR0 for now */
1001 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1002 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1003 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1004 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1005 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1006 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1007 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1008 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1009 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1010
1011 return val;
1012 }
1013
1014 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1015 struct arm_smmu_s1_cfg *cfg)
1016 {
1017 u64 val;
1018
1019 /*
1020 * We don't need to issue any invalidation here, as we'll invalidate
1021 * the STE when installing the new entry anyway.
1022 */
1023 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1024 #ifdef __BIG_ENDIAN
1025 CTXDESC_CD_0_ENDI |
1026 #endif
1027 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1028 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1029 CTXDESC_CD_0_V;
1030
1031 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1032 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1033 val |= CTXDESC_CD_0_S;
1034
1035 cfg->cdptr[0] = cpu_to_le64(val);
1036
1037 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1038 cfg->cdptr[1] = cpu_to_le64(val);
1039
1040 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1041 }
1042
1043 /* Stream table manipulation functions */
1044 static void
1045 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1046 {
1047 u64 val = 0;
1048
1049 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1050 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1051
1052 *dst = cpu_to_le64(val);
1053 }
1054
1055 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1056 {
1057 struct arm_smmu_cmdq_ent cmd = {
1058 .opcode = CMDQ_OP_CFGI_STE,
1059 .cfgi = {
1060 .sid = sid,
1061 .leaf = true,
1062 },
1063 };
1064
1065 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1066 arm_smmu_cmdq_issue_sync(smmu);
1067 }
1068
1069 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1070 __le64 *dst, struct arm_smmu_strtab_ent *ste)
1071 {
1072 /*
1073 * This is hideously complicated, but we only really care about
1074 * three cases at the moment:
1075 *
1076 * 1. Invalid (all zero) -> bypass/fault (init)
1077 * 2. Bypass/fault -> translation/bypass (attach)
1078 * 3. Translation/bypass -> bypass/fault (detach)
1079 *
1080 * Given that we can't update the STE atomically and the SMMU
1081 * doesn't read the thing in a defined order, that leaves us
1082 * with the following maintenance requirements:
1083 *
1084 * 1. Update Config, return (init time STEs aren't live)
1085 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1086 * 3. Update Config, sync
1087 */
1088 u64 val = le64_to_cpu(dst[0]);
1089 bool ste_live = false;
1090 struct arm_smmu_cmdq_ent prefetch_cmd = {
1091 .opcode = CMDQ_OP_PREFETCH_CFG,
1092 .prefetch = {
1093 .sid = sid,
1094 },
1095 };
1096
1097 if (val & STRTAB_STE_0_V) {
1098 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1099 case STRTAB_STE_0_CFG_BYPASS:
1100 break;
1101 case STRTAB_STE_0_CFG_S1_TRANS:
1102 case STRTAB_STE_0_CFG_S2_TRANS:
1103 ste_live = true;
1104 break;
1105 case STRTAB_STE_0_CFG_ABORT:
1106 if (disable_bypass)
1107 break;
1108 default:
1109 BUG(); /* STE corruption */
1110 }
1111 }
1112
1113 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1114 val = STRTAB_STE_0_V;
1115
1116 /* Bypass/fault */
1117 if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1118 if (!ste->assigned && disable_bypass)
1119 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1120 else
1121 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1122
1123 dst[0] = cpu_to_le64(val);
1124 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1125 STRTAB_STE_1_SHCFG_INCOMING));
1126 dst[2] = 0; /* Nuke the VMID */
1127 /*
1128 * The SMMU can perform negative caching, so we must sync
1129 * the STE regardless of whether the old value was live.
1130 */
1131 if (smmu)
1132 arm_smmu_sync_ste_for_sid(smmu, sid);
1133 return;
1134 }
1135
1136 if (ste->s1_cfg) {
1137 BUG_ON(ste_live);
1138 dst[1] = cpu_to_le64(
1139 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1140 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1141 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1142 #ifdef CONFIG_PCI_ATS
1143 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1144 #endif
1145 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1146
1147 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1148 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1149 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1150
1151 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1152 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1153 }
1154
1155 if (ste->s2_cfg) {
1156 BUG_ON(ste_live);
1157 dst[2] = cpu_to_le64(
1158 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1159 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1160 #ifdef __BIG_ENDIAN
1161 STRTAB_STE_2_S2ENDI |
1162 #endif
1163 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1164 STRTAB_STE_2_S2R);
1165
1166 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1167
1168 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1169 }
1170
1171 arm_smmu_sync_ste_for_sid(smmu, sid);
1172 dst[0] = cpu_to_le64(val);
1173 arm_smmu_sync_ste_for_sid(smmu, sid);
1174
1175 /* It's likely that we'll want to use the new STE soon */
1176 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1177 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1178 }
1179
1180 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1181 {
1182 unsigned int i;
1183 struct arm_smmu_strtab_ent ste = { .assigned = false };
1184
1185 for (i = 0; i < nent; ++i) {
1186 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1187 strtab += STRTAB_STE_DWORDS;
1188 }
1189 }
1190
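/*
 * Lazily allocate and install the level-2 stream table covering the given
 * SID, initialising all of its STEs to bypass/abort (per disable_bypass).
 */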
1191 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1192 {
1193 size_t size;
1194 void *strtab;
1195 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1196 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1197
1198 if (desc->l2ptr)
1199 return 0;
1200
1201 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1202 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1203
1204 desc->span = STRTAB_SPLIT + 1;
1205 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1206 GFP_KERNEL | __GFP_ZERO);
1207 if (!desc->l2ptr) {
1208 dev_err(smmu->dev,
1209 "failed to allocate l2 stream table for SID %u\n",
1210 sid);
1211 return -ENOMEM;
1212 }
1213
1214 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1215 arm_smmu_write_strtab_l1_desc(strtab, desc);
1216 return 0;
1217 }
1218
1219 /* IRQ and event handlers */
1220 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1221 {
1222 int i;
1223 struct arm_smmu_device *smmu = dev;
1224 struct arm_smmu_queue *q = &smmu->evtq.q;
1225 u64 evt[EVTQ_ENT_DWORDS];
1226
1227 do {
1228 while (!queue_remove_raw(q, evt)) {
1229 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1230
1231 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1232 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1233 dev_info(smmu->dev, "\t0x%016llx\n",
1234 (unsigned long long)evt[i]);
1235
1236 }
1237
1238 /*
1239 * Not much we can do on overflow, so scream and pretend we're
1240 * trying harder.
1241 */
1242 if (queue_sync_prod(q) == -EOVERFLOW)
1243 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1244 } while (!queue_empty(q));
1245
1246 /* Sync our overflow flag, as we believe we're up to speed */
1247 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1248 return IRQ_HANDLED;
1249 }
1250
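/*
 * We don't implement PRI page-request handling, so log the request and, if
 * it terminates a page request group, respond with a denial.
 */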
1251 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1252 {
1253 u32 sid, ssid;
1254 u16 grpid;
1255 bool ssv, last;
1256
1257 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1258 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1259 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1260 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1261 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1262
1263 dev_info(smmu->dev, "unexpected PRI request received:\n");
1264 dev_info(smmu->dev,
1265 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1266 sid, ssid, grpid, last ? "L" : "",
1267 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1268 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1269 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1270 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1271 evt[1] & PRIQ_1_ADDR_MASK);
1272
1273 if (last) {
1274 struct arm_smmu_cmdq_ent cmd = {
1275 .opcode = CMDQ_OP_PRI_RESP,
1276 .substream_valid = ssv,
1277 .pri = {
1278 .sid = sid,
1279 .ssid = ssid,
1280 .grpid = grpid,
1281 .resp = PRI_RESP_DENY,
1282 },
1283 };
1284
1285 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1286 }
1287 }
1288
1289 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1290 {
1291 struct arm_smmu_device *smmu = dev;
1292 struct arm_smmu_queue *q = &smmu->priq.q;
1293 u64 evt[PRIQ_ENT_DWORDS];
1294
1295 do {
1296 while (!queue_remove_raw(q, evt))
1297 arm_smmu_handle_ppr(smmu, evt);
1298
1299 if (queue_sync_prod(q) == -EOVERFLOW)
1300 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1301 } while (!queue_empty(q));
1302
1303 /* Sync our overflow flag, as we believe we're up to speed */
1304 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1305 writel(q->cons, q->cons_reg);
1306 return IRQ_HANDLED;
1307 }
1308
1309 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1310
1311 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1312 {
1313 u32 gerror, gerrorn, active;
1314 struct arm_smmu_device *smmu = dev;
1315
1316 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1317 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1318
1319 active = gerror ^ gerrorn;
1320 if (!(active & GERROR_ERR_MASK))
1321 return IRQ_NONE; /* No errors pending */
1322
1323 dev_warn(smmu->dev,
1324 "unexpected global error reported (0x%08x), this could be serious\n",
1325 active);
1326
1327 if (active & GERROR_SFM_ERR) {
1328 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1329 arm_smmu_device_disable(smmu);
1330 }
1331
1332 if (active & GERROR_MSI_GERROR_ABT_ERR)
1333 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1334
1335 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1336 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1337
1338 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1339 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1340
1341 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1342 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1343
1344 if (active & GERROR_PRIQ_ABT_ERR)
1345 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1346
1347 if (active & GERROR_EVTQ_ABT_ERR)
1348 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1349
1350 if (active & GERROR_CMDQ_ERR)
1351 arm_smmu_cmdq_skip_err(smmu);
1352
1353 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1354 return IRQ_HANDLED;
1355 }
1356
1357 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1358 {
1359 struct arm_smmu_device *smmu = dev;
1360
1361 arm_smmu_evtq_thread(irq, dev);
1362 if (smmu->features & ARM_SMMU_FEAT_PRI)
1363 arm_smmu_priq_thread(irq, dev);
1364
1365 return IRQ_HANDLED;
1366 }
1367
1368 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1369 {
1370 arm_smmu_gerror_handler(irq, dev);
1371 return IRQ_WAKE_THREAD;
1372 }
1373
1374 /* IO_PGTABLE API */
1375 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1376 {
1377 arm_smmu_cmdq_issue_sync(smmu);
1378 }
1379
1380 static void arm_smmu_tlb_sync(void *cookie)
1381 {
1382 struct arm_smmu_domain *smmu_domain = cookie;
1383 __arm_smmu_tlb_sync(smmu_domain->smmu);
1384 }
1385
1386 static void arm_smmu_tlb_inv_context(void *cookie)
1387 {
1388 struct arm_smmu_domain *smmu_domain = cookie;
1389 struct arm_smmu_device *smmu = smmu_domain->smmu;
1390 struct arm_smmu_cmdq_ent cmd;
1391
1392 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1393 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1394 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1395 cmd.tlbi.vmid = 0;
1396 } else {
1397 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1398 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1399 }
1400
1401 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1402 __arm_smmu_tlb_sync(smmu);
1403 }
1404
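/*
 * Invalidate a range by issuing one TLBI per granule; the caller is expected
 * to follow up with a CMD_SYNC via the ->tlb_sync callback.
 */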
1405 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1406 size_t granule, bool leaf, void *cookie)
1407 {
1408 struct arm_smmu_domain *smmu_domain = cookie;
1409 struct arm_smmu_device *smmu = smmu_domain->smmu;
1410 struct arm_smmu_cmdq_ent cmd = {
1411 .tlbi = {
1412 .leaf = leaf,
1413 .addr = iova,
1414 },
1415 };
1416
1417 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1418 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1419 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1420 } else {
1421 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1422 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1423 }
1424
1425 do {
1426 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1427 cmd.tlbi.addr += granule;
1428 } while (size -= granule);
1429 }
1430
1431 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1432 .tlb_flush_all = arm_smmu_tlb_inv_context,
1433 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1434 .tlb_sync = arm_smmu_tlb_sync,
1435 };
1436
1437 /* IOMMU API */
1438 static bool arm_smmu_capable(enum iommu_cap cap)
1439 {
1440 switch (cap) {
1441 case IOMMU_CAP_CACHE_COHERENCY:
1442 return true;
1443 case IOMMU_CAP_NOEXEC:
1444 return true;
1445 default:
1446 return false;
1447 }
1448 }
1449
1450 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1451 {
1452 struct arm_smmu_domain *smmu_domain;
1453
1454 if (type != IOMMU_DOMAIN_UNMANAGED &&
1455 type != IOMMU_DOMAIN_DMA &&
1456 type != IOMMU_DOMAIN_IDENTITY)
1457 return NULL;
1458
1459 /*
1460 * Allocate the domain and initialise some of its data structures.
1461 * We can't really do anything meaningful until we've added a
1462 * master.
1463 */
1464 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1465 if (!smmu_domain)
1466 return NULL;
1467
1468 if (type == IOMMU_DOMAIN_DMA &&
1469 iommu_get_dma_cookie(&smmu_domain->domain)) {
1470 kfree(smmu_domain);
1471 return NULL;
1472 }
1473
1474 mutex_init(&smmu_domain->init_mutex);
1475 return &smmu_domain->domain;
1476 }
1477
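/*
 * Allocate the first free index in @map without holding a lock, retrying if
 * another caller claims the bit between the search and the test_and_set.
 */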
1478 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1479 {
1480 int idx, size = 1 << span;
1481
1482 do {
1483 idx = find_first_zero_bit(map, size);
1484 if (idx == size)
1485 return -ENOSPC;
1486 } while (test_and_set_bit(idx, map));
1487
1488 return idx;
1489 }
1490
1491 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1492 {
1493 clear_bit(idx, map);
1494 }
1495
1496 static void arm_smmu_domain_free(struct iommu_domain *domain)
1497 {
1498 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1499 struct arm_smmu_device *smmu = smmu_domain->smmu;
1500
1501 iommu_put_dma_cookie(domain);
1502 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1503
1504 /* Free the CD and ASID, if we allocated them */
1505 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1506 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1507
1508 if (cfg->cdptr) {
1509 dmam_free_coherent(smmu_domain->smmu->dev,
1510 CTXDESC_CD_DWORDS << 3,
1511 cfg->cdptr,
1512 cfg->cdptr_dma);
1513
1514 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1515 }
1516 } else {
1517 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1518 if (cfg->vmid)
1519 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1520 }
1521
1522 kfree(smmu_domain);
1523 }
1524
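/*
 * Allocate an ASID and a single context descriptor for a stage-1 domain,
 * recording the TTBR/TCR/MAIR values from the io-pgtable configuration.
 */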
1525 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1526 struct io_pgtable_cfg *pgtbl_cfg)
1527 {
1528 int ret;
1529 int asid;
1530 struct arm_smmu_device *smmu = smmu_domain->smmu;
1531 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1532
1533 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1534 if (asid < 0)
1535 return asid;
1536
1537 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1538 &cfg->cdptr_dma,
1539 GFP_KERNEL | __GFP_ZERO);
1540 if (!cfg->cdptr) {
1541 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1542 ret = -ENOMEM;
1543 goto out_free_asid;
1544 }
1545
1546 cfg->cd.asid = (u16)asid;
1547 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1548 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1549 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1550 return 0;
1551
1552 out_free_asid:
1553 arm_smmu_bitmap_free(smmu->asid_map, asid);
1554 return ret;
1555 }
1556
1557 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1558 struct io_pgtable_cfg *pgtbl_cfg)
1559 {
1560 int vmid;
1561 struct arm_smmu_device *smmu = smmu_domain->smmu;
1562 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1563
1564 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1565 if (vmid < 0)
1566 return vmid;
1567
1568 cfg->vmid = (u16)vmid;
1569 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1570 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1571 return 0;
1572 }
1573
1574 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1575 {
1576 int ret;
1577 unsigned long ias, oas;
1578 enum io_pgtable_fmt fmt;
1579 struct io_pgtable_cfg pgtbl_cfg;
1580 struct io_pgtable_ops *pgtbl_ops;
1581 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1582 struct io_pgtable_cfg *);
1583 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1584 struct arm_smmu_device *smmu = smmu_domain->smmu;
1585
1586 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1587 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1588 return 0;
1589 }
1590
1591 /* Restrict the stage to what we can actually support */
1592 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1593 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1594 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1595 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1596
1597 switch (smmu_domain->stage) {
1598 case ARM_SMMU_DOMAIN_S1:
1599 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1600 ias = min_t(unsigned long, ias, VA_BITS);
1601 oas = smmu->ias;
1602 fmt = ARM_64_LPAE_S1;
1603 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1604 break;
1605 case ARM_SMMU_DOMAIN_NESTED:
1606 case ARM_SMMU_DOMAIN_S2:
1607 ias = smmu->ias;
1608 oas = smmu->oas;
1609 fmt = ARM_64_LPAE_S2;
1610 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1611 break;
1612 default:
1613 return -EINVAL;
1614 }
1615
1616 pgtbl_cfg = (struct io_pgtable_cfg) {
1617 .pgsize_bitmap = smmu->pgsize_bitmap,
1618 .ias = ias,
1619 .oas = oas,
1620 .tlb = &arm_smmu_gather_ops,
1621 .iommu_dev = smmu->dev,
1622 };
1623
1624 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1625 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1626
1627 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1628 if (!pgtbl_ops)
1629 return -ENOMEM;
1630
1631 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1632 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1633 domain->geometry.force_aperture = true;
1634
1635 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1636 if (ret < 0) {
1637 free_io_pgtable_ops(pgtbl_ops);
1638 return ret;
1639 }
1640
1641 smmu_domain->pgtbl_ops = pgtbl_ops;
1642 return 0;
1643 }
1644
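/*
 * Return a pointer to the STE for @sid: for a two-level table, the upper SID
 * bits index the L1 descriptors and the bottom STRTAB_SPLIT bits index the
 * corresponding L2 table.
 */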
1645 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1646 {
1647 __le64 *step;
1648 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1649
1650 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1651 struct arm_smmu_strtab_l1_desc *l1_desc;
1652 int idx;
1653
1654 /* Two-level walk */
1655 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1656 l1_desc = &cfg->l1_desc[idx];
1657 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1658 step = &l1_desc->l2ptr[idx];
1659 } else {
1660 /* Simple linear lookup */
1661 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1662 }
1663
1664 return step;
1665 }
1666
1667 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1668 {
1669 int i, j;
1670 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1671 struct arm_smmu_device *smmu = master->smmu;
1672
1673 for (i = 0; i < fwspec->num_ids; ++i) {
1674 u32 sid = fwspec->ids[i];
1675 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1676
1677 /* Bridged PCI devices may end up with duplicated IDs */
1678 for (j = 0; j < i; j++)
1679 if (fwspec->ids[j] == sid)
1680 break;
1681 if (j < i)
1682 continue;
1683
1684 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1685 }
1686 }
1687
1688 static void arm_smmu_detach_dev(struct device *dev)
1689 {
1690 struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1691
1692 master->ste.assigned = false;
1693 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1694 }
1695
1696 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1697 {
1698 int ret = 0;
1699 struct arm_smmu_device *smmu;
1700 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1701 struct arm_smmu_master_data *master;
1702 struct arm_smmu_strtab_ent *ste;
1703
1704 if (!dev->iommu_fwspec)
1705 return -ENOENT;
1706
1707 master = dev->iommu_fwspec->iommu_priv;
1708 smmu = master->smmu;
1709 ste = &master->ste;
1710
1711 /* Already attached to a different domain? */
1712 if (ste->assigned)
1713 arm_smmu_detach_dev(dev);
1714
1715 mutex_lock(&smmu_domain->init_mutex);
1716
1717 if (!smmu_domain->smmu) {
1718 smmu_domain->smmu = smmu;
1719 ret = arm_smmu_domain_finalise(domain);
1720 if (ret) {
1721 smmu_domain->smmu = NULL;
1722 goto out_unlock;
1723 }
1724 } else if (smmu_domain->smmu != smmu) {
1725 dev_err(dev,
1726 "cannot attach to SMMU %s (upstream of %s)\n",
1727 dev_name(smmu_domain->smmu->dev),
1728 dev_name(smmu->dev));
1729 ret = -ENXIO;
1730 goto out_unlock;
1731 }
1732
1733 ste->assigned = true;
1734
1735 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1736 ste->s1_cfg = NULL;
1737 ste->s2_cfg = NULL;
1738 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1739 ste->s1_cfg = &smmu_domain->s1_cfg;
1740 ste->s2_cfg = NULL;
1741 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1742 } else {
1743 ste->s1_cfg = NULL;
1744 ste->s2_cfg = &smmu_domain->s2_cfg;
1745 }
1746
1747 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1748 out_unlock:
1749 mutex_unlock(&smmu_domain->init_mutex);
1750 return ret;
1751 }
1752
1753 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1754 phys_addr_t paddr, size_t size, int prot)
1755 {
1756 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1757
1758 if (!ops)
1759 return -ENODEV;
1760
1761 return ops->map(ops, iova, paddr, size, prot);
1762 }
1763
1764 static size_t
1765 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1766 {
1767 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1768
1769 if (!ops)
1770 return 0;
1771
1772 return ops->unmap(ops, iova, size);
1773 }
1774
1775 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1776 {
1777 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1778
1779 if (smmu)
1780 __arm_smmu_tlb_sync(smmu);
1781 }
1782
1783 static phys_addr_t
1784 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1785 {
1786 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1787
1788 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1789 return iova;
1790
1791 if (!ops)
1792 return 0;
1793
1794 return ops->iova_to_phys(ops, iova);
1795 }
1796
1797 static struct platform_driver arm_smmu_driver;
1798
1799 static int arm_smmu_match_node(struct device *dev, void *data)
1800 {
1801 return dev->fwnode == data;
1802 }
1803
1804 static
1805 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1806 {
1807 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1808 fwnode, arm_smmu_match_node);
1809 put_device(dev);
1810 return dev ? dev_get_drvdata(dev) : NULL;
1811 }
1812
1813 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1814 {
1815 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1816
1817 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1818 limit *= 1UL << STRTAB_SPLIT;
1819
1820 return sid < limit;
1821 }
1822
1823 static struct iommu_ops arm_smmu_ops;
1824
1825 static int arm_smmu_add_device(struct device *dev)
1826 {
1827 int i, ret;
1828 struct arm_smmu_device *smmu;
1829 struct arm_smmu_master_data *master;
1830 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1831 struct iommu_group *group;
1832
1833 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1834 return -ENODEV;
1835 /*
1836 * We _can_ actually withstand dodgy bus code re-calling add_device()
1837 * without an intervening remove_device()/of_xlate() sequence, but
1838 * we're not going to do so quietly...
1839 */
1840 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1841 master = fwspec->iommu_priv;
1842 smmu = master->smmu;
1843 } else {
1844 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1845 if (!smmu)
1846 return -ENODEV;
1847 master = kzalloc(sizeof(*master), GFP_KERNEL);
1848 if (!master)
1849 return -ENOMEM;
1850
1851 master->smmu = smmu;
1852 fwspec->iommu_priv = master;
1853 }
1854
1855 /* Check the SIDs are in range of the SMMU and our stream table */
1856 for (i = 0; i < fwspec->num_ids; i++) {
1857 u32 sid = fwspec->ids[i];
1858
1859 if (!arm_smmu_sid_in_range(smmu, sid))
1860 return -ERANGE;
1861
1862 /* Ensure l2 strtab is initialised */
1863 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1864 ret = arm_smmu_init_l2_strtab(smmu, sid);
1865 if (ret)
1866 return ret;
1867 }
1868 }
1869
1870 group = iommu_group_get_for_dev(dev);
1871 if (!IS_ERR(group)) {
1872 iommu_group_put(group);
1873 iommu_device_link(&smmu->iommu, dev);
1874 }
1875
1876 return PTR_ERR_OR_ZERO(group);
1877 }
1878
1879 static void arm_smmu_remove_device(struct device *dev)
1880 {
1881 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1882 struct arm_smmu_master_data *master;
1883 struct arm_smmu_device *smmu;
1884
1885 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1886 return;
1887
1888 master = fwspec->iommu_priv;
1889 smmu = master->smmu;
1890 if (master->ste.assigned)
1891 arm_smmu_detach_dev(dev);
1892 iommu_group_remove_device(dev);
1893 iommu_device_unlink(&smmu->iommu, dev);
1894 kfree(master);
1895 iommu_fwspec_free(dev);
1896 }
1897
1898 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1899 {
1900 struct iommu_group *group;
1901
1902 /*
1903 * We don't support devices sharing stream IDs other than PCI RID
1904 * aliases, since the necessary ID-to-device lookup becomes rather
1905 * impractical given a potential sparse 32-bit stream ID space.
1906 */
1907 if (dev_is_pci(dev))
1908 group = pci_device_group(dev);
1909 else
1910 group = generic_device_group(dev);
1911
1912 return group;
1913 }
1914
1915 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1916 enum iommu_attr attr, void *data)
1917 {
1918 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1919
1920 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1921 return -EINVAL;
1922
1923 switch (attr) {
1924 case DOMAIN_ATTR_NESTING:
1925 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1926 return 0;
1927 default:
1928 return -ENODEV;
1929 }
1930 }
1931
1932 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1933 enum iommu_attr attr, void *data)
1934 {
1935 int ret = 0;
1936 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1937
1938 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1939 return -EINVAL;
1940
1941 mutex_lock(&smmu_domain->init_mutex);
1942
1943 switch (attr) {
1944 case DOMAIN_ATTR_NESTING:
1945 if (smmu_domain->smmu) {
1946 ret = -EPERM;
1947 goto out_unlock;
1948 }
1949
1950 if (*(int *)data)
1951 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1952 else
1953 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1954
1955 break;
1956 default:
1957 ret = -ENODEV;
1958 }
1959
1960 out_unlock:
1961 mutex_unlock(&smmu_domain->init_mutex);
1962 return ret;
1963 }
1964
1965 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1966 {
1967 return iommu_fwspec_add_ids(dev, args->args, 1);
1968 }
1969
1970 static void arm_smmu_get_resv_regions(struct device *dev,
1971 struct list_head *head)
1972 {
1973 struct iommu_resv_region *region;
1974 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1975
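/*
 * Reserve a software-managed IOVA window for mapping MSI doorbells of
 * devices behind the SMMU, then pick up any additional reserved
 * regions described by firmware.
 */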
1976 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1977 prot, IOMMU_RESV_SW_MSI);
1978 if (!region)
1979 return;
1980
1981 list_add_tail(&region->list, head);
1982
1983 iommu_dma_get_resv_regions(dev, head);
1984 }
1985
1986 static void arm_smmu_put_resv_regions(struct device *dev,
1987 struct list_head *head)
1988 {
1989 struct iommu_resv_region *entry, *next;
1990
1991 list_for_each_entry_safe(entry, next, head, list)
1992 kfree(entry);
1993 }
1994
1995 static struct iommu_ops arm_smmu_ops = {
1996 .capable = arm_smmu_capable,
1997 .domain_alloc = arm_smmu_domain_alloc,
1998 .domain_free = arm_smmu_domain_free,
1999 .attach_dev = arm_smmu_attach_dev,
2000 .map = arm_smmu_map,
2001 .unmap = arm_smmu_unmap,
2002 .flush_iotlb_all = arm_smmu_iotlb_sync,
2003 .iotlb_sync = arm_smmu_iotlb_sync,
2004 .iova_to_phys = arm_smmu_iova_to_phys,
2005 .add_device = arm_smmu_add_device,
2006 .remove_device = arm_smmu_remove_device,
2007 .device_group = arm_smmu_device_group,
2008 .domain_get_attr = arm_smmu_domain_get_attr,
2009 .domain_set_attr = arm_smmu_domain_set_attr,
2010 .of_xlate = arm_smmu_of_xlate,
2011 .get_resv_regions = arm_smmu_get_resv_regions,
2012 .put_resv_regions = arm_smmu_put_resv_regions,
2013 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2014 };
2015
2016 /* Probing and initialisation functions */
2017 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2018 struct arm_smmu_queue *q,
2019 unsigned long prod_off,
2020 unsigned long cons_off,
2021 size_t dwords)
2022 {
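/*
 * Queue storage is (1 << max_n_shift) entries of 'dwords' 64-bit words
 * each; the "<< 3" converts dwords to bytes. For example, a queue of
 * 256 two-dword (16-byte) entries occupies 4KiB.
 */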
2023 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2024
2025 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2026 if (!q->base) {
2027 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2028 qsz);
2029 return -ENOMEM;
2030 }
2031
2032 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2033 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2034 q->ent_dwords = dwords;
2035
2036 q->q_base = Q_BASE_RWA;
2037 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2038 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2039
2040 q->prod = q->cons = 0;
2041 return 0;
2042 }
2043
2044 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2045 {
2046 int ret;
2047
2048 /* cmdq */
2049 spin_lock_init(&smmu->cmdq.lock);
2050 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2051 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2052 if (ret)
2053 return ret;
2054
2055 /* evtq */
2056 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2057 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2058 if (ret)
2059 return ret;
2060
2061 /* priq */
2062 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2063 return 0;
2064
2065 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2066 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2067 }
2068
2069 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2070 {
2071 unsigned int i;
2072 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2073 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2074 void *strtab = smmu->strtab_cfg.strtab;
2075
2076 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2077 if (!cfg->l1_desc) {
2078 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2079 return -ENOMEM;
2080 }
2081
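/*
 * Sync the freshly zeroed descriptors into the hardware-visible L1
 * table: with no L2 pointer and a zero span, every entry starts out
 * invalid until arm_smmu_init_l2_strtab() populates it for a real SID.
 */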
2082 for (i = 0; i < cfg->num_l1_ents; ++i) {
2083 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2084 strtab += STRTAB_L1_DESC_DWORDS << 3;
2085 }
2086
2087 return 0;
2088 }
2089
2090 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2091 {
2092 void *strtab;
2093 u64 reg;
2094 u32 size, l1size;
2095 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2096
2097 /* Calculate the L1 size, capped to the SIDSIZE. */
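/*
 * "size" starts as log2 of the number of L1 descriptors that fit in a
 * 2^STRTAB_L1_SZ_SHIFT-byte table (each descriptor occupying
 * STRTAB_L1_DESC_DWORDS 64-bit words), then gets capped so that the
 * covered SID space (one L1 entry per 2^STRTAB_SPLIT SIDs) doesn't
 * exceed what the SMMU can generate.
 */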
2098 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2099 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2100 cfg->num_l1_ents = 1 << size;
2101
2102 size += STRTAB_SPLIT;
2103 if (size < smmu->sid_bits)
2104 dev_warn(smmu->dev,
2105 "2-level strtab only covers %u/%u bits of SID\n",
2106 size, smmu->sid_bits);
2107
2108 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2109 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2110 GFP_KERNEL | __GFP_ZERO);
2111 if (!strtab) {
2112 dev_err(smmu->dev,
2113 "failed to allocate l1 stream table (%u bytes)\n",
2114 l1size);
2115 return -ENOMEM;
2116 }
2117 cfg->strtab = strtab;
2118
2119 /* Configure strtab_base_cfg for 2 levels */
2120 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2121 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2122 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2123 cfg->strtab_base_cfg = reg;
2124
2125 return arm_smmu_init_l1_strtab(smmu);
2126 }
2127
2128 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2129 {
2130 void *strtab;
2131 u64 reg;
2132 u32 size;
2133 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2134
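/*
 * A linear table needs one STE (STRTAB_STE_DWORDS 64-bit words) per
 * possible SID, i.e. 2^sid_bits entries, all initialised to bypass
 * below.
 */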
2135 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2136 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2137 GFP_KERNEL | __GFP_ZERO);
2138 if (!strtab) {
2139 dev_err(smmu->dev,
2140 "failed to allocate linear stream table (%u bytes)\n",
2141 size);
2142 return -ENOMEM;
2143 }
2144 cfg->strtab = strtab;
2145 cfg->num_l1_ents = 1 << smmu->sid_bits;
2146
2147 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2148 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2149 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2150 cfg->strtab_base_cfg = reg;
2151
2152 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2153 return 0;
2154 }
2155
2156 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2157 {
2158 u64 reg;
2159 int ret;
2160
2161 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2162 ret = arm_smmu_init_strtab_2lvl(smmu);
2163 else
2164 ret = arm_smmu_init_strtab_linear(smmu);
2165
2166 if (ret)
2167 return ret;
2168
2169 /* Set the strtab base address */
2170 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2171 reg |= STRTAB_BASE_RA;
2172 smmu->strtab_cfg.strtab_base = reg;
2173
2174 /* Allocate the first VMID for stage-2 bypass STEs */
2175 set_bit(0, smmu->vmid_map);
2176 return 0;
2177 }
2178
2179 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2180 {
2181 int ret;
2182
2183 atomic_set(&smmu->sync_nr, 0);
2184 ret = arm_smmu_init_queues(smmu);
2185 if (ret)
2186 return ret;
2187
2188 return arm_smmu_init_strtab(smmu);
2189 }
2190
2191 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2192 unsigned int reg_off, unsigned int ack_off)
2193 {
2194 u32 reg;
2195
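/*
 * Registers with an ACK counterpart (e.g. CR0/CR0ACK, IRQ_CTRL/
 * IRQ_CTRLACK) are updated by writing the new value and then polling
 * the ACK register until it reads back the same value.
 */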
2196 writel_relaxed(val, smmu->base + reg_off);
2197 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2198 1, ARM_SMMU_POLL_TIMEOUT_US);
2199 }
2200
2201 /* GBPA is "special": updates are committed via its self-clearing UPDATE bit */
2202 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2203 {
2204 int ret;
2205 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2206
2207 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2208 1, ARM_SMMU_POLL_TIMEOUT_US);
2209 if (ret)
2210 return ret;
2211
2212 reg &= ~clr;
2213 reg |= set;
2214 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2215 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2216 1, ARM_SMMU_POLL_TIMEOUT_US);
2217
2218 if (ret)
2219 dev_err(smmu->dev, "GBPA not responding to update\n");
2220 return ret;
2221 }
2222
2223 static void arm_smmu_free_msis(void *data)
2224 {
2225 struct device *dev = data;
2226 platform_msi_domain_free_irqs(dev);
2227 }
2228
2229 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2230 {
2231 phys_addr_t doorbell;
2232 struct device *dev = msi_desc_to_dev(desc);
2233 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2234 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2235
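/*
 * arm_smmu_msi_cfg[] maps each platform MSI index (evtq, gerror, priq)
 * to a triplet of register offsets: doorbell address (CFG0), payload
 * (CFG1) and memory attributes (CFG2).
 */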
2236 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2237 doorbell &= MSI_CFG0_ADDR_MASK;
2238
2239 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2240 writel_relaxed(msg->data, smmu->base + cfg[1]);
2241 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2242 }
2243
2244 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2245 {
2246 struct msi_desc *desc;
2247 int ret, nvec = ARM_SMMU_MAX_MSIS;
2248 struct device *dev = smmu->dev;
2249
2250 /* Clear the MSI address regs */
2251 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2252 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2253
2254 if (smmu->features & ARM_SMMU_FEAT_PRI)
2255 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2256 else
2257 nvec--;
2258
2259 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2260 return;
2261
2262 if (!dev->msi_domain) {
2263 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2264 return;
2265 }
2266
2267 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2268 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2269 if (ret) {
2270 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2271 return;
2272 }
2273
2274 for_each_msi_entry(desc, dev) {
2275 switch (desc->platform.msi_index) {
2276 case EVTQ_MSI_INDEX:
2277 smmu->evtq.q.irq = desc->irq;
2278 break;
2279 case GERROR_MSI_INDEX:
2280 smmu->gerr_irq = desc->irq;
2281 break;
2282 case PRIQ_MSI_INDEX:
2283 smmu->priq.q.irq = desc->irq;
2284 break;
2285 default: /* Unknown */
2286 continue;
2287 }
2288 }
2289
2290 /* Add callback to free MSIs on teardown */
2291 devm_add_action(dev, arm_smmu_free_msis, dev);
2292 }
2293
2294 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2295 {
2296 int irq, ret;
2297
2298 arm_smmu_setup_msis(smmu);
2299
2300 /* Request interrupt lines */
2301 irq = smmu->evtq.q.irq;
2302 if (irq) {
2303 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2304 arm_smmu_evtq_thread,
2305 IRQF_ONESHOT,
2306 "arm-smmu-v3-evtq", smmu);
2307 if (ret < 0)
2308 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2309 } else {
2310 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2311 }
2312
2313 irq = smmu->gerr_irq;
2314 if (irq) {
2315 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2316 0, "arm-smmu-v3-gerror", smmu);
2317 if (ret < 0)
2318 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2319 } else {
2320 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2321 }
2322
2323 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2324 irq = smmu->priq.q.irq;
2325 if (irq) {
2326 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2327 arm_smmu_priq_thread,
2328 IRQF_ONESHOT,
2329 "arm-smmu-v3-priq",
2330 smmu);
2331 if (ret < 0)
2332 dev_warn(smmu->dev,
2333 "failed to enable priq irq\n");
2334 } else {
2335 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2336 }
2337 }
2338 }
2339
2340 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2341 {
2342 int ret, irq;
2343 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2344
2345 /* Disable IRQs first */
2346 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2347 ARM_SMMU_IRQ_CTRLACK);
2348 if (ret) {
2349 dev_err(smmu->dev, "failed to disable irqs\n");
2350 return ret;
2351 }
2352
2353 irq = smmu->combined_irq;
2354 if (irq) {
2355 /*
2356 * The Cavium ThunderX2 implementation doesn't support unique irq
2357 * lines. Use a single irq line for all the SMMUv3 interrupts.
2358 */
2359 ret = devm_request_threaded_irq(smmu->dev, irq,
2360 arm_smmu_combined_irq_handler,
2361 arm_smmu_combined_irq_thread,
2362 IRQF_ONESHOT,
2363 "arm-smmu-v3-combined-irq", smmu);
2364 if (ret < 0)
2365 dev_warn(smmu->dev, "failed to enable combined irq\n");
2366 } else
2367 arm_smmu_setup_unique_irqs(smmu);
2368
2369 if (smmu->features & ARM_SMMU_FEAT_PRI)
2370 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2371
2372 /* Enable interrupt generation on the SMMU */
2373 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2374 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2375 if (ret)
2376 dev_warn(smmu->dev, "failed to enable irqs\n");
2377
2378 return 0;
2379 }
2380
2381 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2382 {
2383 int ret;
2384
2385 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2386 if (ret)
2387 dev_err(smmu->dev, "failed to clear cr0\n");
2388
2389 return ret;
2390 }
2391
2392 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2393 {
2394 int ret;
2395 u32 reg, enables;
2396 struct arm_smmu_cmdq_ent cmd;
2397
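/*
 * Bring-up order: make sure the SMMU is disabled, program the table
 * and queue attributes, install the stream table and command queue,
 * enable the command queue first so that configuration and TLB
 * invalidation commands can be issued, then enable the event (and
 * optional PRI) queues, set up interrupts, and finally enable
 * translation or global bypass.
 */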
2398 /* Clear CR0 and sync (disables SMMU and queue processing) */
2399 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2400 if (reg & CR0_SMMUEN) {
2401 if (is_kdump_kernel()) {
2402 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2403 arm_smmu_device_disable(smmu);
2404 return -EBUSY;
2405 }
2406
2407 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2408 }
2409
2410 ret = arm_smmu_device_disable(smmu);
2411 if (ret)
2412 return ret;
2413
2414 /* CR1 (table and queue memory attributes) */
2415 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2416 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2417 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2418 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2419 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2420 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2421 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2422
2423 /* CR2 (random crap) */
2424 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2425 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2426
2427 /* Stream table */
2428 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2429 smmu->base + ARM_SMMU_STRTAB_BASE);
2430 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2431 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2432
2433 /* Command queue */
2434 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2435 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2436 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2437
2438 enables = CR0_CMDQEN;
2439 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2440 ARM_SMMU_CR0ACK);
2441 if (ret) {
2442 dev_err(smmu->dev, "failed to enable command queue\n");
2443 return ret;
2444 }
2445
2446 /* Invalidate any cached configuration */
2447 cmd.opcode = CMDQ_OP_CFGI_ALL;
2448 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2449 arm_smmu_cmdq_issue_sync(smmu);
2450
2451 /* Invalidate any stale TLB entries */
2452 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2453 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2454 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2455 }
2456
2457 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2458 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2459 arm_smmu_cmdq_issue_sync(smmu);
2460
2461 /* Event queue */
2462 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2463 writel_relaxed(smmu->evtq.q.prod,
2464 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2465 writel_relaxed(smmu->evtq.q.cons,
2466 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2467
2468 enables |= CR0_EVTQEN;
2469 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2470 ARM_SMMU_CR0ACK);
2471 if (ret) {
2472 dev_err(smmu->dev, "failed to enable event queue\n");
2473 return ret;
2474 }
2475
2476 /* PRI queue */
2477 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2478 writeq_relaxed(smmu->priq.q.q_base,
2479 smmu->base + ARM_SMMU_PRIQ_BASE);
2480 writel_relaxed(smmu->priq.q.prod,
2481 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2482 writel_relaxed(smmu->priq.q.cons,
2483 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2484
2485 enables |= CR0_PRIQEN;
2486 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2487 ARM_SMMU_CR0ACK);
2488 if (ret) {
2489 dev_err(smmu->dev, "failed to enable PRI queue\n");
2490 return ret;
2491 }
2492 }
2493
2494 ret = arm_smmu_setup_irqs(smmu);
2495 if (ret) {
2496 dev_err(smmu->dev, "failed to setup irqs\n");
2497 return ret;
2498 }
2499
2500
2501 /* Enable the SMMU interface, or ensure bypass */
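/*
 * If bypass was requested and is permitted, leave SMMUEN clear and
 * clear GBPA.ABORT so that incoming transactions bypass translation
 * rather than being terminated.
 */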
2502 if (!bypass || disable_bypass) {
2503 enables |= CR0_SMMUEN;
2504 } else {
2505 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2506 if (ret)
2507 return ret;
2508 }
2509 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2510 ARM_SMMU_CR0ACK);
2511 if (ret) {
2512 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2513 return ret;
2514 }
2515
2516 return 0;
2517 }
2518
2519 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2520 {
2521 u32 reg;
2522 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2523
2524 /* IDR0 */
2525 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2526
2527 /* 2-level structures */
2528 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2529 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2530
2531 if (reg & IDR0_CD2L)
2532 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2533
2534 /*
2535 * Translation table endianness.
2536 * We currently require the same endianness as the CPU, but this
2537 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2538 */
2539 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2540 case IDR0_TTENDIAN_MIXED:
2541 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2542 break;
2543 #ifdef __BIG_ENDIAN
2544 case IDR0_TTENDIAN_BE:
2545 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2546 break;
2547 #else
2548 case IDR0_TTENDIAN_LE:
2549 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2550 break;
2551 #endif
2552 default:
2553 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2554 return -ENXIO;
2555 }
2556
2557 /* Boolean feature flags */
2558 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2559 smmu->features |= ARM_SMMU_FEAT_PRI;
2560
2561 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2562 smmu->features |= ARM_SMMU_FEAT_ATS;
2563
2564 if (reg & IDR0_SEV)
2565 smmu->features |= ARM_SMMU_FEAT_SEV;
2566
2567 if (reg & IDR0_MSI)
2568 smmu->features |= ARM_SMMU_FEAT_MSI;
2569
2570 if (reg & IDR0_HYP)
2571 smmu->features |= ARM_SMMU_FEAT_HYP;
2572
2573 /*
2574 * The coherency feature as set by FW is used in preference to the ID
2575 * register, but warn on mismatch.
2576 */
2577 if (!!(reg & IDR0_COHACC) != coherent)
2578 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2579 coherent ? "true" : "false");
2580
2581 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2582 case IDR0_STALL_MODEL_FORCE:
2583 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2584 /* Fallthrough */
2585 case IDR0_STALL_MODEL_STALL:
2586 smmu->features |= ARM_SMMU_FEAT_STALLS;
2587 }
2588
2589 if (reg & IDR0_S1P)
2590 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2591
2592 if (reg & IDR0_S2P)
2593 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2594
2595 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2596 dev_err(smmu->dev, "no translation support!\n");
2597 return -ENXIO;
2598 }
2599
2600 /* We only support the AArch64 table format at present */
2601 switch (FIELD_GET(IDR0_TTF, reg)) {
2602 case IDR0_TTF_AARCH32_64:
2603 smmu->ias = 40;
2604 /* Fallthrough */
2605 case IDR0_TTF_AARCH64:
2606 break;
2607 default:
2608 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2609 return -ENXIO;
2610 }
2611
2612 /* ASID/VMID sizes */
2613 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2614 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2615
2616 /* IDR1 */
2617 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2618 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2619 dev_err(smmu->dev, "embedded implementation not supported\n");
2620 return -ENXIO;
2621 }
2622
2623 /* Queue sizes, capped at 4k */
2624 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2625 FIELD_GET(IDR1_CMDQS, reg));
2626 if (!smmu->cmdq.q.max_n_shift) {
2627 /* Odd alignment restrictions on the base, so ignore for now */
2628 dev_err(smmu->dev, "unit-length command queue not supported\n");
2629 return -ENXIO;
2630 }
2631
2632 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2633 FIELD_GET(IDR1_EVTQS, reg));
2634 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2635 FIELD_GET(IDR1_PRIQS, reg));
2636
2637 /* SID/SSID sizes */
2638 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2639 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2640
2641 /*
2642 * If the SMMU supports fewer bits than would fill a single L2 stream
2643 * table, use a linear table instead.
2644 */
2645 if (smmu->sid_bits <= STRTAB_SPLIT)
2646 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2647
2648 /* IDR5 */
2649 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2650
2651 /* Maximum number of outstanding stalls */
2652 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2653
2654 /* Page sizes */
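/*
 * Each supported translation granule contributes its base page size
 * plus the block sizes it can map (e.g. the 4K granule also gives
 * 2M and 1G block mappings).
 */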
2655 if (reg & IDR5_GRAN64K)
2656 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2657 if (reg & IDR5_GRAN16K)
2658 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2659 if (reg & IDR5_GRAN4K)
2660 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2661
2662 /* Input address size */
2663 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2664 smmu->features |= ARM_SMMU_FEAT_VAX;
2665
2666 /* Output address size */
2667 switch (FIELD_GET(IDR5_OAS, reg)) {
2668 case IDR5_OAS_32_BIT:
2669 smmu->oas = 32;
2670 break;
2671 case IDR5_OAS_36_BIT:
2672 smmu->oas = 36;
2673 break;
2674 case IDR5_OAS_40_BIT:
2675 smmu->oas = 40;
2676 break;
2677 case IDR5_OAS_42_BIT:
2678 smmu->oas = 42;
2679 break;
2680 case IDR5_OAS_44_BIT:
2681 smmu->oas = 44;
2682 break;
2683 case IDR5_OAS_52_BIT:
2684 smmu->oas = 52;
2685 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2686 break;
2687 default:
2688 dev_info(smmu->dev,
2689 "unknown output address size. Truncating to 48-bit\n");
2690 /* Fallthrough */
2691 case IDR5_OAS_48_BIT:
2692 smmu->oas = 48;
2693 }
2694
2695 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2696 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2697 else
2698 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2699
2700 /* Set the DMA mask for our table walker */
2701 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2702 dev_warn(smmu->dev,
2703 "failed to set DMA mask for table walker\n");
2704
2705 smmu->ias = max(smmu->ias, smmu->oas);
2706
2707 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2708 smmu->ias, smmu->oas, smmu->features);
2709 return 0;
2710 }
2711
2712 #ifdef CONFIG_ACPI
2713 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2714 {
2715 switch (model) {
2716 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2717 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2718 break;
2719 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2720 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2721 break;
2722 }
2723
2724 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2725 }
2726
2727 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2728 struct arm_smmu_device *smmu)
2729 {
2730 struct acpi_iort_smmu_v3 *iort_smmu;
2731 struct device *dev = smmu->dev;
2732 struct acpi_iort_node *node;
2733
2734 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2735
2736 /* Retrieve SMMUv3 specific data */
2737 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2738
2739 acpi_smmu_get_options(iort_smmu->model, smmu);
2740
2741 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2742 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2743
2744 return 0;
2745 }
2746 #else
2747 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2748 struct arm_smmu_device *smmu)
2749 {
2750 return -ENODEV;
2751 }
2752 #endif
2753
2754 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2755 struct arm_smmu_device *smmu)
2756 {
2757 struct device *dev = &pdev->dev;
2758 u32 cells;
2759 int ret = -EINVAL;
2760
2761 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2762 dev_err(dev, "missing #iommu-cells property\n");
2763 else if (cells != 1)
2764 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2765 else
2766 ret = 0;
2767
2768 parse_driver_options(smmu);
2769
2770 if (of_dma_is_coherent(dev->of_node))
2771 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2772
2773 return ret;
2774 }
2775
2776 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2777 {
2778 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2779 return SZ_64K;
2780 else
2781 return SZ_128K;
2782 }
2783
2784 static int arm_smmu_device_probe(struct platform_device *pdev)
2785 {
2786 int irq, ret;
2787 struct resource *res;
2788 resource_size_t ioaddr;
2789 struct arm_smmu_device *smmu;
2790 struct device *dev = &pdev->dev;
2791 bool bypass;
2792
2793 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2794 if (!smmu) {
2795 dev_err(dev, "failed to allocate arm_smmu_device\n");
2796 return -ENOMEM;
2797 }
2798 smmu->dev = dev;
2799
2800 if (dev->of_node) {
2801 ret = arm_smmu_device_dt_probe(pdev, smmu);
2802 } else {
2803 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2804 if (ret == -ENODEV)
2805 return ret;
2806 }
2807
2808 /* Set bypass mode according to firmware probing result */
2809 bypass = !!ret;
2810
2811 /* Base address */
2812 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2813 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2814 dev_err(dev, "MMIO region too small (%pr)\n", res);
2815 return -EINVAL;
2816 }
2817 ioaddr = res->start;
2818
2819 smmu->base = devm_ioremap_resource(dev, res);
2820 if (IS_ERR(smmu->base))
2821 return PTR_ERR(smmu->base);
2822
2823 /* Interrupt lines */
2824
2825 irq = platform_get_irq_byname(pdev, "combined");
2826 if (irq > 0)
2827 smmu->combined_irq = irq;
2828 else {
2829 irq = platform_get_irq_byname(pdev, "eventq");
2830 if (irq > 0)
2831 smmu->evtq.q.irq = irq;
2832
2833 irq = platform_get_irq_byname(pdev, "priq");
2834 if (irq > 0)
2835 smmu->priq.q.irq = irq;
2836
2837 irq = platform_get_irq_byname(pdev, "gerror");
2838 if (irq > 0)
2839 smmu->gerr_irq = irq;
2840 }
2841 /* Probe the h/w */
2842 ret = arm_smmu_device_hw_probe(smmu);
2843 if (ret)
2844 return ret;
2845
2846 /* Initialise in-memory data structures */
2847 ret = arm_smmu_init_structures(smmu);
2848 if (ret)
2849 return ret;
2850
2851 /* Record our private device structure */
2852 platform_set_drvdata(pdev, smmu);
2853
2854 /* Reset the device */
2855 ret = arm_smmu_device_reset(smmu, bypass);
2856 if (ret)
2857 return ret;
2858
2859 /* And we're up. Go go go! */
2860 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2861 "smmu3.%pa", &ioaddr);
2862 if (ret)
2863 return ret;
2864
2865 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2866 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2867
2868 ret = iommu_device_register(&smmu->iommu);
2869 if (ret) {
2870 dev_err(dev, "Failed to register iommu\n");
2871 return ret;
2872 }
2873
2874 #ifdef CONFIG_PCI
2875 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2876 pci_request_acs();
2877 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2878 if (ret)
2879 return ret;
2880 }
2881 #endif
2882 #ifdef CONFIG_ARM_AMBA
2883 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2884 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2885 if (ret)
2886 return ret;
2887 }
2888 #endif
2889 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2890 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2891 if (ret)
2892 return ret;
2893 }
2894 return 0;
2895 }
2896
2897 static int arm_smmu_device_remove(struct platform_device *pdev)
2898 {
2899 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2900
2901 arm_smmu_device_disable(smmu);
2902
2903 return 0;
2904 }
2905
2906 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2907 {
2908 arm_smmu_device_remove(pdev);
2909 }
2910
2911 static const struct of_device_id arm_smmu_of_match[] = {
2912 { .compatible = "arm,smmu-v3", },
2913 { },
2914 };
2915 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2916
2917 static struct platform_driver arm_smmu_driver = {
2918 .driver = {
2919 .name = "arm-smmu-v3",
2920 .of_match_table = of_match_ptr(arm_smmu_of_match),
2921 },
2922 .probe = arm_smmu_device_probe,
2923 .remove = arm_smmu_device_remove,
2924 .shutdown = arm_smmu_device_shutdown,
2925 };
2926 module_platform_driver(arm_smmu_driver);
2927
2928 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2929 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2930 MODULE_LICENSE("GPL v2");
2931