1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
4 *
5 * Copyright 2018 IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13 #include <linux/kernel.h>
14 #include <linux/iommu.h>
15
16 #include <asm/iommu.h>
17 #include <asm/tce.h>
18 #include "pci.h"
19
/**
 * pnv_pci_setup_iommu_table - fill in the basic fields of an iommu_table
 * @tbl: table descriptor to initialise
 * @tce_mem: address of the TCE table memory; stored in @tbl->it_base
 * @tce_size: size of the TCE table in bytes (each entry takes 8 bytes)
 * @dma_offset: DMA offset of the window; stored in IOMMU-page units
 * @page_shift: log2 of the IOMMU page size
 *
 * Only the fields assigned below are written; every other member of
 * @tbl is left as the caller set it.
 */
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
		void *tce_mem, u64 tce_size,
		u64 dma_offset, unsigned int page_shift)
{
	/* Fixed attributes */
	tbl->it_type = TCE_PCI;
	tbl->it_blocksize = 16;
	tbl->it_busno = 0;
	tbl->it_index = 0;

	/* Backing memory and the number of 8-byte entries it holds */
	tbl->it_base = (unsigned long)tce_mem;
	tbl->it_size = tce_size >> 3;

	/* Window start, converted from bytes to IOMMU pages */
	tbl->it_page_shift = page_shift;
	tbl->it_offset = dma_offset >> tbl->it_page_shift;
}
33
pnv_alloc_tce_level(int nid,unsigned int shift)34 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
35 {
36 struct page *tce_mem = NULL;
37 __be64 *addr;
38
39 tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
40 shift - PAGE_SHIFT);
41 if (!tce_mem) {
42 pr_err("Failed to allocate a TCE memory, level shift=%d\n",
43 shift);
44 return NULL;
45 }
46 addr = page_address(tce_mem);
47 memset(addr, 0, 1UL << shift);
48
49 return addr;
50 }
51
52 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
53 unsigned long size, unsigned int levels);
54
pnv_tce(struct iommu_table * tbl,bool user,long idx,bool alloc)55 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
56 {
57 __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
58 int level = tbl->it_indirect_levels;
59 const long shift = ilog2(tbl->it_level_size);
60 unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
61
62 while (level) {
63 int n = (idx & mask) >> (level * shift);
64 unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
65
66 if (!tce) {
67 __be64 *tmp2;
68
69 if (!alloc)
70 return NULL;
71
72 tmp2 = pnv_alloc_tce_level(tbl->it_nid,
73 ilog2(tbl->it_level_size) + 3);
74 if (!tmp2)
75 return NULL;
76
77 tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
78 oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
79 cpu_to_be64(tce)));
80 if (oldtce) {
81 pnv_pci_ioda2_table_do_free_pages(tmp2,
82 ilog2(tbl->it_level_size) + 3, 1);
83 tce = oldtce;
84 }
85 }
86
87 tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
88 idx &= ~mask;
89 mask >>= shift;
90 --level;
91 }
92
93 return tmp + idx;
94 }
95
pnv_tce_build(struct iommu_table * tbl,long index,long npages,unsigned long uaddr,enum dma_data_direction direction,unsigned long attrs)96 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
97 unsigned long uaddr, enum dma_data_direction direction,
98 unsigned long attrs)
99 {
100 u64 proto_tce = iommu_direction_to_tce_perm(direction);
101 u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
102 long i;
103
104 if (proto_tce & TCE_PCI_WRITE)
105 proto_tce |= TCE_PCI_READ;
106
107 for (i = 0; i < npages; i++) {
108 unsigned long newtce = proto_tce |
109 ((rpn + i) << tbl->it_page_shift);
110 unsigned long idx = index - tbl->it_offset + i;
111
112 *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
113 }
114
115 return 0;
116 }
117
118 #ifdef CONFIG_IOMMU_API
/**
 * pnv_tce_xchg - atomically replace one TCE and report the previous one
 * @tbl: table to update
 * @index: entry to exchange; @tbl->it_offset is subtracted from it
 * @hpa: in: address for the new entry, must be IOMMU-page aligned
 *       (BUG otherwise); out: address held by the old entry
 * @direction: in: direction for the new entry; out: direction decoded
 *             from the old entry
 * @alloc: whether missing table levels may be allocated
 *
 * A DMA_NONE request never allocates: if the entry's level does not
 * exist, *@hpa is set to 0 and 0 is returned.
 *
 * Return: 0 on success; if the entry cannot be located, H_HARDWARE when
 * @alloc was set and H_TOO_HARD when it was not.
 */
int pnv_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction,
		bool alloc)
{
	const u64 perm = iommu_direction_to_tce_perm(*direction);
	const unsigned long entry = index - tbl->it_offset;
	unsigned long newtce = *hpa | perm;
	unsigned long oldtce;
	__be64 *ptce;

	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

	if (*direction == DMA_NONE) {
		/* Clearing: a missing level means nothing to clear */
		ptce = pnv_tce(tbl, false, entry, false);
		if (!ptce) {
			*hpa = 0;
			return 0;
		}
	} else {
		ptce = pnv_tce(tbl, false, entry, alloc);
		if (!ptce)
			return alloc ? H_HARDWARE : H_TOO_HARD;
	}

	/* Write permission implies read permission */
	if (newtce & TCE_PCI_WRITE)
		newtce |= TCE_PCI_READ;

	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));

	*direction = iommu_tce_direction(oldtce);
	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);

	return 0;
}
153
pnv_tce_useraddrptr(struct iommu_table * tbl,long index,bool alloc)154 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
155 {
156 if (WARN_ON_ONCE(!tbl->it_userspace))
157 return NULL;
158
159 return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
160 }
161 #endif
162
pnv_tce_free(struct iommu_table * tbl,long index,long npages)163 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
164 {
165 long i;
166
167 for (i = 0; i < npages; i++) {
168 unsigned long idx = index - tbl->it_offset + i;
169 __be64 *ptce = pnv_tce(tbl, false, idx, false);
170
171 if (ptce)
172 *ptce = cpu_to_be64(0);
173 else
174 /* Skip the rest of the level */
175 i |= tbl->it_level_size - 1;
176 }
177 }
178
pnv_tce_get(struct iommu_table * tbl,long index)179 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
180 {
181 __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
182
183 if (!ptce)
184 return 0;
185
186 return be64_to_cpu(*ptce);
187 }
188
/**
 * pnv_pci_ioda2_table_do_free_pages - free one table level and its children
 * @addr: address of the level; the TCE permission bits are masked off
 * @size: number of 8-byte entries in one level
 * @levels: number of indirect levels below @addr (0 for a leaf)
 *
 * For a non-leaf level, every entry that has a permission bit set is
 * treated as a pointer to a child level and freed recursively before
 * the level itself is released.
 */
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels)
{
	const unsigned long base = (unsigned long) addr &
			~(TCE_PCI_READ | TCE_PCI_WRITE);
	unsigned long n;

	if (levels > 0) {
		u64 *entries = (u64 *) base;

		for (n = 0; n < size; n++) {
			unsigned long child = be64_to_cpu(entries[n]);

			/* Entries without permission bits are unused */
			if (child & (TCE_PCI_READ | TCE_PCI_WRITE))
				pnv_pci_ioda2_table_do_free_pages(__va(child),
						size, levels - 1);
		}
	}

	free_pages(base, get_order(size << 3));
}
212
pnv_pci_ioda2_table_free_pages(struct iommu_table * tbl)213 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
214 {
215 const unsigned long size = tbl->it_indirect_levels ?
216 tbl->it_level_size : tbl->it_size;
217
218 if (!tbl->it_size)
219 return;
220
221 pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
222 tbl->it_indirect_levels);
223 if (tbl->it_userspace) {
224 pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
225 tbl->it_indirect_levels);
226 }
227 }
228
pnv_pci_ioda2_table_do_alloc_pages(int nid,unsigned int shift,unsigned int levels,unsigned long limit,unsigned long * current_offset,unsigned long * total_allocated)229 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
230 unsigned int levels, unsigned long limit,
231 unsigned long *current_offset, unsigned long *total_allocated)
232 {
233 __be64 *addr, *tmp;
234 unsigned long allocated = 1UL << shift;
235 unsigned int entries = 1UL << (shift - 3);
236 long i;
237
238 addr = pnv_alloc_tce_level(nid, shift);
239 *total_allocated += allocated;
240
241 --levels;
242 if (!levels) {
243 *current_offset += allocated;
244 return addr;
245 }
246
247 for (i = 0; i < entries; ++i) {
248 tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
249 levels, limit, current_offset, total_allocated);
250 if (!tmp)
251 break;
252
253 addr[i] = cpu_to_be64(__pa(tmp) |
254 TCE_PCI_READ | TCE_PCI_WRITE);
255
256 if (*current_offset >= limit)
257 break;
258 }
259
260 return addr;
261 }
262
/**
 * pnv_pci_ioda2_table_alloc_pages - allocate and describe a TCE table
 * @nid: NUMA node to allocate on
 * @bus_offset: DMA offset at which the window starts
 * @page_shift: log2 of the IOMMU page size
 * @window_size: size of the DMA window in bytes; must be a power of two
 *               and at least one IOMMU page
 * @levels: number of table levels, 1..POWERNV_IOMMU_MAX_LEVELS
 * @alloc_userspace_copy: also allocate a userspace view of the table
 * @tbl: descriptor to fill in on success
 *
 * Only the first level is allocated here (the allocation helper is
 * called with a depth of 1); lower levels of a multi-level table are
 * left to be allocated later.
 *
 * Return: 0 on success, -EINVAL for an unsupported geometry, -ENOMEM if
 * memory could not be allocated. @tbl is only written on success.
 */
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
		__u32 page_shift, __u64 window_size, __u32 levels,
		bool alloc_userspace_copy, struct iommu_table *tbl)
{
	void *addr, *uas = NULL;
	unsigned long offset = 0, level_shift, total_allocated = 0;
	unsigned long total_allocated_uas = 0;
	unsigned long tce_table_size;
	unsigned int window_shift, entries_shift, table_shift;

	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
		return -EINVAL;

	if (!is_power_of_2(window_size))
		return -EINVAL;

	/*
	 * Derive the geometry only after window_size is known to be a
	 * non-zero power of two, and refuse a window smaller than one IOMMU
	 * page: the subtraction below would wrap otherwise.
	 */
	window_shift = ilog2(window_size);
	if (window_shift < page_shift)
		return -EINVAL;

	entries_shift = window_shift - page_shift;
	table_shift = max_t(unsigned int, entries_shift + 3, PAGE_SHIFT);

	/* Adjust direct table size from window_size and levels */
	entries_shift = (entries_shift + levels - 1) / levels;
	level_shift = entries_shift + 3;
	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);

	if ((level_shift - 3) * levels + page_shift >= 55)
		return -EINVAL;

	/* The check above bounds table_shift, so this shift is in range */
	tce_table_size = 1UL << table_shift;

	/* Allocate TCE table */
	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
			1, tce_table_size, &offset, &total_allocated);

	/* addr==NULL means that the first level allocation failed */
	if (!addr)
		return -ENOMEM;

	/*
	 * First level was allocated but some lower level failed as
	 * we did not allocate as much as we wanted,
	 * release partially allocated table.
	 */
	if (levels == 1 && offset < tce_table_size)
		goto free_tces_exit;

	/* Allocate userspace view of the TCE table */
	if (alloc_userspace_copy) {
		offset = 0;
		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
				1, tce_table_size, &offset,
				&total_allocated_uas);
		if (!uas)
			goto free_tces_exit;
		if (levels == 1 && (offset < tce_table_size ||
				total_allocated_uas != total_allocated))
			goto free_uas_exit;
	}

	/* Setup linux iommu table */
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
			page_shift);
	tbl->it_level_size = 1ULL << (level_shift - 3);
	tbl->it_indirect_levels = levels - 1;
	tbl->it_userspace = uas;
	tbl->it_nid = nid;

	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
			window_size, tce_table_size, bus_offset, tbl->it_base,
			tbl->it_userspace, 1, levels);

	return 0;

free_uas_exit:
	pnv_pci_ioda2_table_do_free_pages(uas,
			1ULL << (level_shift - 3), levels - 1);
free_tces_exit:
	pnv_pci_ioda2_table_do_free_pages(addr,
			1ULL << (level_shift - 3), levels - 1);

	return -ENOMEM;
}
342
pnv_iommu_table_group_link_free(struct rcu_head * head)343 static void pnv_iommu_table_group_link_free(struct rcu_head *head)
344 {
345 struct iommu_table_group_link *tgl = container_of(head,
346 struct iommu_table_group_link, rcu);
347
348 kfree(tgl);
349 }
350
pnv_pci_unlink_table_and_group(struct iommu_table * tbl,struct iommu_table_group * table_group)351 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
352 struct iommu_table_group *table_group)
353 {
354 long i;
355 bool found;
356 struct iommu_table_group_link *tgl;
357
358 if (!tbl || !table_group)
359 return;
360
361 /* Remove link to a group from table's list of attached groups */
362 found = false;
363 list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
364 if (tgl->table_group == table_group) {
365 list_del_rcu(&tgl->next);
366 call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
367 found = true;
368 break;
369 }
370 }
371 if (WARN_ON(!found))
372 return;
373
374 /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
375 found = false;
376 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
377 if (table_group->tables[i] == tbl) {
378 iommu_tce_table_put(tbl);
379 table_group->tables[i] = NULL;
380 found = true;
381 break;
382 }
383 }
384 WARN_ON(!found);
385 }
386
pnv_pci_link_table_and_group(int node,int num,struct iommu_table * tbl,struct iommu_table_group * table_group)387 long pnv_pci_link_table_and_group(int node, int num,
388 struct iommu_table *tbl,
389 struct iommu_table_group *table_group)
390 {
391 struct iommu_table_group_link *tgl = NULL;
392
393 if (WARN_ON(!tbl || !table_group))
394 return -EINVAL;
395
396 tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
397 node);
398 if (!tgl)
399 return -ENOMEM;
400
401 tgl->table_group = table_group;
402 list_add_rcu(&tgl->next, &tbl->it_group_list);
403
404 table_group->tables[num] = iommu_tce_table_get(tbl);
405
406 return 0;
407 }
408