1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * TCE helpers for IODA PCI/PCIe on PowerNV platforms
4  *
5  * Copyright 2018 IBM Corp.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/iommu.h>
15 
16 #include <asm/iommu.h>
17 #include <asm/tce.h>
18 #include "pci.h"
19 
/*
 * Initialise the generic part of @tbl for a TCE table located at @tce_mem.
 *
 * @tbl:        table descriptor to fill in
 * @tce_mem:    address of the table memory, recorded in it_base
 * @tce_size:   size of the table; it_size is set to this value divided by 8
 * @dma_offset: start of the DMA window; it_offset is this value shifted
 *              right by @page_shift
 * @page_shift: recorded in it_page_shift
 */
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
		void *tce_mem, u64 tce_size,
		u64 dma_offset, unsigned int page_shift)
{
	/* Values that do not depend on the arguments */
	tbl->it_type = TCE_PCI;
	tbl->it_blocksize = 16;
	tbl->it_busno = 0;
	tbl->it_index = 0;

	/* Where the table lives and how many entries it has */
	tbl->it_base = (unsigned long)tce_mem;
	tbl->it_size = tce_size >> 3;

	/* it_offset is derived from the page shift just stored */
	tbl->it_page_shift = page_shift;
	tbl->it_offset = dma_offset >> tbl->it_page_shift;
}
33 
pnv_alloc_tce_level(int nid,unsigned int shift)34 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
35 {
36 	struct page *tce_mem = NULL;
37 	__be64 *addr;
38 
39 	tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
40 			shift - PAGE_SHIFT);
41 	if (!tce_mem) {
42 		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
43 				shift);
44 		return NULL;
45 	}
46 	addr = page_address(tce_mem);
47 	memset(addr, 0, 1UL << shift);
48 
49 	return addr;
50 }
51 
52 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
53 		unsigned long size, unsigned int levels);
54 
pnv_tce(struct iommu_table * tbl,bool user,long idx,bool alloc)55 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
56 {
57 	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
58 	int  level = tbl->it_indirect_levels;
59 	const long shift = ilog2(tbl->it_level_size);
60 	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
61 
62 	while (level) {
63 		int n = (idx & mask) >> (level * shift);
64 		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
65 
66 		if (!tce) {
67 			__be64 *tmp2;
68 
69 			if (!alloc)
70 				return NULL;
71 
72 			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
73 					ilog2(tbl->it_level_size) + 3);
74 			if (!tmp2)
75 				return NULL;
76 
77 			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
78 			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
79 					cpu_to_be64(tce)));
80 			if (oldtce) {
81 				pnv_pci_ioda2_table_do_free_pages(tmp2,
82 					ilog2(tbl->it_level_size) + 3, 1);
83 				tce = oldtce;
84 			}
85 		}
86 
87 		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
88 		idx &= ~mask;
89 		mask >>= shift;
90 		--level;
91 	}
92 
93 	return tmp + idx;
94 }
95 
pnv_tce_build(struct iommu_table * tbl,long index,long npages,unsigned long uaddr,enum dma_data_direction direction,unsigned long attrs)96 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
97 		unsigned long uaddr, enum dma_data_direction direction,
98 		unsigned long attrs)
99 {
100 	u64 proto_tce = iommu_direction_to_tce_perm(direction);
101 	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
102 	long i;
103 
104 	if (proto_tce & TCE_PCI_WRITE)
105 		proto_tce |= TCE_PCI_READ;
106 
107 	for (i = 0; i < npages; i++) {
108 		unsigned long newtce = proto_tce |
109 			((rpn + i) << tbl->it_page_shift);
110 		unsigned long idx = index - tbl->it_offset + i;
111 
112 		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
113 	}
114 
115 	return 0;
116 }
117 
118 #ifdef CONFIG_IOMMU_API
/*
 * Atomically swap the TCE at @index with a new one built from *@hpa and
 * *@direction, returning the previous address and direction through the
 * same two pointers.
 *
 * @tbl:       table to update
 * @index:     entry to exchange; tbl->it_offset is subtracted from it
 * @hpa:       in: new address (must be IOMMU page aligned, enforced with
 *             BUG_ON); out: address from the old entry
 * @direction: in: new DMA direction; out: direction of the old entry
 * @alloc:     whether missing table levels may be allocated
 *
 * When *@direction is DMA_NONE no level is ever allocated; if the entry's
 * level does not exist, *@hpa is set to 0, *@direction is left untouched
 * and 0 is returned.
 *
 * Returns 0 on success. If the entry cannot be reached, returns H_HARDWARE
 * when @alloc was true and H_TOO_HARD when it was false.
 */
int pnv_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction,
		bool alloc)
{
	const u64 perm = iommu_direction_to_tce_perm(*direction);
	const unsigned long entry = index - tbl->it_offset;
	unsigned long tce_new = *hpa | perm;
	unsigned long tce_old;
	__be64 *slot;

	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

	if (*direction == DMA_NONE) {
		/* Clearing an entry: look it up without allocating */
		slot = pnv_tce(tbl, false, entry, false);
		if (!slot) {
			*hpa = 0;
			return 0;
		}
	} else {
		slot = pnv_tce(tbl, false, entry, alloc);
		if (!slot)
			return alloc ? H_HARDWARE : H_TOO_HARD;
	}

	/* Write permission always comes with read permission */
	if (tce_new & TCE_PCI_WRITE)
		tce_new |= TCE_PCI_READ;

	tce_old = be64_to_cpu(xchg(slot, cpu_to_be64(tce_new)));

	/* Hand the old mapping back to the caller */
	*hpa = tce_old & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	*direction = iommu_tce_direction(tce_old);

	return 0;
}
153 
pnv_tce_useraddrptr(struct iommu_table * tbl,long index,bool alloc)154 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
155 {
156 	if (WARN_ON_ONCE(!tbl->it_userspace))
157 		return NULL;
158 
159 	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
160 }
161 #endif
162 
pnv_tce_free(struct iommu_table * tbl,long index,long npages)163 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
164 {
165 	long i;
166 
167 	for (i = 0; i < npages; i++) {
168 		unsigned long idx = index - tbl->it_offset + i;
169 		__be64 *ptce = pnv_tce(tbl, false, idx,	false);
170 
171 		if (ptce)
172 			*ptce = cpu_to_be64(0);
173 		else
174 			/* Skip the rest of the level */
175 			i |= tbl->it_level_size - 1;
176 	}
177 }
178 
pnv_tce_get(struct iommu_table * tbl,long index)179 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
180 {
181 	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
182 
183 	if (!ptce)
184 		return 0;
185 
186 	return be64_to_cpu(*ptce);
187 }
188 
/*
 * Free one level of a TCE table and, recursively, every level below it.
 *
 * @addr:   address of the level; the TCE permission bits may still be set
 *          in it and are masked off
 * @size:   number of 8-byte entries in a level (the page order is computed
 *          from size << 3)
 * @levels: how many levels exist below this one; 0 means this level has
 *          no children to descend into
 */
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels)
{
	const unsigned long table = (unsigned long) addr &
			~(TCE_PCI_READ | TCE_PCI_WRITE);

	if (levels) {
		u64 *entries = (u64 *) table;
		unsigned long n;

		for (n = 0; n < size; ++n) {
			unsigned long child = be64_to_cpu(entries[n]);

			/* Only entries with a permission bit set point to a child */
			if (child & (TCE_PCI_READ | TCE_PCI_WRITE))
				pnv_pci_ioda2_table_do_free_pages(__va(child),
						size, levels - 1);
		}
	}

	free_pages(table, get_order(size << 3));
}
212 
pnv_pci_ioda2_table_free_pages(struct iommu_table * tbl)213 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
214 {
215 	const unsigned long size = tbl->it_indirect_levels ?
216 			tbl->it_level_size : tbl->it_size;
217 
218 	if (!tbl->it_size)
219 		return;
220 
221 	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
222 			tbl->it_indirect_levels);
223 	if (tbl->it_userspace) {
224 		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
225 				tbl->it_indirect_levels);
226 	}
227 }
228 
pnv_pci_ioda2_table_do_alloc_pages(int nid,unsigned int shift,unsigned int levels,unsigned long limit,unsigned long * current_offset,unsigned long * total_allocated)229 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
230 		unsigned int levels, unsigned long limit,
231 		unsigned long *current_offset, unsigned long *total_allocated)
232 {
233 	__be64 *addr, *tmp;
234 	unsigned long allocated = 1UL << shift;
235 	unsigned int entries = 1UL << (shift - 3);
236 	long i;
237 
238 	addr = pnv_alloc_tce_level(nid, shift);
239 	*total_allocated += allocated;
240 
241 	--levels;
242 	if (!levels) {
243 		*current_offset += allocated;
244 		return addr;
245 	}
246 
247 	for (i = 0; i < entries; ++i) {
248 		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
249 				levels, limit, current_offset, total_allocated);
250 		if (!tmp)
251 			break;
252 
253 		addr[i] = cpu_to_be64(__pa(tmp) |
254 				TCE_PCI_READ | TCE_PCI_WRITE);
255 
256 		if (*current_offset >= limit)
257 			break;
258 	}
259 
260 	return addr;
261 }
262 
/*
 * Allocate the memory for a TCE table and initialise @tbl to describe it.
 *
 * @nid:                  NUMA node to allocate on
 * @bus_offset:           start of the DMA window on the bus
 * @page_shift:           log2 of the IOMMU page size
 * @window_size:          DMA window size; must be a power of two
 * @levels:               number of table levels, 1..POWERNV_IOMMU_MAX_LEVELS
 * @alloc_userspace_copy: also allocate a second table stored in
 *                        tbl->it_userspace
 * @tbl:                  table descriptor to fill in on success
 *
 * Only the first level is allocated here (the allocator is always asked
 * for a single level).
 *
 * Returns 0 on success, -EINVAL for unsupported parameters, or -ENOMEM if
 * an allocation failed, in which case everything allocated is released.
 */
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
		__u32 page_shift, __u64 window_size, __u32 levels,
		bool alloc_userspace_copy, struct iommu_table *tbl)
{
	void *hw_table, *user_table = NULL;
	unsigned long done = 0, level_shift;
	unsigned long hw_bytes = 0, user_bytes = 0;
	unsigned long long level_entries;
	const unsigned int window_shift = ilog2(window_size);
	unsigned int entries_shift = window_shift - page_shift;
	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
			PAGE_SHIFT);
	const unsigned long tce_table_size = 1UL << table_shift;

	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
			!is_power_of_2(window_size))
		return -EINVAL;

	/* Split the entry bits evenly between levels, rounding up */
	entries_shift = (entries_shift + levels - 1) / levels;
	/* A level is never smaller than one page */
	level_shift = max_t(unsigned int, entries_shift + 3, PAGE_SHIFT);

	if ((level_shift - 3) * levels + page_shift >= 55)
		return -EINVAL;

	level_entries = 1ULL << (level_shift - 3);

	/* The table itself */
	hw_table = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
			1, tce_table_size, &done, &hw_bytes);

	/* NULL means that the first level allocation failed */
	if (!hw_table)
		return -ENOMEM;

	/*
	 * For a single-level table, having covered less than the whole
	 * table means we did not get as much as we wanted: release it.
	 */
	if (levels == 1 && done < tce_table_size)
		goto err_free_hw;

	/* The userspace view of the table, if requested */
	if (alloc_userspace_copy) {
		done = 0;
		user_table = pnv_pci_ioda2_table_do_alloc_pages(nid,
				level_shift, 1, tce_table_size, &done,
				&user_bytes);
		if (!user_table)
			goto err_free_hw;
		if (levels == 1 && (done < tce_table_size ||
				user_bytes != hw_bytes))
			goto err_free_user;
	}

	/* Describe the result in the linux iommu table */
	pnv_pci_setup_iommu_table(tbl, hw_table, tce_table_size, bus_offset,
			page_shift);
	tbl->it_nid = nid;
	tbl->it_userspace = user_table;
	tbl->it_indirect_levels = levels - 1;
	tbl->it_level_size = level_entries;

	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
			window_size, tce_table_size, bus_offset, tbl->it_base,
			tbl->it_userspace, 1, levels);

	return 0;

err_free_user:
	pnv_pci_ioda2_table_do_free_pages(user_table, level_entries,
			levels - 1);
err_free_hw:
	pnv_pci_ioda2_table_do_free_pages(hw_table, level_entries,
			levels - 1);

	return -ENOMEM;
}
342 
pnv_iommu_table_group_link_free(struct rcu_head * head)343 static void pnv_iommu_table_group_link_free(struct rcu_head *head)
344 {
345 	struct iommu_table_group_link *tgl = container_of(head,
346 			struct iommu_table_group_link, rcu);
347 
348 	kfree(tgl);
349 }
350 
pnv_pci_unlink_table_and_group(struct iommu_table * tbl,struct iommu_table_group * table_group)351 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
352 		struct iommu_table_group *table_group)
353 {
354 	long i;
355 	bool found;
356 	struct iommu_table_group_link *tgl;
357 
358 	if (!tbl || !table_group)
359 		return;
360 
361 	/* Remove link to a group from table's list of attached groups */
362 	found = false;
363 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
364 		if (tgl->table_group == table_group) {
365 			list_del_rcu(&tgl->next);
366 			call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
367 			found = true;
368 			break;
369 		}
370 	}
371 	if (WARN_ON(!found))
372 		return;
373 
374 	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
375 	found = false;
376 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
377 		if (table_group->tables[i] == tbl) {
378 			iommu_tce_table_put(tbl);
379 			table_group->tables[i] = NULL;
380 			found = true;
381 			break;
382 		}
383 	}
384 	WARN_ON(!found);
385 }
386 
pnv_pci_link_table_and_group(int node,int num,struct iommu_table * tbl,struct iommu_table_group * table_group)387 long pnv_pci_link_table_and_group(int node, int num,
388 		struct iommu_table *tbl,
389 		struct iommu_table_group *table_group)
390 {
391 	struct iommu_table_group_link *tgl = NULL;
392 
393 	if (WARN_ON(!tbl || !table_group))
394 		return -EINVAL;
395 
396 	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
397 			node);
398 	if (!tgl)
399 		return -ENOMEM;
400 
401 	tgl->table_group = table_group;
402 	list_add_rcu(&tgl->next, &tbl->it_group_list);
403 
404 	table_group->tables[num] = iommu_tce_table_get(tbl);
405 
406 	return 0;
407 }
408