// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}
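
/*
 * Note on the flush above: mb() orders the CPU writes, and the dummy
 * read_pte() of this context's physical hop0 acts as a read-back that forces
 * the previously posted writes to reach the device before we continue. The
 * same idiom for a generic MMIO register would look like this (illustrative
 * only, "reg" is a hypothetical ioremap'ed address):
 *
 *	writel(val, reg);
 *	mb();
 *	(void) readl(reg);	// the read cannot complete before the writes
 */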

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
				(val & OFFSET_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
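
/*
 * Illustrative breakdown of the shadow -> physical PTE transform above
 * (hypothetical shadow address, assuming the flags occupy the 12 LSBs
 * covered by OFFSET_MASK):
 *
 *	val      = 0xffff888012345000 | PAGE_PRESENT_MASK; // shadow hop + flag
 *	phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
 *			(val & OFFSET_MASK);
 *
 * The shadow copy keeps the shadow address so later walks stay in host
 * memory; only the H/W table receives the physical address.
 */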

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		free_hop(ctx, hop_addr);

	return num_of_ptes_left;
}
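
/*
 * Illustrative lifecycle of the per-hop PTE refcount (hypothetical names,
 * not a code path of this driver):
 *
 *	hop = alloc_hop(ctx);				// num_of_ptes == 0
 *	write_pte(ctx, pte_addr_inside_hop, child | PAGE_PRESENT_MASK);
 *	get_pte(ctx, hop);				// one PTE now in use
 *	...
 *	clear_pte(ctx, pte_addr_inside_hop);
 *	if (!put_pte(ctx, hop))
 *		;	// last PTE released, free_hop() already ran,
 *			// "hop" must not be touched past this point
 */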

static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
}
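
/*
 * Example of the index extraction performed by get_hopN_pte_addr(), assuming
 * 9 translation bits per hop and 8-byte PTEs (the real values come from the
 * HOPx_MASK/HOPx_SHIFT definitions in mmu_general.h and from mmu_pte_size):
 *
 *	idx      = (virt_addr & HOP3_MASK) >> HOP3_SHIFT;	// 0..511
 *	pte_addr = hop3_addr + idx * prop->mmu_pte_size;	// 8 bytes each
 */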

static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}
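
/*
 * Worked example for get_phys_addr(), assuming mmu_hop_table_size == 4KB so
 * page_mask == 0xfff (the real size is an ASIC property):
 *
 *	shadow_addr     = shadow_hop_addr + 0x18;	// PTE #3 of the hop
 *	shadow_hop_addr = shadow_addr & ~0xfffULL;
 *	phys_addr       = pgt_info->phys_addr + 0x18;	// same offset on the
 *							// device side
 *
 * hop0 is special-cased because its shadow copy lives in mmu_shadow_hop0
 * rather than in the mmu_shadow_hash.
 */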

static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}
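
/*
 * Worked example of the hop3 count above, with illustrative numbers (the real
 * ones come from the ASIC properties): for a 16GB default-mapped DRAM range,
 * 2MB DRAM pages and 512 PTEs per hop,
 *
 *	num_of_hop3 = 16GB / 2MB / 512 = 16;
 *
 * so 16 hop3 tables plus one hop1 and one hop2 are pre-allocated, and every
 * hop3 PTE is pointed at mmu_dram_default_page_addr with LAST | PRESENT set.
 */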

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow copy of the hop0 page tables (one per ASID).
 *
 * Return: 0 for success, non-zero for failure.
 */
int hl_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->mmu_enable)
		return 0;

	/* MMU H/W init was already done in device hw_init() */

	hdev->mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
					prop->mmu_hop_table_size,
					GFP_KERNEL | __GFP_ZERO);
	if (!hdev->mmu_shadow_hop0) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_pgt_pool);

	return rc;
}
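
/*
 * Layout note (sketch): the gen_pool above hands out hop tables from the
 * device page-table area while skipping the region reserved for the per-ASID
 * hop0 tables, i.e. the pool covers
 *
 *	[mmu_pgt_addr + mmu_hop0_tables_total_size,
 *	 mmu_pgt_addr + mmu_pgt_size)
 *
 * hop0 tables are never allocated from the pool; they are addressed directly
 * per ASID through get_phys_hop0_addr().
 */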

/**
 * hl_mmu_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
void hl_mmu_fini(struct hl_device *hdev)
{
	if (!hdev->mmu_enable)
		return;

	kvfree(hdev->mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_pgt_pool);

	/* MMU H/W fini will be done in device hw_fini() */
}

/**
 * hl_mmu_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return 0;

	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_phys_hash);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hdev->mmu_enable)
		return;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		free_hop(ctx, pgt_info->shadow_addr);
	}

	mutex_destroy(&ctx->mmu_lock);
}

static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_dram_addr, is_huge, clear_hop3 = true;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
				"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				PTE_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto flush;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto flush;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto flush;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto flush;

		clear_pte(ctx, hop0_pte_addr);
	}

flush:
	flush(ctx);

	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

/*
 * hl_mmu_unmap - unmaps a virtual addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to unmap
 * @page_size: size of the page to unmap
 *
 * This function does the following:
 * - Check that the virt addr is mapped
 * - Unmap the virt addr and free pgts if possible
 * - Returns 0 on success, -EINVAL if the given addr is not mapped
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it unmaps only a single page, the lock should be
 * implemented in a higher level in order to protect the entire unmapping of
 * the memory area
 */
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 real_virt_addr;
	u32 real_page_size, npages;
	int i, rc;

	if (!hdev->mmu_enable)
		return 0;

	/*
	 * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
	 * is bigger, we break it to sub-pages and unmap them separately.
	 */
	if ((page_size % PAGE_SIZE_2MB) == 0) {
		real_page_size = PAGE_SIZE_2MB;
	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
		real_page_size = PAGE_SIZE_4KB;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
				page_size);

		return -EFAULT;
	}

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_unmap(ctx, real_virt_addr);
		if (rc)
			return rc;

		real_virt_addr += real_page_size;
	}

	return 0;
}
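
/*
 * Example of the sub-page split above: unmapping with page_size == 8MB picks
 * real_page_size = 2MB and npages = 4, so the loop calls _hl_mmu_unmap() for
 * virt_addr, virt_addr + 2MB, virt_addr + 4MB and virt_addr + 6MB.
 */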

static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge, is_dram_addr;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a 4KB/2MB page. For a 2MB page the
	 * walk ends at hop3 rather than hop4. Currently the DRAM allocation
	 * uses 2MB pages only, but user memory could have been allocated with
	 * either of the two page sizes. Since this is common code for all
	 * three cases, we need this huge page check.
	 */
	is_huge = page_size == PAGE_SIZE_2MB;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
		return -EFAULT;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					PTE_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	flush(ctx);

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}
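
/*
 * Resulting page walk for a 4KB mapping built by _hl_mmu_map() (sketch):
 *
 *	hop0[idx0] -> hop1
 *	hop1[idx1] -> hop2
 *	hop2[idx2] -> hop3
 *	hop3[idx3] -> hop4
 *	hop4[idx4] -> phys_addr | LAST_MASK | PAGE_PRESENT_MASK
 *
 * For a 2MB (huge) page the walk stops one level earlier: hop3[idx3] holds
 * phys_addr | LAST_MASK | PAGE_PRESENT_MASK and hop4 is never allocated.
 */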

/*
 * hl_mmu_map - maps a virtual addr to physical addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @page_size: physical page size
 *
 * This function does the following:
 * - Check that the virt addr is not mapped
 * - Allocate pgts as necessary in order to map the virt addr to the phys
 * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * in a higher level in order to protect the entire mapping of the memory area
 */
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 real_virt_addr, real_phys_addr;
	u32 real_page_size, npages;
	int i, rc, mapped_cnt = 0;

	if (!hdev->mmu_enable)
		return 0;

	/*
	 * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
	 * is bigger, we break it to sub-pages and map them separately.
	 */
	if ((page_size % PAGE_SIZE_2MB) == 0) {
		real_page_size = PAGE_SIZE_2MB;
	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
		real_page_size = PAGE_SIZE_4KB;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not 4KB nor 2MB aligned, can't map\n",
				page_size);

		return -EFAULT;
	}

	WARN_ONCE((phys_addr & (real_page_size - 1)),
		"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
		phys_addr, real_page_size);

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;
	real_phys_addr = phys_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
				real_page_size);
		if (rc)
			goto err;

		real_virt_addr += real_page_size;
		real_phys_addr += real_page_size;
		mapped_cnt++;
	}

	return 0;

err:
	real_virt_addr = virt_addr;
	for (i = 0 ; i < mapped_cnt ; i++) {
		if (_hl_mmu_unmap(ctx, real_virt_addr))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap va: 0x%llx\n", real_virt_addr);

		real_virt_addr += real_page_size;
	}

	return rc;
}
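
/*
 * Typical usage (sketch): since hl_mmu_map()/hl_mmu_unmap() do not take any
 * lock themselves, callers are expected to hold ctx->mmu_lock (initialized in
 * hl_mmu_ctx_init()) around the whole memory area they operate on, e.g.:
 *
 *	mutex_lock(&ctx->mmu_lock);
 *	rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_2MB);
 *	mutex_unlock(&ctx->mmu_lock);
 */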

/*
 * hl_mmu_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_in(struct hl_ctx *ctx)
{

}