// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Intel SCIF driver.
 */
#include <linux/intel-iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

bool scif_ulimit_check = true;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
	struct scif_endpt_rma_info *rma = &ep->rma_info;

	mutex_init(&rma->rma_lock);
	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
	spin_lock_init(&rma->tc_lock);
	mutex_init(&rma->mmn_lock);
	INIT_LIST_HEAD(&rma->reg_list);
	INIT_LIST_HEAD(&rma->remote_reg_list);
	atomic_set(&rma->tw_refcount, 0);
	atomic_set(&rma->tcw_refcount, 0);
	atomic_set(&rma->tcw_total_pages, 0);
	atomic_set(&rma->fence_refcount, 0);

	rma->async_list_del = 0;
	rma->dma_chan = NULL;
	INIT_LIST_HEAD(&rma->mmn_list);
	INIT_LIST_HEAD(&rma->vma_list);
	init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
	int ret = 0;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Destroy RMA info only if all lists are empty and no refs are held */
	if (list_empty(&ep->rma_info.reg_list) &&
	    list_empty(&ep->rma_info.remote_reg_list) &&
	    list_empty(&ep->rma_info.mmn_list) &&
	    !atomic_read(&ep->rma_info.tw_refcount) &&
	    !atomic_read(&ep->rma_info.tcw_refcount) &&
	    !atomic_read(&ep->rma_info.fence_refcount))
		ret = 1;
	mutex_unlock(&ep->rma_info.rma_lock);
	return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
	struct scif_pinned_pages *pin;

	might_sleep();
	pin = scif_zalloc(sizeof(*pin));
	if (!pin)
		goto error;

	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
	if (!pin->pages)
		goto error_free_pinned_pages;

	pin->prot = prot;
	pin->magic = SCIFEP_MAGIC;
	return pin;

error_free_pinned_pages:
	scif_free(pin, sizeof(*pin));
error:
	return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
	int j;
	int writeable = pin->prot & SCIF_PROT_WRITE;
	int kernel = SCIF_MAP_KERNEL & pin->map_flags;

	for (j = 0; j < pin->nr_pages; j++) {
		if (pin->pages[j] && !kernel) {
			if (writeable)
				SetPageDirty(pin->pages[j]);
			put_page(pin->pages[j]);
		}
	}

	scif_free(pin->pages,
		  pin->nr_pages * sizeof(*pin->pages));
	scif_free(pin, sizeof(*pin));
	return 0;
}

/**
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
				       s64 offset, bool temp)
{
	struct scif_window *window;

	might_sleep();
	window = scif_zalloc(sizeof(*window));
	if (!window)
		goto error;

	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
	if (!window->dma_addr)
		goto error_free_window;

	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
	if (!window->num_pages)
		goto error_free_window;

	window->offset = offset;
	window->ep = (u64)ep;
	window->magic = SCIFEP_MAGIC;
	window->reg_state = OP_IDLE;
	init_waitqueue_head(&window->regwq);
	window->unreg_state = OP_IDLE;
	init_waitqueue_head(&window->unregwq);
	INIT_LIST_HEAD(&window->list);
	window->type = SCIF_WINDOW_SELF;
	window->temp = temp;
	return window;

error_free_window:
	scif_free(window->dma_addr,
		  nr_pages * sizeof(*window->dma_addr));
	scif_free(window, sizeof(*window));
error:
	return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
					   struct scif_window *window)
{
	int err;
	int nr_pages = window->nr_pages;
	struct scif_allocmsg *alloc = &window->alloc_handle;
	struct scifmsg msg;

retry:
	/* Wait for a SCIF_ALLOC_GNT/REJ message */
	err = wait_event_timeout(alloc->allocwq,
				 alloc->state != OP_IN_PROGRESS,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err && scifdev_alive(ep))
		goto retry;

	mutex_lock(&ep->rma_info.rma_lock);
	if (alloc->state == OP_COMPLETED) {
		msg.uop = SCIF_FREE_VIRT;
		msg.src = ep->port;
		msg.payload[0] = ep->remote_ep;
		msg.payload[1] = window->alloc_handle.vaddr;
		msg.payload[2] = (u64)window;
		msg.payload[3] = SCIF_REGISTER;
		_scif_nodeqp_send(ep->remote_dev, &msg);
	}
	mutex_unlock(&ep->rma_info.rma_lock);

	scif_free_window_offset(ep, window, window->offset);
	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
	scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window.
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
	int j;

	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
		if (window->st) {
			dma_unmap_sg(&remote_dev->sdev->dev,
				     window->st->sgl, window->st->nents,
				     DMA_BIDIRECTIONAL);
			sg_free_table(window->st);
			kfree(window->st);
			window->st = NULL;
		}
	} else {
		for (j = 0; j < window->nr_contig_chunks; j++) {
			if (window->dma_addr[j]) {
				scif_unmap_single(window->dma_addr[j],
						  remote_dev,
						  window->num_pages[j] <<
						  PAGE_SHIFT);
				window->dma_addr[j] = 0x0;
			}
		}
	}
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
	if (scif_ulimit_check)
		return get_task_mm(current);
	return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
	if (mm)
		mmput(mm);
}

static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
			  int nr_pages)
{
	if (!mm || !nr_pages || !scif_ulimit_check)
		return 0;

	atomic64_sub(nr_pages, &mm->pinned_vm);
	return 0;
}

static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
					     int nr_pages)
{
	unsigned long locked, lock_limit;

	if (!mm || !nr_pages || !scif_ulimit_check)
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	locked = atomic64_add_return(nr_pages, &mm->pinned_vm);

	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
		atomic64_sub(nr_pages, &mm->pinned_vm);
		dev_err(scif_info.mdev.this_device,
			"locked(%lu) > lock_limit(%lu)\n",
			locked, lock_limit);
		return -ENOMEM;
	}
	return 0;
}
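
/*
 * The two helpers above form a charge/uncharge pair around mm->pinned_vm:
 * __scif_check_inc_pinned_vm() speculatively charges nr_pages against
 * RLIMIT_MEMLOCK (unless the caller holds CAP_IPC_LOCK) and backs the
 * charge out on failure, while __scif_dec_pinned_vm_lock() uncharges when
 * the pages are released. An illustrative sketch of the arithmetic,
 * assuming 4 KiB pages and a 64 MiB RLIMIT_MEMLOCK (both assumptions,
 * not values taken from this driver):
 *
 *	lock_limit = (64 << 20) >> PAGE_SHIFT;	// 16384 pages
 *	// pinning 12000 pages, then 8000 more:
 *	locked = atomic64_add_return(12000, &mm->pinned_vm); // 12000, OK
 *	locked = atomic64_add_return(8000, &mm->pinned_vm);  // 20000 > 16384
 *	// the second caller gets -ENOMEM and its 8000-page charge is undone
 */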

/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
	int j;
	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
	int nr_pages = window->nr_pages;

	might_sleep();
	if (!window->temp && window->mm) {
		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages);
		__scif_release_mm(window->mm);
		window->mm = NULL;
	}

	scif_free_window_offset(ep, window, window->offset);
	scif_unmap_window(ep->remote_dev, window);
	/*
	 * Decrement references for this set of pinned pages from
	 * this window.
	 */
	j = atomic_sub_return(1, &pinned_pages->ref_count);
	if (j < 0)
		dev_err(scif_info.mdev.this_device,
			"%s %d incorrect ref count %d\n",
			__func__, __LINE__, j);
	/*
	 * If the ref count for pinned_pages is zero then someone
	 * has already called scif_unpin_pages() for it and we should
	 * destroy the page cache.
	 */
	if (!j)
		scif_destroy_pinned_pages(window->pinned_pages);
	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
	window->magic = 0;
	scif_free(window, sizeof(*window));
	return 0;
}
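
/*
 * Note on pinned page lifetime, as implemented by the refcounting above:
 * a scif_pinned_pages set starts with ref_count == 1 from
 * __scif_pin_pages(), each registered window backed by the set takes one
 * additional reference, and the set is destroyed by whichever of
 * scif_destroy_window()/scif_unpin_pages() drops the count to zero.
 * A rough sketch of the expected counts:
 *
 *	scif_pin_pages(...)			// ref_count == 1
 *	scif_register_pinned_pages(...)		// ref_count == 2
 *	scif_unpin_pages(...)			// ref_count == 1, pages live on
 *	scif_unregister(...)			// window teardown drops it to 0
 */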

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
				     struct scif_window *window)
{
	int i, j, err = 0;
	int nr_pages = window->nr_pages;
	bool vmalloc_dma_phys, vmalloc_num_pages;

	might_sleep();
	/* Map window */
	err = scif_map_single(&window->mapped_offset,
			      window, remote_dev, sizeof(*window));
	if (err)
		goto error_window;

	/* Compute the number of lookup entries. 21 == 2MB Shift */
	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
					(2 * 1024 * 1024)) >> 21;

	window->dma_addr_lookup.lookup =
		scif_alloc_coherent(&window->dma_addr_lookup.offset,
				    remote_dev, window->nr_lookup *
				    sizeof(*window->dma_addr_lookup.lookup),
				    GFP_KERNEL | __GFP_ZERO);
	if (!window->dma_addr_lookup.lookup) {
		err = -ENOMEM;
		goto error_window;
	}

	window->num_pages_lookup.lookup =
		scif_alloc_coherent(&window->num_pages_lookup.offset,
				    remote_dev, window->nr_lookup *
				    sizeof(*window->num_pages_lookup.lookup),
				    GFP_KERNEL | __GFP_ZERO);
	if (!window->num_pages_lookup.lookup) {
		err = -ENOMEM;
		goto error_window;
	}

	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);

	/* Now map each of the pages containing physical addresses */
	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
				    vmalloc_dma_phys ?
				    vmalloc_to_page(&window->dma_addr[i]) :
				    virt_to_page(&window->dma_addr[i]),
				    remote_dev);
		if (err)
			goto error_window;
		err = scif_map_page(&window->num_pages_lookup.lookup[j],
				    vmalloc_num_pages ?
				    vmalloc_to_page(&window->num_pages[i]) :
				    virt_to_page(&window->num_pages[i]),
				    remote_dev);
		if (err)
			goto error_window;
	}
	return 0;
error_window:
	return err;
}
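
/*
 * Worked example for the lookup sizing above, assuming 4 KiB pages (so
 * SCIF_NR_ADDR_IN_PAGE == PAGE_SIZE / sizeof(u64) == 512, and one lookup
 * page covers 512 * 4 KiB == 2 MiB of registered memory, hence the
 * "21 == 2MB Shift" comment):
 *
 *	nr_pages  = 768;	// a 3 MiB window
 *	nr_lookup = ALIGN(768 * 4096, 2 * 1024 * 1024) >> 21;	// == 2
 *
 * i.e. two pages of lookup entries are enough to publish all 768
 * physical addresses to the peer.
 */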

/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
				       struct scif_window *window)
{
	int i, j;

	if (window->nr_lookup) {
		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
		struct scif_rma_lookup *npup = &window->num_pages_lookup;

		for (i = 0, j = 0; i < window->nr_pages;
			i += SCIF_NR_ADDR_IN_PAGE, j++) {
			if (lup->lookup && lup->lookup[j])
				scif_unmap_single(lup->lookup[j],
						  remote_dev,
						  PAGE_SIZE);
			if (npup->lookup && npup->lookup[j])
				scif_unmap_single(npup->lookup[j],
						  remote_dev,
						  PAGE_SIZE);
		}
		if (lup->lookup)
			scif_free_coherent(lup->lookup, lup->offset,
					   remote_dev, window->nr_lookup *
					   sizeof(*lup->lookup));
		if (npup->lookup)
			scif_free_coherent(npup->lookup, npup->offset,
					   remote_dev, window->nr_lookup *
					   sizeof(*npup->lookup));
		if (window->mapped_offset)
			scif_unmap_single(window->mapped_offset,
					  remote_dev, sizeof(*window));
		window->nr_lookup = 0;
	}
}

/**
 * scif_create_remote_window:
 * @scifdev: SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
	struct scif_window *window;

	might_sleep();
	window = scif_zalloc(sizeof(*window));
	if (!window)
		goto error_ret;

	window->magic = SCIFEP_MAGIC;
	window->nr_pages = nr_pages;

	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
	if (!window->dma_addr)
		goto error_window;

	window->num_pages = scif_zalloc(nr_pages *
					sizeof(*window->num_pages));
	if (!window->num_pages)
		goto error_window;

	if (scif_create_remote_lookup(scifdev, window))
		goto error_window;

	window->type = SCIF_WINDOW_PEER;
	window->unreg_state = OP_IDLE;
	INIT_LIST_HEAD(&window->list);
	return window;
error_window:
	scif_destroy_remote_window(window);
error_ret:
	return NULL;
}

/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
	scif_free(window->dma_addr, window->nr_pages *
		  sizeof(*window->dma_addr));
	scif_free(window->num_pages, window->nr_pages *
		  sizeof(*window->num_pages));
	window->magic = 0;
	scif_free(window, sizeof(*window));
}

/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg(..) and then detect the number
 * of contiguous DMA mappings allocated.
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
			  struct scif_window *window)
{
	struct scatterlist *sg;
	int i, err;
	scif_pinned_pages_t pin = window->pinned_pages;

	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
	if (!window->st)
		return -ENOMEM;

	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
	if (err)
		return err;

	for_each_sg(window->st->sgl, sg, window->st->nents, i)
		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
			 window->st->nents, DMA_BIDIRECTIONAL);
	if (!err)
		return -ENOMEM;
	/* Detect contiguous ranges of DMA mappings */
	sg = window->st->sgl;
	for (i = 0; sg; i++) {
		dma_addr_t last_da;

		window->dma_addr[i] = sg_dma_address(sg);
		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
		last_da = sg_dma_address(sg) + sg_dma_len(sg);
		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
			window->num_pages[i] +=
				(sg_dma_len(sg) >> PAGE_SHIFT);
			last_da = sg_dma_address(sg) +
				sg_dma_len(sg);
		}
		window->nr_contig_chunks++;
	}
	return 0;
}
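
/*
 * Illustrative sketch of the coalescing loop above with made-up DMA
 * addresses (assuming 4 KiB pages): suppose dma_map_sg() returned three
 * entries mapped at 0x1000, 0x2000 and 0x8000, each 4 KiB long. The
 * first two are adjacent (0x1000 + 0x1000 == 0x2000), so they collapse
 * into one chunk:
 *
 *	window->dma_addr[0] = 0x1000; window->num_pages[0] = 2;
 *	window->dma_addr[1] = 0x8000; window->num_pages[1] = 1;
 *	window->nr_contig_chunks = 2;
 */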

/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
	int i, j, k, err = 0, nr_contig_pages;
	scif_pinned_pages_t pin;
	phys_addr_t phys_prev, phys_curr;

	might_sleep();

	pin = window->pinned_pages;

	if (intel_iommu_enabled && !scifdev_self(remote_dev))
		return scif_iommu_map(remote_dev, window);

	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
		phys_prev = page_to_phys(pin->pages[i]);
		nr_contig_pages = 1;

		/* Detect physically contiguous chunks */
		for (k = i + 1; k < window->nr_pages; k++) {
			phys_curr = page_to_phys(pin->pages[k]);
			if (phys_curr != (phys_prev + PAGE_SIZE))
				break;
			phys_prev = phys_curr;
			nr_contig_pages++;
		}
		window->num_pages[j] = nr_contig_pages;
		window->nr_contig_chunks++;
		if (scif_is_mgmt_node()) {
			/*
			 * Management node has to deal with SMPT on X100 and
			 * hence the DMA mapping is required
			 */
			err = scif_map_single(&window->dma_addr[j],
					      phys_to_virt(page_to_phys(
							   pin->pages[i])),
					      remote_dev,
					      nr_contig_pages << PAGE_SHIFT);
			if (err)
				return err;
		} else {
			window->dma_addr[j] = page_to_phys(pin->pages[i]);
		}
	}
	return err;
}
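
/*
 * The non-IOMMU path above mirrors the scatterlist coalescing: pages that
 * are physically adjacent are folded into one chunk, and only the
 * management node (which must program the X100 SMPT) creates real DMA
 * mappings; a card-side window simply records the physical address of
 * each chunk. E.g. pages at PFNs 100, 101, 102 and 200 (an assumed
 * layout) become two chunks of 3 and 1 pages respectively.
 */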

/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
				     struct scif_window *window)
{
	struct scifmsg msg;

	msg.uop = SCIF_UNREGISTER;
	msg.src = ep->port;
	msg.payload[0] = window->alloc_handle.vaddr;
	msg.payload[1] = (u64)window;
	return scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 */
int scif_unregister_window(struct scif_window *window)
{
	int err = 0;
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
	bool send_msg = false;

	might_sleep();
	switch (window->unreg_state) {
	case OP_IDLE:
	{
		window->unreg_state = OP_IN_PROGRESS;
		send_msg = true;
	}
		/* fall through */
	case OP_IN_PROGRESS:
	{
		scif_get_window(window, 1);
		mutex_unlock(&ep->rma_info.rma_lock);
		if (send_msg) {
			err = scif_send_scif_unregister(ep, window);
			if (err) {
				window->unreg_state = OP_COMPLETED;
				goto done;
			}
		} else {
			/* Return ENXIO since unregistration is in progress */
			mutex_lock(&ep->rma_info.rma_lock);
			return -ENXIO;
		}
retry:
		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
		err = wait_event_timeout(window->unregwq,
					 window->unreg_state != OP_IN_PROGRESS,
					 SCIF_NODE_ALIVE_TIMEOUT);
		if (!err && scifdev_alive(ep))
			goto retry;
		if (!err) {
			err = -ENODEV;
			window->unreg_state = OP_COMPLETED;
			dev_err(scif_info.mdev.this_device,
				"%s %d err %d\n", __func__, __LINE__, err);
		}
		if (err > 0)
			err = 0;
done:
		mutex_lock(&ep->rma_info.rma_lock);
		scif_put_window(window, 1);
		break;
	}
	case OP_FAILED:
	{
		if (!scifdev_alive(ep)) {
			err = -ENODEV;
			window->unreg_state = OP_COMPLETED;
		}
		break;
	}
	case OP_COMPLETED:
		break;
	default:
		err = -ENODEV;
	}

	if (window->unreg_state == OP_COMPLETED && window->ref_count)
		scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
		mutex_unlock(&ep->rma_info.rma_lock);
		if ((window->pinned_pages->map_flags & SCIF_MAP_KERNEL) &&
		    scifdev_alive(ep)) {
			scif_drain_dma_intr(ep->remote_dev->sdev,
					    ep->rma_info.dma_chan);
		} else {
			if (!__scif_dec_pinned_vm_lock(window->mm,
						       window->nr_pages)) {
				__scif_release_mm(window->mm);
				window->mm = NULL;
			}
		}
		scif_queue_for_cleanup(window, &scif_info.rma);
		mutex_lock(&ep->rma_info.rma_lock);
	}
	return err;
}
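
/*
 * Summary of the unregistration state machine driven above:
 *
 *	OP_IDLE        -> OP_IN_PROGRESS	(SCIF_UNREGISTER sent)
 *	OP_IN_PROGRESS -> OP_COMPLETED		(SCIF_UNREGISTER_ACK, send
 *						 failure, or dead peer)
 *	OP_IN_PROGRESS -> OP_FAILED		(SCIF_UNREGISTER_NACK: the
 *						 peer still holds references)
 *	OP_FAILED      -> OP_COMPLETED		(peer node died)
 *
 * A concurrent caller that finds OP_IN_PROGRESS gets -ENXIO rather than
 * sending a second SCIF_UNREGISTER for the same window.
 */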

/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request.
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
				   struct scif_window *window)
{
	struct scifmsg msg;
	struct scif_allocmsg *alloc = &window->alloc_handle;

	/* Set up the Alloc Handle */
	alloc->state = OP_IN_PROGRESS;
	init_waitqueue_head(&alloc->allocwq);

	/* Send out an allocation request */
	msg.uop = SCIF_ALLOC_REQ;
	msg.payload[1] = window->nr_pages;
	msg.payload[2] = (u64)&window->alloc_handle;
	return _scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
				   struct scif_window *window)
{
	struct scifmsg msg;
	struct scif_window *remote_window;
	struct scif_allocmsg *alloc = &window->alloc_handle;
	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
	int i = 0, j = 0;
	int nr_contig_chunks, loop_nr_contig_chunks;
	int remaining_nr_contig_chunks, nr_lookup;
	int err, map_err;

	map_err = scif_map_window(ep->remote_dev, window);
	if (map_err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d map_err %d\n", __func__, __LINE__, map_err);
	remaining_nr_contig_chunks = window->nr_contig_chunks;
	nr_contig_chunks = window->nr_contig_chunks;
retry:
	/* Wait for a SCIF_ALLOC_GNT/REJ message */
	err = wait_event_timeout(alloc->allocwq,
				 alloc->state != OP_IN_PROGRESS,
				 SCIF_NODE_ALIVE_TIMEOUT);
	mutex_lock(&ep->rma_info.rma_lock);
	/* Synchronize with the thread waking up allocwq */
	mutex_unlock(&ep->rma_info.rma_lock);
	if (!err && scifdev_alive(ep))
		goto retry;

	if (!err)
		err = -ENODEV;

	if (err > 0)
		err = 0;
	else
		return err;

	/* Bail out. The remote end rejected this request */
	if (alloc->state == OP_FAILED)
		return -ENOMEM;

	if (map_err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, map_err);
		msg.uop = SCIF_FREE_VIRT;
		msg.src = ep->port;
		msg.payload[0] = ep->remote_ep;
		msg.payload[1] = window->alloc_handle.vaddr;
		msg.payload[2] = (u64)window;
		msg.payload[3] = SCIF_REGISTER;
		spin_lock(&ep->lock);
		if (ep->state == SCIFEP_CONNECTED)
			err = _scif_nodeqp_send(ep->remote_dev, &msg);
		else
			err = -ENOTCONN;
		spin_unlock(&ep->lock);
		return err;
	}

	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
				     ep->remote_dev);

	/* Compute the number of lookup entries. 21 == 2MB Shift */
	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);

	dma_phys_lookup =
		scif_ioremap(remote_window->dma_addr_lookup.offset,
			     nr_lookup *
			     sizeof(*remote_window->dma_addr_lookup.lookup),
			     ep->remote_dev);
	num_pages_lookup =
		scif_ioremap(remote_window->num_pages_lookup.offset,
			     nr_lookup *
			     sizeof(*remote_window->num_pages_lookup.lookup),
			     ep->remote_dev);

	while (remaining_nr_contig_chunks) {
		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
					      (int)SCIF_NR_ADDR_IN_PAGE);
		/* Copy DMA addresses (addresses that are fed into the
		 * DMA engine). We transfer bus addresses which are then
		 * converted into a MIC physical address on the remote
		 * side if it is a MIC; if the remote node is a mgmt node
		 * we transfer the MIC physical address.
		 */
		tmp = scif_ioremap(dma_phys_lookup[j],
				   loop_nr_contig_chunks *
				   sizeof(*window->dma_addr),
				   ep->remote_dev);
		tmp1 = scif_ioremap(num_pages_lookup[j],
				    loop_nr_contig_chunks *
				    sizeof(*window->num_pages),
				    ep->remote_dev);
		if (scif_is_mgmt_node()) {
			memcpy_toio((void __force __iomem *)tmp,
				    &window->dma_addr[i], loop_nr_contig_chunks
				    * sizeof(*window->dma_addr));
			memcpy_toio((void __force __iomem *)tmp1,
				    &window->num_pages[i], loop_nr_contig_chunks
				    * sizeof(*window->num_pages));
		} else {
			if (scifdev_is_p2p(ep->remote_dev)) {
				/*
				 * add remote node's base address for this node
				 * to convert it into a MIC address
				 */
				int m;
				dma_addr_t dma_addr;

				for (m = 0; m < loop_nr_contig_chunks; m++) {
					dma_addr = window->dma_addr[i + m] +
						ep->remote_dev->base_addr;
					writeq(dma_addr,
					       (void __force __iomem *)&tmp[m]);
				}
				memcpy_toio((void __force __iomem *)tmp1,
					    &window->num_pages[i],
					    loop_nr_contig_chunks
					    * sizeof(*window->num_pages));
			} else {
				/* Mgmt node or loopback - transfer DMA
				 * addresses as is, this is the same as a
				 * MIC physical address (we use the dma_addr
				 * and not the phys_addr array since the
				 * phys_addr is only setup if there is a mmap()
				 * request from the mgmt node)
				 */
				memcpy_toio((void __force __iomem *)tmp,
					    &window->dma_addr[i],
					    loop_nr_contig_chunks *
					    sizeof(*window->dma_addr));
				memcpy_toio((void __force __iomem *)tmp1,
					    &window->num_pages[i],
					    loop_nr_contig_chunks *
					    sizeof(*window->num_pages));
			}
		}
		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
		i += loop_nr_contig_chunks;
		j++;
		scif_iounmap(tmp, loop_nr_contig_chunks *
			     sizeof(*window->dma_addr), ep->remote_dev);
		scif_iounmap(tmp1, loop_nr_contig_chunks *
			     sizeof(*window->num_pages), ep->remote_dev);
	}

	/* Prepare the remote window for the peer */
	remote_window->peer_window = (u64)window;
	remote_window->offset = window->offset;
	remote_window->prot = window->prot;
	remote_window->nr_contig_chunks = nr_contig_chunks;
	remote_window->ep = ep->remote_ep;
	scif_iounmap(num_pages_lookup,
		     nr_lookup *
		     sizeof(*remote_window->num_pages_lookup.lookup),
		     ep->remote_dev);
	scif_iounmap(dma_phys_lookup,
		     nr_lookup *
		     sizeof(*remote_window->dma_addr_lookup.lookup),
		     ep->remote_dev);
	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
	window->peer_window = alloc->vaddr;
	return err;
}

/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if the EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message; otherwise send a SCIF_FREE_VIRT
 * message so that the peer can free the remote window it allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
				   struct scif_window *window)
{
	int err = 0;
	struct scifmsg msg;

	msg.src = ep->port;
	msg.payload[0] = ep->remote_ep;
	msg.payload[1] = window->alloc_handle.vaddr;
	msg.payload[2] = (u64)window;
	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_CONNECTED) {
		msg.uop = SCIF_REGISTER;
		window->reg_state = OP_IN_PROGRESS;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		spin_unlock(&ep->lock);
		if (!err) {
retry:
			/* Wait for a SCIF_REGISTER_(N)ACK message */
			err = wait_event_timeout(window->regwq,
						 window->reg_state !=
						 OP_IN_PROGRESS,
						 SCIF_NODE_ALIVE_TIMEOUT);
			if (!err && scifdev_alive(ep))
				goto retry;
			err = !err ? -ENODEV : 0;
			if (window->reg_state == OP_FAILED)
				err = -ENOTCONN;
		}
	} else {
		msg.uop = SCIF_FREE_VIRT;
		msg.payload[3] = SCIF_REGISTER;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		spin_unlock(&ep->lock);
		if (!err)
			err = -ENOTCONN;
	}
	return err;
}
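
/*
 * Taken together, scif_send_alloc_request(), scif_prep_remote_window()
 * and scif_send_scif_register() implement the registration handshake:
 *
 *	self node			peer node
 *	SCIF_ALLOC_REQ    ------------>	allocate remote window
 *	                  <------------	SCIF_ALLOC_GNT (or _REJ)
 *	copy DMA/num_pages lookups into the peer window over the aperture
 *	SCIF_REGISTER     ------------>	insert into remote_reg_list
 *	                  <------------	SCIF_REGISTER_(N)ACK
 *
 * SCIF_FREE_VIRT is sent instead of SCIF_REGISTER when the endpoint is
 * no longer connected, so the peer can free the window it allocated.
 */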

/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
			   int num_pages, s64 *out_offset)
{
	s64 page_index;
	struct iova *iova_ptr;
	int err = 0;

	if (flags & SCIF_MAP_FIXED) {
		page_index = SCIF_IOVA_PFN(offset);
		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
					page_index + num_pages - 1);
		if (!iova_ptr)
			err = -EADDRINUSE;
	} else {
		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
				      SCIF_DMA_63BIT_PFN - 1, 0);
		if (!iova_ptr)
			err = -ENOMEM;
	}
	if (!err)
		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
	return err;
}
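
/*
 * Example of the two allocation modes above (an illustrative sketch;
 * "ep" is an assumed valid endpoint):
 *
 *	s64 off;
 *
 *	// let the IOVA allocator pick any free offset for 16 pages
 *	err = scif_get_window_offset(ep, 0, 0, 16, &off);
 *
 *	// claim the exact offset 0x200000 (SCIF_MAP_FIXED), which fails
 *	// with -EADDRINUSE if any page in the range is already reserved
 *	err = scif_get_window_offset(ep, SCIF_MAP_FIXED, 0x200000, 16, &off);
 */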

/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: Offset to be freed
 *
 * Free offset for this EP. The caller is expected to grab
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
			     struct scif_window *window, s64 offset)
{
	if (!window || !window->offset_freed) {
		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
		if (window)
			window->offset_freed = true;
	}
}

/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
	int err;
	struct scif_window *window = NULL;
	int nr_pages = msg->payload[1];

	window = scif_create_remote_window(scifdev, nr_pages);
	if (!window) {
		err = -ENOMEM;
		goto error;
	}

	/* The peer's allocation request is granted */
	msg->uop = SCIF_ALLOC_GNT;
	msg->payload[0] = (u64)window;
	msg->payload[1] = window->mapped_offset;
	err = scif_nodeqp_send(scifdev, msg);
	if (err)
		scif_destroy_remote_window(window);
	return;
error:
	/* The peer's allocation request is rejected */
	dev_err(&scifdev->sdev->dev,
		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
		__func__, __LINE__, err, window, nr_pages);
	msg->uop = SCIF_ALLOC_REJ;
	scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
	struct scif_window *window = container_of(handle, struct scif_window,
						  alloc_handle);
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	handle->vaddr = msg->payload[0];
	handle->phys_addr = msg->payload[1];
	if (msg->uop == SCIF_ALLOC_GNT)
		handle->state = OP_COMPLETED;
	else
		handle->state = OP_FAILED;
	wake_up(&handle->allocwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Free up memory kmalloc'd earlier.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window = (struct scif_window *)msg->payload[1];

	scif_destroy_remote_window(window);
}

static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
	int j;
	struct scif_hw_dev *sdev = dev->sdev;
	phys_addr_t apt_base = 0;

	/*
	 * Add the aperture base if the DMA address is not card relative
	 * since the DMA addresses need to be an offset into the bar
	 */
	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
	    sdev->aper && !sdev->card_rel_da)
		apt_base = sdev->aper->pa;
	else
		return;

	for (j = 0; j < window->nr_contig_chunks; j++) {
		if (window->num_pages[j])
			window->dma_addr[j] += apt_base;
		else
			break;
	}
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];

	mutex_lock(&ep->rma_info.rma_lock);
	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_CONNECTED) {
		msg->uop = SCIF_REGISTER_ACK;
		scif_nodeqp_send(ep->remote_dev, msg);
		scif_fixup_aper_base(ep->remote_dev, window);
		/* No further failures expected. Insert new window */
		scif_insert_window(window, &ep->rma_info.remote_reg_list);
	} else {
		msg->uop = SCIF_REGISTER_NACK;
		scif_nodeqp_send(ep->remote_dev, msg);
	}
	spin_unlock(&ep->lock);
	mutex_unlock(&ep->rma_info.rma_lock);
	/* free up any lookup resources now that page lists are transferred */
	scif_destroy_remote_lookup(ep->remote_dev, window);
	/*
	 * If we could not insert the window then we need to
	 * destroy it here.
	 */
	if (msg->uop == SCIF_REGISTER_NACK)
		scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;
	int del_window = 0;

	ep = (struct scif_endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = 0;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.remote_reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}
	if (window) {
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(&scifdev->sdev->dev,
				"%s %d ref count should be +ve\n",
				__func__, __LINE__);
		window->unreg_state = OP_COMPLETED;
		if (!window->ref_count) {
			msg->uop = SCIF_UNREGISTER_ACK;
			atomic_inc(&ep->rma_info.tw_refcount);
			ep->rma_info.async_list_del = 1;
			list_del_init(&window->list);
			del_window = 1;
		} else {
			/* NACK! There are valid references to this window */
			msg->uop = SCIF_UNREGISTER_NACK;
		}
	} else {
		/* The window did not make its way to the list at all. ACK */
		msg->uop = SCIF_UNREGISTER_ACK;
		scif_destroy_remote_window(recv_window);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (del_window)
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
	scif_nodeqp_send(ep->remote_dev, msg);
	if (del_window)
		scif_queue_for_cleanup(window, &scif_info.rma);
}

/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[2];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->reg_state = OP_COMPLETED;
	wake_up(&window->regwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to inform it that registration
 * cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[2];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->reg_state = OP_FAILED;
	wake_up(&window->regwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->unreg_state = OP_COMPLETED;
	wake_up(&window->unregwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to inform it that unregistration
 * cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->unreg_state = OP_FAILED;
	wake_up(&window->unregwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

int __scif_pin_pages(void *addr, size_t len, int *out_prot,
		     int map_flags, scif_pinned_pages_t *pages)
{
	struct scif_pinned_pages *pinned_pages;
	int nr_pages, err = 0, i;
	bool vmalloc_addr = false;
	bool try_upgrade = false;
	int prot = *out_prot;
	int ulimit = 0;
	struct mm_struct *mm = NULL;

	/* Unsupported flags */
	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
		return -EINVAL;
	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);

	/* Unsupported protection requested */
	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
		return -EINVAL;

	/* addr/len must be page aligned. len should be non zero */
	if (!len ||
	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
		return -EINVAL;

	might_sleep();

	nr_pages = len >> PAGE_SHIFT;

	/* Allocate a set of pinned pages */
	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
	if (!pinned_pages)
		return -ENOMEM;

	if (map_flags & SCIF_MAP_KERNEL) {
		if (is_vmalloc_addr(addr))
			vmalloc_addr = true;

		for (i = 0; i < nr_pages; i++) {
			if (vmalloc_addr)
				pinned_pages->pages[i] =
					vmalloc_to_page(addr + (i * PAGE_SIZE));
			else
				pinned_pages->pages[i] =
					virt_to_page(addr + (i * PAGE_SIZE));
		}
		pinned_pages->nr_pages = nr_pages;
		pinned_pages->map_flags = SCIF_MAP_KERNEL;
	} else {
		/*
		 * SCIF supports registration caching. If a registration has
		 * been requested with read only permissions, then we try
		 * to pin the pages with RW permissions so that a subsequent
		 * transfer with RW permission can hit the cache instead of
		 * invalidating it. If the upgrade fails with RW then we
		 * revert back to R permission and retry.
		 */
		if (prot == SCIF_PROT_READ)
			try_upgrade = true;
		prot |= SCIF_PROT_WRITE;
retry:
		mm = current->mm;
		if (ulimit) {
			err = __scif_check_inc_pinned_vm(mm, nr_pages);
			if (err) {
				pinned_pages->nr_pages = 0;
				goto error_unmap;
			}
		}

		pinned_pages->nr_pages = get_user_pages_fast(
				(u64)addr,
				nr_pages,
				(prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
				pinned_pages->pages);
		if (nr_pages != pinned_pages->nr_pages) {
			if (try_upgrade) {
				if (ulimit)
					__scif_dec_pinned_vm_lock(mm, nr_pages);
				/* Roll back any pinned pages */
				for (i = 0; i < pinned_pages->nr_pages; i++) {
					if (pinned_pages->pages[i])
						put_page(
						pinned_pages->pages[i]);
				}
				prot &= ~SCIF_PROT_WRITE;
				try_upgrade = false;
				goto retry;
			}
		}
		pinned_pages->map_flags = 0;
	}

	if (pinned_pages->nr_pages < nr_pages) {
		err = -EFAULT;
		pinned_pages->nr_pages = nr_pages;
		goto dec_pinned;
	}

	*out_prot = prot;
	atomic_set(&pinned_pages->ref_count, 1);
	*pages = pinned_pages;
	return err;
dec_pinned:
	if (ulimit)
		__scif_dec_pinned_vm_lock(mm, nr_pages);
	/* Something went wrong! Rollback */
error_unmap:
	pinned_pages->nr_pages = nr_pages;
	scif_destroy_pinned_pages(pinned_pages);
	*pages = NULL;
	dev_dbg(scif_info.mdev.this_device,
		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
	return err;
}

int scif_pin_pages(void *addr, size_t len, int prot,
		   int map_flags, scif_pinned_pages_t *pages)
{
	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
}
EXPORT_SYMBOL_GPL(scif_pin_pages);

int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
{
	int err = 0, ret;

	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
		return -EINVAL;

	ret = atomic_sub_return(1, &pinned_pages->ref_count);
	if (ret < 0) {
		dev_err(scif_info.mdev.this_device,
			"%s %d scif_unpin_pages called without pinning? rc %d\n",
			__func__, __LINE__, ret);
		return -EINVAL;
	}
	/*
	 * Destroy the window if the ref count for this set of pinned
	 * pages has dropped to zero. If it is positive then there is
	 * a valid registered window which is backed by these pages and
	 * it will be destroyed once all such windows are unregistered.
	 */
	if (!ret)
		err = scif_destroy_pinned_pages(pinned_pages);

	return err;
}
EXPORT_SYMBOL_GPL(scif_unpin_pages);

static inline void
scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
{
	mutex_lock(&ep->rma_info.rma_lock);
	scif_insert_window(window, &ep->rma_info.reg_list);
	mutex_unlock(&ep->rma_info.rma_lock);
}

off_t scif_register_pinned_pages(scif_epd_t epd,
				 scif_pinned_pages_t pinned_pages,
				 off_t offset, int map_flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 computed_offset;
	struct scif_window *window;
	int err;
	size_t len;
	struct device *spdev;

	/* Unsupported flags */
	if (map_flags & ~SCIF_MAP_FIXED)
		return -EINVAL;

	len = pinned_pages->nr_pages << PAGE_SHIFT;

	/*
	 * Offset is not page aligned/negative or offset+len
	 * wraps around with SCIF_MAP_FIXED.
	 */
	if ((map_flags & SCIF_MAP_FIXED) &&
	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset)))
		return -EINVAL;

	might_sleep();

	err = scif_verify_epd(ep);
	if (err)
		return err;
	/*
	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
	 * after calling scif_unpin_pages().
	 */
	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
		return -EINVAL;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, map_flags, offset,
				     len >> PAGE_SHIFT, &computed_offset);
	if (err) {
		atomic_sub(1, &pinned_pages->ref_count);
		return err;
	}

	/* Allocate and prepare self registration window */
	window = scif_create_window(ep, pinned_pages->nr_pages,
				    computed_offset, false);
	if (!window) {
		atomic_sub(1, &pinned_pages->ref_count);
		scif_free_window_offset(ep, NULL, computed_offset);
		return -ENOMEM;
	}

	window->pinned_pages = pinned_pages;
	window->nr_pages = pinned_pages->nr_pages;
	window->prot = pinned_pages->prot;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		scif_destroy_window(ep, window);
		return err;
	}
	err = scif_send_alloc_request(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Prepare the remote registration window */
	err = scif_prep_remote_window(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Tell the peer about the new window */
	err = scif_send_scif_register(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	scif_put_peer_dev(spdev);
	/* No further failures expected. Insert new window */
	scif_insert_local_window(window, ep);
	return computed_offset;
error_unmap:
	scif_destroy_window(ep, window);
	scif_put_peer_dev(spdev);
	dev_err(&ep->remote_dev->sdev->dev,
		"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}
EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
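
/*
 * Minimal kernel-client sketch of the pin/register lifecycle exported
 * above ("epd" is an assumed connected endpoint and "buf" an assumed
 * page-aligned kernel buffer of "len" bytes; error handling trimmed):
 *
 *	scif_pinned_pages_t pp;
 *	off_t off;
 *
 *	err = scif_pin_pages(buf, len, SCIF_PROT_READ | SCIF_PROT_WRITE,
 *			     SCIF_MAP_KERNEL, &pp);
 *	off = scif_register_pinned_pages(epd, pp, 0, 0);
 *	// ...RMA operations against "off"...
 *	err = scif_unpin_pages(pp);	// pages live on until unregister
 */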

off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
		    int prot, int map_flags)
{
	scif_pinned_pages_t pinned_pages;
	off_t err;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 computed_offset;
	struct scif_window *window;
	struct mm_struct *mm = NULL;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
		epd, addr, len, offset, prot, map_flags);
	/* Unsupported flags */
	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
		return -EINVAL;

	/*
	 * Offset is not page aligned/negative or offset+len
	 * wraps around with SCIF_MAP_FIXED.
	 */
	if ((map_flags & SCIF_MAP_FIXED) &&
	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset)))
		return -EINVAL;

	/* Unsupported protection requested */
	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
		return -EINVAL;

	/* addr/len must be page aligned. len should be non zero */
	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	might_sleep();

	err = scif_verify_epd(ep);
	if (err)
		return err;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, map_flags, offset,
				     len >> PAGE_SHIFT, &computed_offset);
	if (err)
		return err;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		scif_free_window_offset(ep, NULL, computed_offset);
		return err;
	}
	/* Allocate and prepare self registration window */
	window = scif_create_window(ep, len >> PAGE_SHIFT,
				    computed_offset, false);
	if (!window) {
		scif_free_window_offset(ep, NULL, computed_offset);
		scif_put_peer_dev(spdev);
		return -ENOMEM;
	}

	window->nr_pages = len >> PAGE_SHIFT;

	err = scif_send_alloc_request(ep, window);
	if (err) {
		scif_destroy_incomplete_window(ep, window);
		scif_put_peer_dev(spdev);
		return err;
	}

	if (!(map_flags & SCIF_MAP_KERNEL)) {
		mm = __scif_acquire_mm();
		map_flags |= SCIF_MAP_ULIMIT;
	}
	/* Pin down the pages */
	err = __scif_pin_pages(addr, len, &prot,
			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
			       &pinned_pages);
	if (err) {
		scif_destroy_incomplete_window(ep, window);
		__scif_release_mm(mm);
		goto error;
	}

	window->pinned_pages = pinned_pages;
	window->prot = pinned_pages->prot;
	window->mm = mm;

	/* Prepare the remote registration window */
	err = scif_prep_remote_window(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %ld\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Tell the peer about the new window */
	err = scif_send_scif_register(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %ld\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	scif_put_peer_dev(spdev);
	/* No further failures expected. Insert new window */
	scif_insert_local_window(window, ep);
	dev_dbg(&ep->remote_dev->sdev->dev,
		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
		epd, addr, len, computed_offset);
	return computed_offset;
error_unmap:
	scif_destroy_window(ep, window);
error:
	scif_put_peer_dev(spdev);
	dev_err(&ep->remote_dev->sdev->dev,
		"%s %d err %ld\n", __func__, __LINE__, err);
	return err;
}
EXPORT_SYMBOL_GPL(scif_register);

int
scif_unregister(scif_epd_t epd, off_t offset, size_t len)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_window *window = NULL;
	struct scif_rma_req req;
	int nr_pages, err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	/* len must be page aligned. len should be non zero */
	if (!len ||
	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
		return -EINVAL;

	/* Offset is not page aligned or offset+len wraps around */
	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset))
		return -EINVAL;

	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();
	nr_pages = len >> PAGE_SHIFT;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}
	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}
	/* Unregister all the windows in this range */
	err = scif_rma_list_unregister(window, offset, nr_pages);
	if (err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	scif_put_peer_dev(spdev);
	return err;
}
EXPORT_SYMBOL_GPL(scif_unregister);
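
/*
 * Minimal kernel-client sketch pairing the two exports above ("epd" is
 * an assumed connected endpoint, "buf"/"len" an assumed page-aligned
 * buffer; error handling trimmed):
 *
 *	off_t off;
 *
 *	off = scif_register(epd, buf, len, 0, SCIF_PROT_READ |
 *			    SCIF_PROT_WRITE, SCIF_MAP_KERNEL);
 *	if (off < 0)
 *		return off;
 *	// ...scif_readfrom()/scif_writeto() against "off"...
 *	err = scif_unregister(epd, off, len);
 */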