/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include "scif_main.h"

/*
 * struct scif_vma_info - Information about a remote memory mapping
 *			  created via scif_mmap(..)
 * @vma: VM area struct
 * @list: link to list of active vmas
 */
struct scif_vma_info {
	struct vm_area_struct *vma;
	struct list_head list;
};

void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;

	ep = (struct scif_endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = recv_window->prot;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}

	scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		ep->rma_info.async_list_del = 1;
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (window && !window->ref_count)
		scif_queue_for_cleanup(window, &scif_info.rma);
}

/*
 * Remove valid remote memory mappings created via scif_mmap(..) from the
 * process address space since the remote node is lost
 */
static void __scif_zap_mmaps(struct scif_endpt *ep)
{
	struct list_head *item;
	struct scif_vma_info *info;
	struct vm_area_struct *vma;
	unsigned long size;

	spin_lock(&ep->lock);
	list_for_each(item, &ep->rma_info.vma_list) {
		info = list_entry(item, struct scif_vma_info, list);
		vma = info->vma;
		size = vma->vm_end - vma->vm_start;
		zap_vma_ptes(vma, vma->vm_start, size);
		dev_dbg(scif_info.mdev.this_device,
			"%s ep %p zap vma %p size 0x%lx\n",
			__func__, ep, info->vma, size);
	}
	spin_unlock(&ep->lock);
}

/*
 * Traverse the list of endpoints for a particular remote node and
 * zap valid remote memory mappings since the remote node is lost
 */
static void _scif_zap_mmaps(int node, struct list_head *head)
{
	struct scif_endpt *ep;
	struct list_head *item;

	mutex_lock(&scif_info.connlock);
	list_for_each(item, head) {
		ep = list_entry(item, struct scif_endpt, list);
		if (ep->remote_dev->node == node)
			__scif_zap_mmaps(ep);
	}
	mutex_unlock(&scif_info.connlock);
}

/*
 * Wrapper for removing remote memory mappings for a particular node. This API
 * is called by peer nodes as part of handling a lost node.
 */
void scif_zap_mmaps(int node)
{
	_scif_zap_mmaps(node, &scif_info.connected);
	_scif_zap_mmaps(node, &scif_info.disconnected);
}

/*
 * This API is only called while handling a lost node:
 * a) Remote node is dead.
 * b) Remote memory mappings have been zapped
 * So we can traverse the remote_reg_list without any locks. Since
 * the window has not yet been unregistered we can drop the ref count
 * and queue it to the cleanup thread.
 */
static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
{
	struct list_head *pos, *tmp;
	struct scif_window *window;

	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
		window = list_entry(pos, struct scif_window, list);
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(scif_info.mdev.this_device,
				"%s %d unexpected\n",
				__func__, __LINE__);
		if (!window->ref_count) {
			atomic_inc(&ep->rma_info.tw_refcount);
			list_del_init(&window->list);
			scif_queue_for_cleanup(window, &scif_info.rma);
		}
	}
}

/* Cleanup remote registration lists for zombie endpoints */
void scif_cleanup_rma_for_zombies(int node)
{
	struct scif_endpt *ep;
	struct list_head *item;

	mutex_lock(&scif_info.eplock);
	list_for_each(item, &scif_info.zombie) {
		ep = list_entry(item, struct scif_endpt, list);
		if (ep->remote_dev && ep->remote_dev->node == node)
			__scif_cleanup_rma_for_zombies(ep);
	}
	mutex_unlock(&scif_info.eplock);
	flush_work(&scif_info.misc_work);
}

/* Insert the VMA into the per endpoint VMA list */
static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
{
	struct scif_vma_info *info;
	int err = 0;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		err = -ENOMEM;
		goto done;
	}
	info->vma = vma;
	spin_lock(&ep->lock);
	list_add_tail(&info->list, &ep->rma_info.vma_list);
	spin_unlock(&ep->lock);
done:
	return err;
}

/* Delete the VMA from the per endpoint VMA list */
static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
{
	struct list_head *item;
	struct scif_vma_info *info;

	spin_lock(&ep->lock);
	list_for_each(item, &ep->rma_info.vma_list) {
		info = list_entry(item, struct scif_vma_info, list);
		if (info->vma == vma) {
			list_del(&info->list);
			kfree(info);
			break;
		}
	}
	spin_unlock(&ep->lock);
}

static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
{
	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
	struct scif_hw_dev *sdev = scifdev->sdev;
	phys_addr_t out_phys, apt_base = 0;

	/*
	 * If the DMA address is card relative then we need to add the
	 * aperture base for mmap to work correctly
	 */
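	/*
	 * Purely illustrative example (addresses made up): with the card
	 * aperture at host physical address 0x2000000000, a card relative
	 * DMA address of 0x1000 translates to 0x2000001000, which is the
	 * address that mmap/remap_pfn_range() ultimately needs to see.
	 */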
	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
		apt_base = sdev->aper->pa;
	out_phys = apt_base + phys;
	return out_phys;
}

int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
		   struct scif_range **pages)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int nr_pages, err, i;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_pages: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	if (!len || (offset < 0) ||
	    (offset + len < offset) ||
	    (ALIGN(offset, PAGE_SIZE) != offset) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	nr_pages = len >> PAGE_SHIFT;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_SINGLE;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}

	/* Allocate scif_range */
	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
	if (!*pages) {
		err = -ENOMEM;
		goto error;
	}

	/* Allocate phys addr array */
	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
	if (!(*pages)->phys_addr) {
		err = -ENOMEM;
		goto error;
	}

	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
		/* Allocate virtual address array */
		(*pages)->va = scif_zalloc(nr_pages * sizeof(void *));
		if (!(*pages)->va) {
			err = -ENOMEM;
			goto error;
		}
	}
	/* Populate the values */
	(*pages)->cookie = window;
	(*pages)->nr_pages = nr_pages;
	(*pages)->prot_flags = window->prot;

	for (i = 0; i < nr_pages; i++) {
		(*pages)->phys_addr[i] =
			__scif_off_to_dma_addr(window, offset +
					       (i * PAGE_SIZE));
		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
							ep);
		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
			(*pages)->va[i] =
				ep->remote_dev->sdev->aper->va +
				(*pages)->phys_addr[i] -
				ep->remote_dev->sdev->aper->pa;
	}

	scif_get_window(window, nr_pages);
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		if (*pages) {
			scif_free((*pages)->phys_addr,
				  nr_pages * sizeof(dma_addr_t));
			scif_free((*pages)->va,
				  nr_pages * sizeof(void *));
			kfree(*pages);
			*pages = NULL;
		}
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
	}
	return err;
}
EXPORT_SYMBOL_GPL(scif_get_pages);

int scif_put_pages(struct scif_range *pages)
{
	struct scif_endpt *ep;
	struct scif_window *window;
	struct scifmsg msg;

	if (!pages || !pages->cookie)
		return -EINVAL;

	window = pages->cookie;

	if (!window || window->magic != SCIFEP_MAGIC)
		return -EINVAL;

	ep = (struct scif_endpt *)window->ep;
	/*
	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
	 * callee should be allowed to release references to the pages,
	 * else the endpoint was not connected in the first place,
	 * hence the ENOTCONN.
	 */
	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
		return -ENOTCONN;

	mutex_lock(&ep->rma_info.rma_lock);

	scif_put_window(window, pages->nr_pages);

	/* Initiate window destruction if ref count is zero */
	if (!window->ref_count) {
		list_del(&window->list);
		mutex_unlock(&ep->rma_info.rma_lock);
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
		/* Inform the peer about this window being destroyed. */
		msg.uop = SCIF_MUNMAP;
		msg.src = ep->port;
		msg.payload[0] = window->peer_window;
		/* No error handling for notification messages */
		scif_nodeqp_send(ep->remote_dev, &msg);
		/* Destroy this window from the peer's registered AS */
		scif_destroy_remote_window(window);
	} else {
		mutex_unlock(&ep->rma_info.rma_lock);
	}

	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
	scif_free(pages->va, pages->nr_pages * sizeof(void *));
	kfree(pages);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_put_pages);
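/*
 * A minimal usage sketch (purely illustrative; my_consume_page() is a
 * hypothetical consumer and error handling is elided) of how a kernel-mode
 * SCIF client might pair scif_get_pages() with scif_put_pages():
 *
 *	struct scif_range *range;
 *	int i, err;
 *
 *	err = scif_get_pages(epd, offset, len, &range);
 *	if (err)
 *		return err;
 *	for (i = 0; i < range->nr_pages; i++)
 *		my_consume_page(range->phys_addr[i]);
 *	err = scif_put_pages(range);
 *
 * The offset and len passed in must be page aligned and must fall within a
 * window the peer has already registered.
 */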

/*
 * scif_rma_list_mmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Create VtoP mappings via remap_pfn_range(..)
 * 2) Once step 1) completes successfully for the entire range, traverse
 *    the windows again and bump the reference count.
 * RMA lock must be held.
 */
static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
			      int nr_pages, struct vm_area_struct *vma)
{
	s64 end_offset, loop_offset = offset;
	struct scif_window *window = start_window;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	int i, err = 0;
	dma_addr_t phys_addr;
	struct scif_window_iter src_win_iter;
	size_t contig_bytes = 0;

	might_sleep();
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_init_window_iter(window, &src_win_iter);
		for (i = 0; i < loop_nr_pages; i++) {
			phys_addr = scif_off_to_dma_addr(window, loop_offset,
							 &contig_bytes,
							 &src_win_iter);
			phys_addr = scif_get_phys(phys_addr, ep);
			err = remap_pfn_range(vma,
					      vma->vm_start +
					      loop_offset - offset,
					      phys_addr >> PAGE_SHIFT,
					      PAGE_SIZE,
					      vma->vm_page_prot);
			if (err)
				goto error;
			loop_offset += PAGE_SIZE;
		}
		nr_pages_left -= loop_nr_pages;
		if (!nr_pages_left)
			break;
	}
	/*
	 * No more failures expected. Bump up the ref count for all
	 * the windows. Another traversal from start_window required
	 * for handling errors encountered across windows during
	 * remap_pfn_range(..).
	 */
	loop_offset = offset;
	nr_pages_left = nr_pages;
	window = start_window;
	head = &ep->rma_info.remote_reg_list;
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_get_window(window, loop_nr_pages);
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
error:
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}

/*
 * scif_rma_list_munmap:
 *
 * Traverse the remote registration list starting from window:
 * 1) Decrement ref count.
 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
 * RMA lock must be held.
 */
static void scif_rma_list_munmap(struct scif_window *start_window,
				 s64 offset, int nr_pages)
{
	struct scifmsg msg;
	s64 loop_offset = offset, end_offset;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	struct scif_window *window = start_window, *_window;

	msg.uop = SCIF_MUNMAP;
	msg.src = ep->port;
	loop_offset = offset;
	nr_pages_left = nr_pages;
	list_for_each_entry_safe_from(window, _window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_put_window(window, loop_nr_pages);
		if (!window->ref_count) {
			struct scif_dev *rdev = ep->remote_dev;

			scif_drain_dma_intr(rdev->sdev,
					    ep->rma_info.dma_chan);
			/* Inform the peer about this munmap */
			msg.payload[0] = window->peer_window;
			/* No error handling for Notification messages. */
			scif_nodeqp_send(ep->remote_dev, &msg);
			list_del(&window->list);
			/* Destroy this window from the peer's registered AS */
			scif_destroy_remote_window(window);
		}
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
}

/*
 * The private data field of each VMA used to mmap a remote window
 * points to an instance of struct vma_pvt
 */
struct vma_pvt {
	struct scif_endpt *ep;	/* End point for remote window */
	s64 offset;		/* offset within remote window */
	bool valid_offset;	/* offset is valid only if the original
				 * mmap request was for a single page
				 * else the offset within the vma is
				 * the correct offset
				 */
	struct kref ref;
};

static void vma_pvt_release(struct kref *ref)
{
	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);

	kfree(vmapvt);
}

/**
 * scif_vma_open - VMA open driver callback
 * @vma: VMM memory area.
 * The open method is called by the kernel to allow the subsystem implementing
 * the VMA to initialize the area. This method is invoked any time a new
 * reference to the VMA is made (when a process forks, for example).
 * The one exception happens when the VMA is first created by mmap;
 * in this case, the driver's mmap method is called instead.
 * This function is also invoked when an existing VMA is split by the kernel
 * due to a call to munmap on a subset of the VMA resulting in two VMAs.
 * The kernel invokes this function only on one of the two VMAs.
 */
static void scif_vma_open(struct vm_area_struct *vma)
{
	struct vma_pvt *vmapvt = vma->vm_private_data;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	scif_insert_vma(vmapvt->ep, vma);
	kref_get(&vmapvt->ref);
}

/**
 * scif_munmap - VMA close driver callback.
 * @vma: VMM memory area.
 * When an area is destroyed, the kernel calls its close operation.
 * Note that there's no usage count associated with VMA's; the area
 * is opened and closed exactly once by each process that uses it.
 */
static void scif_munmap(struct vm_area_struct *vma)
{
	struct scif_endpt *ep;
	struct vma_pvt *vmapvt = vma->vm_private_data;
	int nr_pages = vma_pages(vma);
	s64 offset;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int err;

	might_sleep();
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	ep = vmapvt->ep;
	offset = vmapvt->valid_offset ? vmapvt->offset :
		(vma->vm_pgoff) << PAGE_SHIFT;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
		ep, nr_pages, offset);
	req.out_window = &window;
	req.offset = offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);

	err = scif_query_window(&req);
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	else
		scif_rma_list_munmap(window, offset, nr_pages);

	mutex_unlock(&ep->rma_info.rma_lock);
	/*
	 * The kernel probably zeroes these out but we still want
	 * to clean up our own mess just in case.
	 */
	vma->vm_ops = NULL;
	vma->vm_private_data = NULL;
	kref_put(&vmapvt->ref, vma_pvt_release);
	scif_delete_vma(ep, vma);
}

static const struct vm_operations_struct scif_vm_ops = {
	.open = scif_vma_open,
	.close = scif_munmap,
};

/**
 * scif_mmap - Map pages in virtual address space to a remote window.
 * @vma: VMM memory area.
 * @epd: endpoint descriptor
 *
 * Return: Upon successful completion, scif_mmap() returns zero
 * else an apt error is returned as documented in scif.h
 */
int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
	int nr_pages = vma_pages(vma);
	int err;
	struct vma_pvt *vmapvt;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
		ep, start_offset, nr_pages);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();

	err = scif_insert_vma(ep, vma);
	if (err)
		return err;

	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
	if (!vmapvt) {
		scif_delete_vma(ep, vma);
		return -ENOMEM;
	}

	vmapvt->ep = ep;
	kref_init(&vmapvt->ref);

	req.out_window = &window;
	req.offset = start_offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}

	/* Loopback keeps the default prot; remote maps use writecombine */
	if (!scifdev_self(ep->remote_dev))
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	/*
	 * VM_DONTCOPY - Do not copy this vma on fork
	 * VM_DONTEXPAND - Cannot expand with mremap()
	 * VM_DONTDUMP - Omit this vma from core dumps
	 * VM_PFNMAP - Page-ranges managed without "struct page"
	 * VM_IO - Memory mapped I/O or similar
	 *
	 * We do not want to copy this VMA automatically on a fork(),
	 * expand this VMA due to mremap() or swap out these pages since
	 * the VMA is actually backed by physical pages in the remote
	 * node's physical memory and not via a struct page.
	 */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;

	if (!scifdev_self(ep->remote_dev))
		vma->vm_flags |= VM_IO | VM_PFNMAP;

	/* Map this range of windows */
	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}
	/* Set up the driver call back */
	vma->vm_ops = &scif_vm_ops;
	vma->vm_private_data = vmapvt;
error_unlock:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		kfree(vmapvt);
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		scif_delete_vma(ep, vma);
	}
	return err;
}