// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Pass last_key as -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

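/* Check that the range [iova, iova + length) lies entirely inside the
 * registered region. DMA MRs span the whole address space, so any
 * range is accepted for them.
 */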
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

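/* Initialize the MR keys. An lkey/rkey is built from the MR's pool
 * index in the upper 24 bits and an 8 bit random key in the low byte;
 * lookup_mr() and rxe_invalidate_mr() recover the index with
 * (key >> 8). The rkey is only set when remote access was requested.
 */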
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* Set ibmr->lkey/rkey and also copy into the private lkey/rkey.
	 * For user MRs these will always be the same; for cases where
	 * the caller 'owns' the key portion they may differ until the
	 * REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

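/* Allocate the two-level map table: num_map map chunks, each holding
 * RXE_BUF_PER_MAP physical buffer descriptors, enough to describe
 * num_buf buffers in total.
 */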
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

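/* Set up a DMA MR. DMA MRs have no map table; the iova in a request is
 * used directly as a kernel virtual address (see iova_to_vaddr() and
 * rxe_mr_copy()).
 */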
void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}

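/* Register a user MR: pin the user memory with ib_umem_get() and
 * record the kernel virtual address of each page in the map table.
 */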
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

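/* Set up a fast-registration MR (IB_MR_TYPE_MEM_REG). The map table is
 * sized for max_pages but not populated here; the MR stays in the FREE
 * state until a REG_MR WQE validates it (see rxe_reg_fast_mr()).
 */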
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

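/* Translate an iova inside the MR into a map index (m_out), buffer
 * index (n_out) and byte offset within that buffer. When every buffer
 * is page sized (page_shift is set) this is a shift-and-mask;
 * otherwise the buffer list is walked. For example, assuming 4 KiB
 * pages and RXE_BUF_PER_MAP = 256, an offset of 0x42345 into the
 * region yields m = 0, n = 0x42 and offset = 0x345.
 */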
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

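/* Return the kernel virtual address backing iova, or NULL if the MR is
 * not in the valid state, the range check fails or the request crosses
 * a buffer boundary. DMA MRs (no map table) return iova unchanged.
 */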
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * an MR object starting at iova
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum rxe_mr_copy_dir dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

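/* Skip over length bytes of the sg list described by the dma
 * descriptor without copying anything, advancing cur_sge and
 * sge_offset and reducing resid accordingly.
 */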
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

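/* Invalidate a fast MR in response to a local or remote invalidate
 * operation. The key must match the MR, no memory windows may still be
 * bound to it, and only IB_MR_TYPE_MEM_REG MRs may be invalidated; on
 * success the MR returns to the FREE state.
 */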
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* A user can (re)register a fast MR by executing a REG_MR WQE.
 * The user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to keep
 * the ib mr keys in sync with the rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

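/* Deregister an MR on behalf of the verbs layer. Per IBA 10.6.7.2.6
 * this must fail while memory windows are still bound to the MR.
 */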
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

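/* Pool cleanup callback: drop the PD reference, release the umem (if
 * any) and free the two-level map table.
 */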
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}