1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/smp.h>
5 #include "dr_types.h"
6
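/* Tunables for the SW steering send engine: up to QUEUE_SIZE send WRs may be
 * posted on the RC loopback QP, a completion is requested once every
 * QUEUE_SIZE / SIGNAL_PER_DIV_QUEUE WQEs (the signal threshold, see
 * mlx5dr_send_ring_alloc()), and once TH_NUMS_TO_DRAIN signal thresholds
 * worth of WQEs are pending, the CQ is drained before posting more work
 * (see dr_handle_pending_wc()).
 */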
7 #define QUEUE_SIZE 128
8 #define SIGNAL_PER_DIV_QUEUE 16
9 #define TH_NUMS_TO_DRAIN 2
10 #define DR_SEND_INFO_POOL_SIZE 1000
11
12 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
13
14 struct dr_data_seg {
15 u64 addr;
16 u32 length;
17 u32 lkey;
18 unsigned int send_flags;
19 };
20
21 enum send_info_type {
22 WRITE_ICM = 0,
23 GTA_ARG = 1,
24 };
25
26 struct postsend_info {
27 enum send_info_type type;
28 struct dr_data_seg write;
29 struct dr_data_seg read;
30 u64 remote_addr;
31 u32 rkey;
32 };
33
34 struct dr_qp_rtr_attr {
35 struct mlx5dr_cmd_gid_attr dgid_attr;
36 enum ib_mtu mtu;
37 u32 qp_num;
38 u16 port_num;
39 u8 min_rnr_timer;
40 u8 sgid_index;
41 u16 udp_src_port;
42 u8 fl:1;
43 };
44
45 struct dr_qp_rts_attr {
46 u8 timeout;
47 u8 retry_cnt;
48 u8 rnr_retry;
49 };
50
51 struct dr_qp_init_attr {
52 u32 cqn;
53 u32 pdn;
54 u32 max_send_wr;
55 u32 max_send_sge;
56 struct mlx5_uars_page *uar;
57 u8 isolate_vl_tc:1;
58 };
59
60 struct mlx5dr_send_info_pool_obj {
61 struct mlx5dr_ste_send_info ste_send_info;
62 struct mlx5dr_send_info_pool *pool;
63 struct list_head list_node;
64 };
65
66 struct mlx5dr_send_info_pool {
67 struct list_head free_list;
68 };
69
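/* The send-info pool below pre-allocates DR_SEND_INFO_POOL_SIZE
 * mlx5dr_ste_send_info objects per NIC direction (RX and TX), so that rule
 * insertion does not have to allocate them one at a time. The pool is
 * refilled on demand in mlx5dr_send_info_alloc() and objects are returned
 * to the free list in mlx5dr_send_info_free().
 */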
70 static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
71 {
72 struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
73 int i;
74
75 for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
76 pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
77 if (!pool_obj)
78 goto clean_pool;
79
80 pool_obj->pool = pool;
81 list_add_tail(&pool_obj->list_node, &pool->free_list);
82 }
83
84 return 0;
85
86 clean_pool:
87 list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
88 list_del(&pool_obj->list_node);
89 kfree(pool_obj);
90 }
91
92 return -ENOMEM;
93 }
94
95 static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
96 {
97 struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
98
99 list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
100 list_del(&pool_obj->list_node);
101 kfree(pool_obj);
102 }
103
104 kfree(pool);
105 }
106
107 void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
108 {
109 dr_send_info_pool_destroy(dmn->send_info_pool_tx);
110 dr_send_info_pool_destroy(dmn->send_info_pool_rx);
111 }
112
113 static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
114 {
115 struct mlx5dr_send_info_pool *pool;
116 int ret;
117
118 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
119 if (!pool)
120 return NULL;
121
122 INIT_LIST_HEAD(&pool->free_list);
123
124 ret = dr_send_info_pool_fill(pool);
125 if (ret) {
126 kfree(pool);
127 return NULL;
128 }
129
130 return pool;
131 }
132
133 int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
134 {
135 dmn->send_info_pool_rx = dr_send_info_pool_create();
136 if (!dmn->send_info_pool_rx)
137 return -ENOMEM;
138
139 dmn->send_info_pool_tx = dr_send_info_pool_create();
140 if (!dmn->send_info_pool_tx) {
141 dr_send_info_pool_destroy(dmn->send_info_pool_rx);
142 return -ENOMEM;
143 }
144
145 return 0;
146 }
147
148 struct mlx5dr_ste_send_info
149 *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
150 enum mlx5dr_domain_nic_type nic_type)
151 {
152 struct mlx5dr_send_info_pool_obj *pool_obj;
153 struct mlx5dr_send_info_pool *pool;
154 int ret;
155
156 pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
157 dmn->send_info_pool_tx;
158
159 if (unlikely(list_empty(&pool->free_list))) {
160 ret = dr_send_info_pool_fill(pool);
161 if (ret)
162 return NULL;
163 }
164
165 pool_obj = list_first_entry_or_null(&pool->free_list,
166 struct mlx5dr_send_info_pool_obj,
167 list_node);
168
169 if (likely(pool_obj)) {
170 list_del_init(&pool_obj->list_node);
171 } else {
172 WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
173 return NULL;
174 }
175
176 return &pool_obj->ste_send_info;
177 }
178
179 void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
180 {
181 struct mlx5dr_send_info_pool_obj *pool_obj;
182
183 pool_obj = container_of(ste_send_info,
184 struct mlx5dr_send_info_pool_obj,
185 ste_send_info);
186
187 list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
188 }
189
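/* Consume one CQE from the send CQ. A requester error (MLX5_CQE_REQ_ERR)
 * resyncs sq.cc from the wqe_head of the failed WQE and is reported as
 * CQ_POLL_ERR; a responder error just advances sq.cc and is also reported
 * as CQ_POLL_ERR; any other opcode is treated as a successful completion
 * and returns CQ_OK.
 */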
190 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
191 {
192 unsigned int idx;
193 u8 opcode;
194
195 opcode = get_cqe_opcode(cqe64);
196 if (opcode == MLX5_CQE_REQ_ERR) {
197 idx = be16_to_cpu(cqe64->wqe_counter) &
198 (dr_cq->qp->sq.wqe_cnt - 1);
199 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
200 } else if (opcode == MLX5_CQE_RESP_ERR) {
201 ++dr_cq->qp->sq.cc;
202 } else {
203 idx = be16_to_cpu(cqe64->wqe_counter) &
204 (dr_cq->qp->sq.wqe_cnt - 1);
205 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
206
207 return CQ_OK;
208 }
209
210 return CQ_POLL_ERR;
211 }
212
213 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
214 {
215 struct mlx5_cqe64 *cqe64;
216 int err;
217
218 cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
219 if (!cqe64) {
220 if (unlikely(dr_cq->mdev->state ==
221 MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
222 mlx5_core_dbg_once(dr_cq->mdev,
223 "Polling CQ while device is shutting down\n");
224 return CQ_POLL_ERR;
225 }
226 return CQ_EMPTY;
227 }
228
229 mlx5_cqwq_pop(&dr_cq->wq);
230 err = dr_parse_cqe(dr_cq, cqe64);
231 mlx5_cqwq_update_db_record(&dr_cq->wq);
232
233 return err;
234 }
235
236 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
237 {
238 int npolled;
239 int err = 0;
240
241 for (npolled = 0; npolled < ne; ++npolled) {
242 err = dr_cq_poll_one(dr_cq);
243 if (err != CQ_OK)
244 break;
245 }
246
247 return err == CQ_POLL_ERR ? err : npolled;
248 }
249
250 static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
251 {
252 return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
253 sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
254 sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
255 }
256
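/* A rough worked example of the WQE size math below, assuming the usual
 * mlx5 segment sizes (16-byte ctrl/raddr/data segments, a 4-byte inline
 * header, a 64-byte MLX5_SEND_WQE_BB) and a 64-byte DR_STE_SIZE: the
 * inline variant needs 16 + 16 + ALIGN(4 + 64, 16) = 112 bytes, so after
 * aligning to the basic block the send WQE spans two basic blocks
 * (128 bytes). The GTA-argument WQE size is folded in via
 * dr_qp_get_args_update_send_wqe_size().
 */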
257 /* We calculate the send WQE size for the specific RC QP with the required functionality */
258 static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
259 {
260 int update_arg_size;
261 int inl_size = 0;
262 int tot_size;
263 int size;
264
265 update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
266
267 size = sizeof(struct mlx5_wqe_ctrl_seg) +
268 sizeof(struct mlx5_wqe_raddr_seg);
269 inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
270 DR_STE_SIZE, 16);
271
272 size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
273
274 size = max(size, update_arg_size);
275
276 tot_size = max(size, inl_size);
277
278 return ALIGN(tot_size, MLX5_SEND_WQE_BB);
279 }
280
281 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
282 struct dr_qp_init_attr *attr)
283 {
284 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
285 u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
286 struct mlx5_wq_param wqp;
287 struct mlx5dr_qp *dr_qp;
288 int wqe_size;
289 int inlen;
290 void *qpc;
291 void *in;
292 int err;
293
294 dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
295 if (!dr_qp)
296 return NULL;
297
298 wqp.buf_numa_node = mdev->priv.numa_node;
299 wqp.db_numa_node = mdev->priv.numa_node;
300
301 dr_qp->rq.pc = 0;
302 dr_qp->rq.cc = 0;
303 dr_qp->rq.wqe_cnt = 256;
304 dr_qp->sq.pc = 0;
305 dr_qp->sq.cc = 0;
306 dr_qp->sq.head = 0;
307 dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
308
309 MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
310 MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
311 MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
312 err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
313 &dr_qp->wq_ctrl);
314 if (err) {
315 mlx5_core_warn(mdev, "Can't create QP WQ\n");
316 goto err_wq;
317 }
318
319 dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
320 sizeof(dr_qp->sq.wqe_head[0]),
321 GFP_KERNEL);
322
323 if (!dr_qp->sq.wqe_head) {
324 mlx5_core_warn(mdev, "Can't allocate wqe head\n");
325 goto err_wqe_head;
326 }
327
328 inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
329 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
330 dr_qp->wq_ctrl.buf.npages;
331 in = kvzalloc(inlen, GFP_KERNEL);
332 if (!in) {
333 err = -ENOMEM;
334 goto err_in;
335 }
336
337 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
338 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
339 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
340 MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
341 MLX5_SET(qpc, qpc, pd, attr->pdn);
342 MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
343 MLX5_SET(qpc, qpc, log_page_size,
344 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
345 MLX5_SET(qpc, qpc, fre, 1);
346 MLX5_SET(qpc, qpc, rlky, 1);
347 MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
348 MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
349 MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
350 MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
351 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
352 MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
353 MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
354 MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
355 if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
356 MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
357 mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
358 (__be64 *)MLX5_ADDR_OF(create_qp_in,
359 in, pas));
360
361 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
362 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
363 dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
364 kvfree(in);
365 if (err)
366 goto err_in;
367 dr_qp->uar = attr->uar;
368 wqe_size = dr_qp_calc_rc_send_wqe(attr);
369 dr_qp->max_inline_data = min(wqe_size -
370 (sizeof(struct mlx5_wqe_ctrl_seg) +
371 sizeof(struct mlx5_wqe_raddr_seg) +
372 sizeof(struct mlx5_wqe_inline_seg)),
373 (2 * MLX5_SEND_WQE_BB -
374 (sizeof(struct mlx5_wqe_ctrl_seg) +
375 sizeof(struct mlx5_wqe_raddr_seg) +
376 sizeof(struct mlx5_wqe_inline_seg))));
377
378 return dr_qp;
379
380 err_in:
381 kfree(dr_qp->sq.wqe_head);
382 err_wqe_head:
383 mlx5_wq_destroy(&dr_qp->wq_ctrl);
384 err_wq:
385 kfree(dr_qp);
386 return NULL;
387 }
388
389 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
390 struct mlx5dr_qp *dr_qp)
391 {
392 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
393
394 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
395 MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
396 mlx5_cmd_exec_in(mdev, destroy_qp, in);
397
398 kfree(dr_qp->sq.wqe_head);
399 mlx5_wq_destroy(&dr_qp->wq_ctrl);
400 kfree(dr_qp);
401 }
402
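/* Ring the doorbell for all WQEs posted so far: dma_wmb() orders the WQE
 * writes before the doorbell-record update, and the full wmb() orders the
 * doorbell record before the MMIO write of the ctrl segment to the UAR
 * page (at MLX5_BF_OFFSET) that actually notifies the HW.
 */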
403 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
404 {
405 dma_wmb();
406 *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
407
408 /* After wmb() the HW is aware of the new work */
409 wmb();
410
411 mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
412 }
413
414 static void
415 dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
416 u32 remote_addr,
417 struct dr_data_seg *data_seg,
418 int *size)
419 {
420 struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
421 struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
422
423 wq_ctrl->general_id = cpu_to_be32(remote_addr);
424 wq_flow_seg = (void *)(wq_ctrl + 1);
425
426 /* mlx5_wqe_flow_update_ctrl_seg - all reserved */
427 memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
428 wq_arg_seg = (void *)(wq_flow_seg + 1);
429
430 memcpy(wq_arg_seg->argument_list,
431 (void *)(uintptr_t)data_seg->addr,
432 data_seg->length);
433
434 *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
435 sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */
436 sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */
437 MLX5_SEND_WQE_DS;
438 }
439
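/* Copy the payload inline into the WQE. With the usual mlx5 segment sizes
 * (16-byte ctrl and raddr segments, 4-byte inline header) only
 * 64 - 36 = 28 bytes fit in the first basic block, so a full 64-byte STE
 * spills its remaining bytes into the next basic block. The return value
 * is the inline payload size in 16-byte DS units, e.g.
 * DIV_ROUND_UP(64 + 4, 16) = 5 for a single STE.
 */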
440 static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
441 struct dr_data_seg *data_seg, void *wqe)
442 {
443 int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
444 sizeof(struct mlx5_wqe_raddr_seg) +
445 sizeof(struct mlx5_wqe_inline_seg);
446 struct mlx5_wqe_inline_seg *seg;
447 int left_space;
448 int inl = 0;
449 void *addr;
450 int len;
451 int idx;
452
453 seg = wqe;
454 wqe += sizeof(*seg);
455 addr = (void *)(unsigned long)(data_seg->addr);
456 len = data_seg->length;
457 inl += len;
458 left_space = MLX5_SEND_WQE_BB - inline_header_size;
459
460 if (likely(len > left_space)) {
461 memcpy(wqe, addr, left_space);
462 len -= left_space;
463 addr += left_space;
464 idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
465 wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
466 }
467
468 memcpy(wqe, addr, len);
469
470 if (likely(inl)) {
471 seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
472 return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
473 MLX5_SEND_WQE_DS);
474 } else {
475 return 0;
476 }
477 }
478
479 static void
480 dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
481 struct mlx5_wqe_ctrl_seg *wq_ctrl,
482 u64 remote_addr,
483 u32 rkey,
484 struct dr_data_seg *data_seg,
485 unsigned int *size)
486 {
487 struct mlx5_wqe_raddr_seg *wq_raddr;
488 struct mlx5_wqe_data_seg *wq_dseg;
489
490 wq_raddr = (void *)(wq_ctrl + 1);
491
492 wq_raddr->raddr = cpu_to_be64(remote_addr);
493 wq_raddr->rkey = cpu_to_be32(rkey);
494 wq_raddr->reserved = 0;
495
496 wq_dseg = (void *)(wq_raddr + 1);
497 /* WQE ctrl segment + WQE remote addr segment */
498 *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
499
500 if (data_seg->send_flags & IB_SEND_INLINE) {
501 *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
502 } else {
503 wq_dseg->byte_count = cpu_to_be32(data_seg->length);
504 wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
505 wq_dseg->addr = cpu_to_be64(data_seg->addr);
506 *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */
507 }
508 }
509
510 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
511 struct dr_data_seg *data_seg)
512 {
513 wq_ctrl->signature = 0;
514 wq_ctrl->rsvd[0] = 0;
515 wq_ctrl->rsvd[1] = 0;
516 wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
517 MLX5_WQE_CTRL_CQ_UPDATE : 0;
518 wq_ctrl->imm = 0;
519 }
520
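/* Build a single send WQE at the current producer index: a ctrl segment
 * followed by either raddr + data/inline segments (RDMA_READ/RDMA_WRITE to
 * ICM) or flow-update + argument segments (FLOW_TBL_ACCESS for GTA
 * arguments). 'size' is counted in 16-byte DS units and goes into qpn_ds,
 * while sq.pc advances by the number of 64-byte basic blocks consumed.
 */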
521 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
522 u32 rkey, struct dr_data_seg *data_seg,
523 u32 opcode, bool notify_hw)
524 {
525 struct mlx5_wqe_ctrl_seg *wq_ctrl;
526 int opcode_mod = 0;
527 unsigned int size;
528 unsigned int idx;
529
530 idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
531
532 wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
533 dr_set_ctrl_seg(wq_ctrl, data_seg);
534
535 switch (opcode) {
536 case MLX5_OPCODE_RDMA_READ:
537 case MLX5_OPCODE_RDMA_WRITE:
538 dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
539 rkey, data_seg, &size);
540 break;
541 case MLX5_OPCODE_FLOW_TBL_ACCESS:
542 opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
543 dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
544 data_seg, &size);
545 break;
546 default:
547 WARN(true, "illegal opcode %d", opcode);
548 return;
549 }
550
551 /* --------------------------------------------------------
552 * |opcode_mod (8 bits)|wqe_index (16 bits)| opcode (8 bits)|
553 * --------------------------------------------------------
554 */
555 wq_ctrl->opmod_idx_opcode =
556 cpu_to_be32((opcode_mod << 24) |
557 ((dr_qp->sq.pc & 0xffff) << 8) |
558 opcode);
559 wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
560
561 dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
562 dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;
563
564 if (notify_hw)
565 dr_cmd_notify_hw(dr_qp, wq_ctrl);
566 }
567
568 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
569 {
570 if (send_info->type == WRITE_ICM) {
571 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
572 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
573 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
574 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
575 } else { /* GTA_ARG */
576 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
577 &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
578 }
579
580 }
581
582 /**
583 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
584 * with the send_list parameters:
585 *
586 * @ste: The STE that the data is attached to
587 * @size: Size of the data to write
588 * @offset: Offset of the data from the start of the hw_ste entry
589 * @data: Data to write
590 * @ste_info: STE info to be sent with send_list
591 * @send_list: The list to append to
592 * @copy_data: If true, the data is copied and kept, since it is not
593 * backed up anywhere else (e.g. during re-hash).
594 * If false, the data may still be updated after it
595 * was added to the list.
596 */
597 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
598 u16 offset, u8 *data,
599 struct mlx5dr_ste_send_info *ste_info,
600 struct list_head *send_list,
601 bool copy_data)
602 {
603 ste_info->size = size;
604 ste_info->ste = ste;
605 ste_info->offset = offset;
606
607 if (copy_data) {
608 memcpy(ste_info->data_cont, data, size);
609 ste_info->data = ste_info->data_cont;
610 } else {
611 ste_info->data = data;
612 }
613
614 list_add_tail(&ste_info->send_list, send_list);
615 }
616
617 /* The function tries to consume one wc each time, unless the queue is full.
618 * In that case, which means that the HW is behind the SW by a full queue
619 * length, the function will drain the CQ until it is empty.
620 */
621 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
622 struct mlx5dr_send_ring *send_ring)
623 {
624 bool is_drain = false;
625 int ne;
626
627 if (send_ring->pending_wqe < send_ring->signal_th)
628 return 0;
629
630 /* Queue is full, start draining it */
631 if (send_ring->pending_wqe >=
632 dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
633 is_drain = true;
634
635 do {
636 ne = dr_poll_cq(send_ring->cq, 1);
637 if (unlikely(ne < 0)) {
638 mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
639 send_ring->qp->qpn);
640 send_ring->err_state = true;
641 return ne;
642 } else if (ne == 1) {
643 send_ring->pending_wqe -= send_ring->signal_th;
644 }
645 } while (ne == 1 ||
646 (is_drain && send_ring->pending_wqe >= send_ring->signal_th));
647
648 return 0;
649 }
650
651 static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
652 struct postsend_info *send_info)
653 {
654 send_ring->pending_wqe++;
655
656 if (send_ring->pending_wqe % send_ring->signal_th == 0)
657 send_info->write.send_flags |= IB_SEND_SIGNALED;
658 else
659 send_info->write.send_flags &= ~IB_SEND_SIGNALED;
660 }
661
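/* Prepare the write + read WQE pair for an ICM update. Payloads larger
 * than the max inline size are first copied into the pre-registered ring
 * buffer (indexed by tx_head modulo the signal threshold) and sent from
 * there; smaller payloads are sent inline. Every write is followed by an
 * RDMA read into the dedicated sync buffer, so that a completed read
 * indicates the preceding write has landed in ICM. Completions are
 * requested once every 'signal_th' pending WQEs.
 */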
662 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
663 struct mlx5dr_send_ring *send_ring,
664 struct postsend_info *send_info)
665 {
666 u32 buff_offset;
667
668 if (send_info->write.length > dmn->info.max_inline_size) {
669 buff_offset = (send_ring->tx_head &
670 (dmn->send_ring->signal_th - 1)) *
671 send_ring->max_post_send_size;
672 /* Copy to ring mr */
673 memcpy(send_ring->buf + buff_offset,
674 (void *)(uintptr_t)send_info->write.addr,
675 send_info->write.length);
676 send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
677 send_info->write.lkey = send_ring->mr->mkey;
678
679 send_ring->tx_head++;
680 }
681
682 send_ring->pending_wqe++;
683 if (!send_info->write.lkey)
684 send_info->write.send_flags |= IB_SEND_INLINE;
685
686 if (send_ring->pending_wqe % send_ring->signal_th == 0)
687 send_info->write.send_flags |= IB_SEND_SIGNALED;
688 else
689 send_info->write.send_flags &= ~IB_SEND_SIGNALED;
690
691 send_ring->pending_wqe++;
692 send_info->read.length = send_info->write.length;
693
694 /* Read into dedicated sync buffer */
695 send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
696 send_info->read.lkey = send_ring->sync_mr->mkey;
697
698 if (send_ring->pending_wqe % send_ring->signal_th == 0)
699 send_info->read.send_flags |= IB_SEND_SIGNALED;
700 else
701 send_info->read.send_flags &= ~IB_SEND_SIGNALED;
702 }
703
704 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
705 struct mlx5dr_send_ring *send_ring,
706 struct postsend_info *send_info)
707 {
708 if (send_info->type == WRITE_ICM)
709 dr_fill_write_icm_segs(dmn, send_ring, send_info);
710 else /* args */
711 dr_fill_write_args_segs(send_ring, send_info);
712 }
713
714 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
715 struct postsend_info *send_info)
716 {
717 struct mlx5dr_send_ring *send_ring = dmn->send_ring;
718 int ret;
719
720 if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
721 send_ring->err_state)) {
722 mlx5_core_dbg_once(dmn->mdev,
723 "Skipping post send: QP err state: %d, device state: %d\n",
724 send_ring->err_state, dmn->mdev->state);
725 return 0;
726 }
727
728 spin_lock(&send_ring->lock);
729
730 ret = dr_handle_pending_wc(dmn, send_ring);
731 if (ret)
732 goto out_unlock;
733
734 dr_fill_data_segs(dmn, send_ring, send_info);
735 dr_post_send(send_ring->qp, send_info);
736
737 out_unlock:
738 spin_unlock(&send_ring->lock);
739 return ret;
740 }
741
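/* Decide how to split a hash-table copy into postsend operations: chunks
 * larger than max_post_send_size are sent in several iterations of
 * max_post_send_size bytes each, smaller chunks in a single shot. For
 * example, assuming a 64-byte DR_STE_SIZE and a 64KB max post size, a
 * 256KB chunk would be sent in 4 iterations of 1024 STEs. A bounce buffer
 * of one iteration's size is allocated into *data.
 */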
742 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
743 struct mlx5dr_ste_htbl *htbl,
744 u8 **data,
745 u32 *byte_size,
746 int *iterations,
747 int *num_stes)
748 {
749 u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
750 int alloc_size;
751
752 if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
753 *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
754 *byte_size = dmn->send_ring->max_post_send_size;
755 alloc_size = *byte_size;
756 *num_stes = *byte_size / DR_STE_SIZE;
757 } else {
758 *iterations = 1;
759 *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
760 alloc_size = *num_stes * DR_STE_SIZE;
761 }
762
763 *data = kvzalloc(alloc_size, GFP_KERNEL);
764 if (!*data)
765 return -ENOMEM;
766
767 return 0;
768 }
769
770 /**
771 * mlx5dr_send_postsend_ste: write 'size' bytes at 'offset' into the HW ICM.
772 *
773 * @dmn: Domain
774 * @ste: The STE struct that contains the data (at
775 * least part of it)
776 * @data: The real data to send
777 * @size: Number of bytes to write
778 * @offset: The offset from the ICM-mapped data at which to
779 * start writing; used for writing only part of the
780 * buffer.
781 *
782 * Return: 0 on success.
783 */
784 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
785 u8 *data, u16 size, u16 offset)
786 {
787 struct postsend_info send_info = {};
788
789 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
790
791 send_info.write.addr = (uintptr_t)data;
792 send_info.write.length = size;
793 send_info.write.lkey = 0;
794 send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
795 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
796
797 return dr_postsend_icm_data(dmn, &send_info);
798 }
799
800 int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
801 struct mlx5dr_ste_htbl *htbl,
802 u8 *formatted_ste, u8 *mask)
803 {
804 u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
805 int num_stes_per_iter;
806 int iterations;
807 u8 *data;
808 int ret;
809 int i;
810 int j;
811
812 ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
813 &iterations, &num_stes_per_iter);
814 if (ret)
815 return ret;
816
817 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);
818
819 /* Send the data 'iterations' times */
820 for (i = 0; i < iterations; i++) {
821 u32 ste_index = i * (byte_size / DR_STE_SIZE);
822 struct postsend_info send_info = {};
823
824 /* Copy all STEs to the data buffer;
825 * the bit_mask needs to be added
826 */
827 for (j = 0; j < num_stes_per_iter; j++) {
828 struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
829 u32 ste_off = j * DR_STE_SIZE;
830
831 if (mlx5dr_ste_is_not_used(ste)) {
832 memcpy(data + ste_off,
833 formatted_ste, DR_STE_SIZE);
834 } else {
835 /* Copy data */
836 memcpy(data + ste_off,
837 htbl->chunk->hw_ste_arr +
838 DR_STE_SIZE_REDUCED * (ste_index + j),
839 DR_STE_SIZE_REDUCED);
840 /* Copy bit_mask */
841 memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
842 mask, DR_STE_SIZE_MASK);
843 /* Only when we have a mask do we need to re-arrange the STE */
844 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
845 data + (j * DR_STE_SIZE),
846 DR_STE_SIZE);
847 }
848 }
849
850 send_info.write.addr = (uintptr_t)data;
851 send_info.write.length = byte_size;
852 send_info.write.lkey = 0;
853 send_info.remote_addr =
854 mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
855 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
856
857 ret = dr_postsend_icm_data(dmn, &send_info);
858 if (ret)
859 goto out_free;
860 }
861
862 out_free:
863 kvfree(data);
864 return ret;
865 }
866
867 /* Initialize htbl with default STEs */
868 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
869 struct mlx5dr_ste_htbl *htbl,
870 u8 *ste_init_data,
871 bool update_hw_ste)
872 {
873 u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
874 int iterations;
875 int num_stes;
876 u8 *copy_dst;
877 u8 *data;
878 int ret;
879 int i;
880
881 ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
882 &iterations, &num_stes);
883 if (ret)
884 return ret;
885
886 if (update_hw_ste) {
887 /* Copy the reduced STE to hash table ste_arr */
888 for (i = 0; i < num_stes; i++) {
889 copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
890 memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
891 }
892 }
893
894 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
895
896 /* Copy the same STE to the data buffer */
897 for (i = 0; i < num_stes; i++) {
898 copy_dst = data + i * DR_STE_SIZE;
899 memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
900 }
901
902 /* Send the data 'iterations' times */
903 for (i = 0; i < iterations; i++) {
904 u32 ste_index = i * (byte_size / DR_STE_SIZE); /* u32 to match mlx5dr_send_postsend_htbl() and avoid overflow */
905 struct postsend_info send_info = {};
906
907 send_info.write.addr = (uintptr_t)data;
908 send_info.write.length = byte_size;
909 send_info.write.lkey = 0;
910 send_info.remote_addr =
911 mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
912 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
913
914 ret = dr_postsend_icm_data(dmn, &send_info);
915 if (ret)
916 goto out_free;
917 }
918
919 out_free:
920 kvfree(data);
921 return ret;
922 }
923
924 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
925 struct mlx5dr_action *action)
926 {
927 struct postsend_info send_info = {};
928
929 send_info.write.addr = (uintptr_t)action->rewrite->data;
930 send_info.write.length = action->rewrite->num_of_actions *
931 DR_MODIFY_ACTION_SIZE;
932 send_info.write.lkey = 0;
933 send_info.remote_addr =
934 mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
935 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
936
937 return dr_postsend_icm_data(dmn, &send_info);
938 }
939
940 int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
941 struct mlx5dr_icm_chunk *chunk,
942 u16 num_of_actions,
943 u8 *data)
944 {
945 struct postsend_info send_info = {};
946 int ret;
947
948 send_info.write.addr = (uintptr_t)data;
949 send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
950 send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
951 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);
952
953 ret = dr_postsend_icm_data(dmn, &send_info);
954 if (ret)
955 return ret;
956
957 return 0;
958 }
959
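/* Write modify-header arguments to a general object (GTA). The data is
 * sent in DR_ACTION_CACHE_LINE_SIZE pieces; remote_addr here is not an ICM
 * address but the argument object id plus the piece index, which ends up
 * in the WQE ctrl general_id field (see
 * dr_rdma_handle_flow_access_arg_segments()).
 */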
960 int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
961 u16 num_of_actions, u8 *actions_data)
962 {
963 int data_len, iter = 0, cur_sent;
964 u64 addr;
965 int ret;
966
967 addr = (uintptr_t)actions_data;
968 data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
969
970 do {
971 struct postsend_info send_info = {};
972
973 send_info.type = GTA_ARG;
974 send_info.write.addr = addr;
975 cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
976 send_info.write.length = cur_sent;
977 send_info.write.lkey = 0;
978 send_info.remote_addr = arg_id + iter;
979
980 ret = dr_postsend_icm_data(dmn, &send_info);
981 if (ret)
982 goto out;
983
984 iter++;
985 addr += cur_sent;
986 data_len -= cur_sent;
987 } while (data_len > 0);
988
989 out:
990 return ret;
991 }
992
993 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
994 struct mlx5dr_qp *dr_qp,
995 int port)
996 {
997 u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
998 void *qpc;
999
1000 qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
1001
1002 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
1003 MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
1004 MLX5_SET(qpc, qpc, rre, 1);
1005 MLX5_SET(qpc, qpc, rwe, 1);
1006
1007 MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
1008 MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
1009
1010 return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
1011 }
1012
1013 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
1014 struct mlx5dr_qp *dr_qp,
1015 struct dr_qp_rts_attr *attr)
1016 {
1017 u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
1018 void *qpc;
1019
1020 qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
1021
1022 MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
1023
1024 MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
1025 MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
1026 MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
1027
1028 MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
1029 MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
1030
1031 return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
1032 }
1033
1034 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
1035 struct mlx5dr_qp *dr_qp,
1036 struct dr_qp_rtr_attr *attr)
1037 {
1038 u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
1039 void *qpc;
1040
1041 qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
1042
1043 MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
1044
1045 MLX5_SET(qpc, qpc, mtu, attr->mtu);
1046 MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
1047 MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
1048 memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
1049 attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
1050 memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
1051 attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
1052 MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
1053 attr->sgid_index);
1054
1055 if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
1056 MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
1057 attr->udp_src_port);
1058
1059 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
1060 MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
1061 MLX5_SET(qpc, qpc, min_rnr_nak, 1);
1062
1063 MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
1064 MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
1065
1066 return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
1067 }
1068
1069 static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
1070 {
1071 /* Check whether RC RoCE QP creation with force loopback is allowed.
1072 * There are two separate capability bits for this:
1073 * - force loopback when RoCE is enabled
1074 * - force loopback when RoCE is disabled
1075 */
1076 return ((caps->roce_caps.roce_en &&
1077 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
1078 (!caps->roce_caps.roce_en &&
1079 caps->roce_caps.fl_rc_qp_when_roce_disabled));
1080 }
1081
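/* Bring the loopback RC QP through the standard RST -> INIT -> RTR -> RTS
 * sequence, with the QP as its own remote peer (remote_qpn == qpn). When
 * force-loopback is allowed no GID is needed; otherwise GID 0 of port 1 is
 * queried and used as the source GID index.
 */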
1082 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
1083 {
1084 struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
1085 struct dr_qp_rts_attr rts_attr = {};
1086 struct dr_qp_rtr_attr rtr_attr = {};
1087 enum ib_mtu mtu = IB_MTU_1024;
1088 u16 gid_index = 0;
1089 int port = 1;
1090 int ret;
1091
1092 /* Init */
1093 ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
1094 if (ret) {
1095 mlx5dr_err(dmn, "Failed modify QP rst2init\n");
1096 return ret;
1097 }
1098
1099 /* RTR */
1100 rtr_attr.mtu = mtu;
1101 rtr_attr.qp_num = dr_qp->qpn;
1102 rtr_attr.min_rnr_timer = 12;
1103 rtr_attr.port_num = port;
1104 rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;
1105
1106 /* If QP creation with force loopback is allowed, then there
1107 * is no need for GID index when creating the QP.
1108 * Otherwise we query GID attributes and use GID index.
1109 */
1110 rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
1111 if (!rtr_attr.fl) {
1112 ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
1113 &rtr_attr.dgid_attr);
1114 if (ret)
1115 return ret;
1116
1117 rtr_attr.sgid_index = gid_index;
1118 }
1119
1120 ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
1121 if (ret) {
1122 mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
1123 return ret;
1124 }
1125
1126 /* RTS */
1127 rts_attr.timeout = 14;
1128 rts_attr.retry_cnt = 7;
1129 rts_attr.rnr_retry = 7;
1130
1131 ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
1132 if (ret) {
1133 mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
1134 return ret;
1135 }
1136
1137 return 0;
1138 }
1139
1140 static void dr_cq_complete(struct mlx5_core_cq *mcq,
1141 struct mlx5_eqe *eqe)
1142 {
1143 pr_err("CQ completion CQ: #%u\n", mcq->cqn);
1144 }
1145
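/* Create a CQ that is only ever polled directly via dr_poll_cq(); it is
 * never armed, and the arm_db value set below merely keeps the HW
 * doorbell-recovery logic away from it. dr_cq_complete() just logs, since
 * completion events are not expected on this CQ.
 */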
1146 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
1147 struct mlx5_uars_page *uar,
1148 size_t ncqe)
1149 {
1150 u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
1151 u32 out[MLX5_ST_SZ_DW(create_cq_out)];
1152 struct mlx5_wq_param wqp;
1153 struct mlx5_cqe64 *cqe;
1154 struct mlx5dr_cq *cq;
1155 int inlen, err, eqn;
1156 void *cqc, *in;
1157 __be64 *pas;
1158 int vector;
1159 u32 i;
1160
1161 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
1162 if (!cq)
1163 return NULL;
1164
1165 ncqe = roundup_pow_of_two(ncqe);
1166 MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
1167
1168 wqp.buf_numa_node = mdev->priv.numa_node;
1169 wqp.db_numa_node = mdev->priv.numa_node;
1170
1171 err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
1172 &cq->wq_ctrl);
1173 if (err)
1174 goto out;
1175
1176 for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1177 cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1178 cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
1179 }
1180
1181 inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1182 sizeof(u64) * cq->wq_ctrl.buf.npages;
1183 in = kvzalloc(inlen, GFP_KERNEL);
1184 if (!in)
1185 goto err_cqwq;
1186
1187 vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
1188 err = mlx5_comp_eqn_get(mdev, vector, &eqn);
1189 if (err) {
1190 kvfree(in);
1191 goto err_cqwq;
1192 }
1193
1194 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1195 MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
1196 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
1197 MLX5_SET(cqc, cqc, uar_page, uar->index);
1198 MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1199 MLX5_ADAPTER_PAGE_SHIFT);
1200 MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1201
1202 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
1203 mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
1204
1205 cq->mcq.comp = dr_cq_complete;
1206
1207 err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
1208 kvfree(in);
1209
1210 if (err)
1211 goto err_cqwq;
1212
1213 cq->mcq.cqe_sz = 64;
1214 cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
1215 cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
1216 *cq->mcq.set_ci_db = 0;
1217
1218 /* Set a non-zero value, in order to avoid the HW running db-recovery on
1219 * a CQ that is used in polling mode.
1220 */
1221 *cq->mcq.arm_db = cpu_to_be32(2 << 28);
1222
1223 cq->mcq.vector = 0;
1224 cq->mcq.uar = uar;
1225 cq->mdev = mdev;
1226
1227 return cq;
1228
1229 err_cqwq:
1230 mlx5_wq_destroy(&cq->wq_ctrl);
1231 out:
1232 kfree(cq);
1233 return NULL;
1234 }
1235
1236 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
1237 {
1238 mlx5_core_destroy_cq(mdev, &cq->mcq);
1239 mlx5_wq_destroy(&cq->wq_ctrl);
1240 kfree(cq);
1241 }
1242
1243 static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
1244 {
1245 u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
1246 void *mkc;
1247
1248 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1249 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
1250 MLX5_SET(mkc, mkc, a, 1);
1251 MLX5_SET(mkc, mkc, rw, 1);
1252 MLX5_SET(mkc, mkc, rr, 1);
1253 MLX5_SET(mkc, mkc, lw, 1);
1254 MLX5_SET(mkc, mkc, lr, 1);
1255
1256 MLX5_SET(mkc, mkc, pd, pdn);
1257 MLX5_SET(mkc, mkc, length64, 1);
1258 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1259
1260 return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
1261 }
1262
1263 static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
1264 u32 pdn, void *buf, size_t size)
1265 {
1266 struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1267 struct device *dma_device;
1268 dma_addr_t dma_addr;
1269 int err;
1270
1271 if (!mr)
1272 return NULL;
1273
1274 dma_device = mlx5_core_dma_dev(mdev);
1275 dma_addr = dma_map_single(dma_device, buf, size,
1276 DMA_BIDIRECTIONAL);
1277 err = dma_mapping_error(dma_device, dma_addr);
1278 if (err) {
1279 mlx5_core_warn(mdev, "Can't dma buf\n");
1280 kfree(mr);
1281 return NULL;
1282 }
1283
1284 err = dr_create_mkey(mdev, pdn, &mr->mkey);
1285 if (err) {
1286 mlx5_core_warn(mdev, "Can't create mkey\n");
1287 dma_unmap_single(dma_device, dma_addr, size,
1288 DMA_BIDIRECTIONAL);
1289 kfree(mr);
1290 return NULL;
1291 }
1292
1293 mr->dma_addr = dma_addr;
1294 mr->size = size;
1295 mr->addr = buf;
1296
1297 return mr;
1298 }
1299
1300 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
1301 {
1302 mlx5_core_destroy_mkey(mdev, mr->mkey);
1303 dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
1304 DMA_BIDIRECTIONAL);
1305 kfree(mr);
1306 }
1307
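/* Allocate the per-domain send ring: a polling CQ, an RC loopback QP, a
 * registered data buffer large enough for signal_th posts of
 * max_post_send_size bytes each, and a separate sync buffer/MR used as the
 * target of the flush reads. max_post_send_size is one 1K-STE ICM chunk
 * and signal_th is QUEUE_SIZE / SIGNAL_PER_DIV_QUEUE.
 */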
1308 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
1309 {
1310 struct dr_qp_init_attr init_attr = {};
1311 int cq_size;
1312 int size;
1313 int ret;
1314
1315 dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
1316 if (!dmn->send_ring)
1317 return -ENOMEM;
1318
1319 cq_size = QUEUE_SIZE + 1;
1320 dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
1321 if (!dmn->send_ring->cq) {
1322 mlx5dr_err(dmn, "Failed creating CQ\n");
1323 ret = -ENOMEM;
1324 goto free_send_ring;
1325 }
1326
1327 init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
1328 init_attr.pdn = dmn->pdn;
1329 init_attr.uar = dmn->uar;
1330 init_attr.max_send_wr = QUEUE_SIZE;
1331
1332 /* Isolated VL is applicable only if force loopback is supported */
1333 if (dr_send_allow_fl(&dmn->info.caps))
1334 init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
1335
1336 spin_lock_init(&dmn->send_ring->lock);
1337
1338 dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
1339 if (!dmn->send_ring->qp) {
1340 mlx5dr_err(dmn, "Failed creating QP\n");
1341 ret = -ENOMEM;
1342 goto clean_cq;
1343 }
1344
1345 dmn->send_ring->cq->qp = dmn->send_ring->qp;
1346
1347 dmn->info.max_send_wr = QUEUE_SIZE;
1348 init_attr.max_send_sge = 1;
1349 dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
1350 DR_STE_SIZE);
1351
1352 dmn->send_ring->signal_th = dmn->info.max_send_wr /
1353 SIGNAL_PER_DIV_QUEUE;
1354
1355 /* Prepare qp to be used */
1356 ret = dr_prepare_qp_to_rts(dmn);
1357 if (ret)
1358 goto clean_qp;
1359
1360 dmn->send_ring->max_post_send_size =
1361 mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
1362 DR_ICM_TYPE_STE);
1363
1364 /* Allocating the max size as a buffer for writing */
1365 size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
1366 dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
1367 if (!dmn->send_ring->buf) {
1368 ret = -ENOMEM;
1369 goto clean_qp;
1370 }
1371
1372 dmn->send_ring->buf_size = size;
1373
1374 dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
1375 dmn->pdn, dmn->send_ring->buf, size);
1376 if (!dmn->send_ring->mr) {
1377 ret = -ENOMEM;
1378 goto free_mem;
1379 }
1380
1381 dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
1382 GFP_KERNEL);
1383 if (!dmn->send_ring->sync_buff) {
1384 ret = -ENOMEM;
1385 goto clean_mr;
1386 }
1387
1388 dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
1389 dmn->pdn, dmn->send_ring->sync_buff,
1390 dmn->send_ring->max_post_send_size);
1391 if (!dmn->send_ring->sync_mr) {
1392 ret = -ENOMEM;
1393 goto free_sync_mem;
1394 }
1395
1396 return 0;
1397
1398 free_sync_mem:
1399 kfree(dmn->send_ring->sync_buff);
1400 clean_mr:
1401 dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
1402 free_mem:
1403 kfree(dmn->send_ring->buf);
1404 clean_qp:
1405 dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
1406 clean_cq:
1407 dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
1408 free_send_ring:
1409 kfree(dmn->send_ring);
1410
1411 return ret;
1412 }
1413
1414 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
1415 struct mlx5dr_send_ring *send_ring)
1416 {
1417 dr_destroy_qp(dmn->mdev, send_ring->qp);
1418 dr_destroy_cq(dmn->mdev, send_ring->cq);
1419 dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
1420 dr_dereg_mr(dmn->mdev, send_ring->mr);
1421 kfree(send_ring->buf);
1422 kfree(send_ring->sync_buff);
1423 kfree(send_ring);
1424 }
1425
1426 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
1427 {
1428 struct mlx5dr_send_ring *send_ring = dmn->send_ring;
1429 struct postsend_info send_info = {};
1430 u8 data[DR_STE_SIZE];
1431 int num_of_sends_req;
1432 int ret;
1433 int i;
1434
1435 /* Sending this number of requests makes sure we will get a full drain */
1436 num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
1437
1438 /* Send fake requests forcing the last to be signaled */
1439 send_info.write.addr = (uintptr_t)data;
1440 send_info.write.length = DR_STE_SIZE;
1441 send_info.write.lkey = 0;
1442 /* Using the sync_mr in order to write/read */
1443 send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
1444 send_info.rkey = send_ring->sync_mr->mkey;
1445
1446 for (i = 0; i < num_of_sends_req; i++) {
1447 ret = dr_postsend_icm_data(dmn, &send_info);
1448 if (ret)
1449 return ret;
1450 }
1451
1452 spin_lock(&send_ring->lock);
1453 ret = dr_handle_pending_wc(dmn, send_ring);
1454 spin_unlock(&send_ring->lock);
1455
1456 return ret;
1457 }
1458