1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/devcom.h"
39 #include "mlx5_core.h"
40 #include "eswitch.h"
41 #include "esw/acl/ofld.h"
42 #include "lag.h"
43 #include "mp.h"
44 #include "mpesw.h"
45
46 enum {
47 MLX5_LAG_EGRESS_PORT_1 = 1,
48 MLX5_LAG_EGRESS_PORT_2,
49 };
50
51 /* General purpose, use for short periods of time.
52 * Beware of lock dependencies (preferably, no locks should be acquired
53 * under it).
54 */
55 static DEFINE_SPINLOCK(lag_lock);
56
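/* Map the requested LAG mode and mode flags to a firmware port selection mode. */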
57 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
58 {
59 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
61
62 if (mode == MLX5_LAG_MODE_MPESW)
63 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
64
65 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
66 }
67
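/* Build the active_port bitmask from the ports that currently carry traffic. */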
68 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
69 {
70 u8 enabled_ports[MLX5_MAX_PORTS] = {};
71 u8 active_port = 0;
72 int num_enabled;
73 int idx;
74
75 mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
76 &num_enabled);
77 for (idx = 0; idx < num_enabled; idx++)
78 active_port |= BIT_MASK(enabled_ports[idx]);
79
80 return active_port;
81 }
82
83 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
84 unsigned long flags)
85 {
86 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
87 &flags);
88 int port_sel_mode = get_port_sel_mode(mode, flags);
89 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
90 void *lag_ctx;
91
92 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95
96 switch (port_sel_mode) {
97 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
98 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
99 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
100 break;
101 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
102 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
103 break;
104
105 MLX5_SET(lagc, lag_ctx, active_port,
106 lag_active_port_bits(mlx5_lag_dev(dev)));
107 break;
108 default:
109 break;
110 }
111 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
112
113 return mlx5_cmd_exec_in(dev, create_lag, in);
114 }
115
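/* Update the tx remap affinity (tx_remap_affinity_1/2) of an existing LAG via MODIFY_LAG (field_select 0x1). */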
116 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
117 u8 *ports)
118 {
119 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
120 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
121
122 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
123 MLX5_SET(modify_lag_in, in, field_select, 0x1);
124
125 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
126 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
127
128 return mlx5_cmd_exec_in(dev, modify_lag, in);
129 }
130
131 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
132 {
133 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
134
135 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
136
137 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
138 }
139 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
140
141 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
142 {
143 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
144
145 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
146
147 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
150
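/* Collect the indices of ports that cannot carry traffic (tx disabled or link down). */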
151 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
152 u8 *ports, int *num_disabled)
153 {
154 int i;
155
156 *num_disabled = 0;
157 for (i = 0; i < num_ports; i++) {
158 if (!tracker->netdev_state[i].tx_enabled ||
159 !tracker->netdev_state[i].link_up)
160 ports[(*num_disabled)++] = i;
161 }
162 }
163
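/* Collect the indices of ports that can carry traffic; if none qualify, fall back to the disabled list so the result is never empty. */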
164 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
165 u8 *ports, int *num_enabled)
166 {
167 int i;
168
169 *num_enabled = 0;
170 for (i = 0; i < num_ports; i++) {
171 if (tracker->netdev_state[i].tx_enabled &&
172 tracker->netdev_state[i].link_up)
173 ports[(*num_enabled)++] = i;
174 }
175
176 if (*num_enabled == 0)
177 mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
178 }
179
180 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
181 struct mlx5_lag *ldev,
182 struct lag_tracker *tracker,
183 unsigned long flags)
184 {
185 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
186 u8 enabled_ports[MLX5_MAX_PORTS] = {};
187 int written = 0;
188 int num_enabled;
189 int idx;
190 int err;
191 int i;
192 int j;
193
194 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
195 mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
196 &num_enabled);
197 for (i = 0; i < num_enabled; i++) {
198 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
199 if (err != 3)
200 return;
201 written += err;
202 }
203 buf[written - 2] = 0;
204 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
205 } else {
206 for (i = 0; i < ldev->ports; i++) {
207 for (j = 0; j < ldev->buckets; j++) {
208 idx = i * ldev->buckets + j;
209 err = scnprintf(buf + written, 10,
210 " port %d:%d", i + 1, ldev->v2p_map[idx]);
211 if (err != 9)
212 return;
213 written += err;
214 }
215 }
216 mlx5_core_info(dev, "lag map:%s\n", buf);
217 }
218 }
219
220 static int mlx5_lag_netdev_event(struct notifier_block *this,
221 unsigned long event, void *ptr);
222 static void mlx5_do_bond_work(struct work_struct *work);
223
224 static void mlx5_ldev_free(struct kref *ref)
225 {
226 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
227
228 if (ldev->nb.notifier_call)
229 unregister_netdevice_notifier_net(&init_net, &ldev->nb);
230 mlx5_lag_mp_cleanup(ldev);
231 destroy_workqueue(ldev->wq);
232 mlx5_lag_mpesw_cleanup(ldev);
233 mutex_destroy(&ldev->lock);
234 kfree(ldev);
235 }
236
237 static void mlx5_ldev_put(struct mlx5_lag *ldev)
238 {
239 kref_put(&ldev->ref, mlx5_ldev_free);
240 }
241
242 static void mlx5_ldev_get(struct mlx5_lag *ldev)
243 {
244 kref_get(&ldev->ref);
245 }
246
247 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
248 {
249 struct mlx5_lag *ldev;
250 int err;
251
252 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
253 if (!ldev)
254 return NULL;
255
256 ldev->wq = create_singlethread_workqueue("mlx5_lag");
257 if (!ldev->wq) {
258 kfree(ldev);
259 return NULL;
260 }
261
262 kref_init(&ldev->ref);
263 mutex_init(&ldev->lock);
264 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
265
266 ldev->nb.notifier_call = mlx5_lag_netdev_event;
267 if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
268 ldev->nb.notifier_call = NULL;
269 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
270 }
271 ldev->mode = MLX5_LAG_MODE_NONE;
272
273 err = mlx5_lag_mp_init(ldev);
274 if (err)
275 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
276 err);
277
278 mlx5_lag_mpesw_init(ldev);
279 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
280 ldev->buckets = 1;
281
282 return ldev;
283 }
284
285 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
286 struct net_device *ndev)
287 {
288 int i;
289
290 for (i = 0; i < ldev->ports; i++)
291 if (ldev->pf[i].netdev == ndev)
292 return i;
293
294 return -ENOENT;
295 }
296
297 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
298 {
299 return ldev->mode == MLX5_LAG_MODE_ROCE;
300 }
301
302 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
303 {
304 return ldev->mode == MLX5_LAG_MODE_SRIOV;
305 }
306
307 /* Create a mapping between steering slots and active ports.
308 * As we have ldev->buckets slots per port, first assume the native
309 * mapping should be used.
310 * If there are ports that are disabled, fill the relevant slots
311 * with a mapping that points to active ports.
312 */
313 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
314 u8 num_ports,
315 u8 buckets,
316 u8 *ports)
317 {
318 int disabled[MLX5_MAX_PORTS] = {};
319 int enabled[MLX5_MAX_PORTS] = {};
320 int disabled_ports_num = 0;
321 int enabled_ports_num = 0;
322 int idx;
323 u32 rand;
324 int i;
325 int j;
326
327 for (i = 0; i < num_ports; i++) {
328 if (tracker->netdev_state[i].tx_enabled &&
329 tracker->netdev_state[i].link_up)
330 enabled[enabled_ports_num++] = i;
331 else
332 disabled[disabled_ports_num++] = i;
333 }
334
335 /* Use native mapping by default, where each port's buckets
336 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
337 */
338 for (i = 0; i < num_ports; i++)
339 for (j = 0; j < buckets; j++) {
340 idx = i * buckets + j;
341 ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
342 }
343
344 /* If all ports are disabled/enabled, keep the native mapping */
345 if (enabled_ports_num == num_ports ||
346 disabled_ports_num == num_ports)
347 return;
348
349 /* Go over the disabled ports and for each assign a random active port */
350 for (i = 0; i < disabled_ports_num; i++) {
351 for (j = 0; j < buckets; j++) {
352 get_random_bytes(&rand, 4);
353 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
354 }
355 }
356 }
357
358 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
359 {
360 int i;
361
362 for (i = 0; i < ldev->ports; i++)
363 if (ldev->pf[i].has_drop)
364 return true;
365 return false;
366 }
367
368 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
369 {
370 int i;
371
372 for (i = 0; i < ldev->ports; i++) {
373 if (!ldev->pf[i].has_drop)
374 continue;
375
376 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
377 MLX5_VPORT_UPLINK);
378 ldev->pf[i].has_drop = false;
379 }
380 }
381
382 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
383 struct lag_tracker *tracker)
384 {
385 u8 disabled_ports[MLX5_MAX_PORTS] = {};
386 struct mlx5_core_dev *dev;
387 int disabled_index;
388 int num_disabled;
389 int err;
390 int i;
391
392 /* First delete the current drop rule so there won't be any dropped
393 * packets
394 */
395 mlx5_lag_drop_rule_cleanup(ldev);
396
397 if (!ldev->tracker.has_inactive)
398 return;
399
400 mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
401
402 for (i = 0; i < num_disabled; i++) {
403 disabled_index = disabled_ports[i];
404 dev = ldev->pf[disabled_index].dev;
405 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
406 MLX5_VPORT_UPLINK);
407 if (!err)
408 ldev->pf[disabled_index].has_drop = true;
409 else
410 mlx5_core_err(dev,
411 "Failed to create lag drop rule, error: %d", err);
412 }
413 }
414
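/* Update the active_port bitmask of an existing LAG via MODIFY_LAG (field_select 0x2). */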
415 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
416 {
417 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
418 void *lag_ctx;
419
420 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
421
422 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
423 MLX5_SET(modify_lag_in, in, field_select, 0x2);
424
425 MLX5_SET(lagc, lag_ctx, active_port, ports);
426
427 return mlx5_cmd_exec_in(dev, modify_lag, in);
428 }
429
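/* For hash-based LAG, update the port selection flow table (and the FW active ports
 * when the flow table bypass capability is present); otherwise issue a
 * queue-affinity MODIFY_LAG.
 */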
430 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
431 {
432 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
433 u8 active_ports;
434 int ret;
435
436 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
437 ret = mlx5_lag_port_sel_modify(ldev, ports);
438 if (ret ||
439 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
440 return ret;
441
442 active_ports = lag_active_port_bits(ldev);
443
444 return mlx5_cmd_modify_active_port(dev0, active_ports);
445 }
446 return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
447 }
448
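/* Recompute the tx affinity mapping and push it to firmware if any slot changed,
 * then refresh the drop rules on inactive ports for active-backup non-RoCE LAG.
 */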
449 void mlx5_modify_lag(struct mlx5_lag *ldev,
450 struct lag_tracker *tracker)
451 {
452 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
453 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
454 int idx;
455 int err;
456 int i;
457 int j;
458
459 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
460
461 for (i = 0; i < ldev->ports; i++) {
462 for (j = 0; j < ldev->buckets; j++) {
463 idx = i * ldev->buckets + j;
464 if (ports[idx] == ldev->v2p_map[idx])
465 continue;
466 err = _mlx5_modify_lag(ldev, ports);
467 if (err) {
468 mlx5_core_err(dev0,
469 "Failed to modify LAG (%d)\n",
470 err);
471 return;
472 }
473 memcpy(ldev->v2p_map, ports, sizeof(ports));
474
475 mlx5_lag_print_mapping(dev0, ldev, tracker,
476 ldev->mode_flags);
477 break;
478 }
479 }
480
481 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
482 !(ldev->mode == MLX5_LAG_MODE_ROCE))
483 mlx5_lag_drop_rule_setup(ldev, tracker);
484 }
485
486 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
487 unsigned long *flags)
488 {
489 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
490
491 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
492 if (ldev->ports > 2)
493 return -EINVAL;
494 return 0;
495 }
496
497 if (ldev->ports > 2)
498 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
499
500 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
501
502 return 0;
503 }
504
505 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
506 struct lag_tracker *tracker,
507 enum mlx5_lag_mode mode,
508 unsigned long *flags)
509 {
510 struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
511
512 if (mode == MLX5_LAG_MODE_MPESW)
513 return;
514
515 if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
516 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
517 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
518 }
519
520 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
521 struct lag_tracker *tracker, bool shared_fdb,
522 unsigned long *flags)
523 {
524 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
525
526 *flags = 0;
527 if (shared_fdb) {
528 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
529 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
530 }
531
532 if (mode == MLX5_LAG_MODE_MPESW)
533 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
534
535 if (roce_lag)
536 return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
537
538 mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
539 return 0;
540 }
541
542 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
543 {
544 int port_sel_mode = get_port_sel_mode(mode, flags);
545
546 switch (port_sel_mode) {
547 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
548 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
549 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
550 default: return "invalid";
551 }
552 }
553
554 static int mlx5_create_lag(struct mlx5_lag *ldev,
555 struct lag_tracker *tracker,
556 enum mlx5_lag_mode mode,
557 unsigned long flags)
558 {
559 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
560 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
561 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
562 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
563 int err;
564
565 if (tracker)
566 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
567 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
568 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
569
570 err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
571 if (err) {
572 mlx5_core_err(dev0,
573 "Failed to create LAG (%d)\n",
574 err);
575 return err;
576 }
577
578 if (shared_fdb) {
579 err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
580 dev1->priv.eswitch);
581 if (err)
582 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
583 else
584 mlx5_core_info(dev0, "Operation mode is single FDB\n");
585 }
586
587 if (err) {
588 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
589 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
590 mlx5_core_err(dev0,
591 "Failed to deactivate RoCE LAG; driver restart required\n");
592 }
593
594 return err;
595 }
596
597 int mlx5_activate_lag(struct mlx5_lag *ldev,
598 struct lag_tracker *tracker,
599 enum mlx5_lag_mode mode,
600 bool shared_fdb)
601 {
602 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
603 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
604 unsigned long flags = 0;
605 int err;
606
607 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
608 if (err)
609 return err;
610
611 if (mode != MLX5_LAG_MODE_MPESW) {
612 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
613 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
614 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
615 ldev->v2p_map);
616 if (err) {
617 mlx5_core_err(dev0,
618 "Failed to create LAG port selection(%d)\n",
619 err);
620 return err;
621 }
622 }
623 }
624
625 err = mlx5_create_lag(ldev, tracker, mode, flags);
626 if (err) {
627 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
628 mlx5_lag_port_sel_destroy(ldev);
629 if (roce_lag)
630 mlx5_core_err(dev0,
631 "Failed to activate RoCE LAG\n");
632 else
633 mlx5_core_err(dev0,
634 "Failed to activate VF LAG\n"
635 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
636 return err;
637 }
638
639 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
640 !roce_lag)
641 mlx5_lag_drop_rule_setup(ldev, tracker);
642
643 ldev->mode = mode;
644 ldev->mode_flags = flags;
645 return 0;
646 }
647
648 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
649 {
650 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
651 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
652 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
653 bool roce_lag = __mlx5_lag_is_roce(ldev);
654 unsigned long flags = ldev->mode_flags;
655 int err;
656
657 ldev->mode = MLX5_LAG_MODE_NONE;
658 ldev->mode_flags = 0;
659 mlx5_lag_mp_reset(ldev);
660
661 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
662 mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
663 dev1->priv.eswitch);
664 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
665 }
666
667 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
668 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
669 if (err) {
670 if (roce_lag) {
671 mlx5_core_err(dev0,
672 "Failed to deactivate RoCE LAG; driver restart required\n");
673 } else {
674 mlx5_core_err(dev0,
675 "Failed to deactivate VF LAG; driver restart required\n"
676 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
677 }
678 return err;
679 }
680
681 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
682 mlx5_lag_port_sel_destroy(ldev);
683 if (mlx5_lag_has_drop_rule(ldev))
684 mlx5_lag_drop_rule_cleanup(ldev);
685
686 return 0;
687 }
688
689 #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
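/* Check that all PFs are present and that their eswitches are in a compatible mode before LAG can be activated. */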
690 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
691 {
692 #ifdef CONFIG_MLX5_ESWITCH
693 struct mlx5_core_dev *dev;
694 u8 mode;
695 #endif
696 int i;
697
698 for (i = 0; i < ldev->ports; i++)
699 if (!ldev->pf[i].dev)
700 return false;
701
702 #ifdef CONFIG_MLX5_ESWITCH
703 for (i = 0; i < ldev->ports; i++) {
704 dev = ldev->pf[i].dev;
705 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
706 return false;
707 }
708
709 dev = ldev->pf[MLX5_LAG_P1].dev;
710 mode = mlx5_eswitch_mode(dev);
711 for (i = 0; i < ldev->ports; i++)
712 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
713 return false;
714
715 if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
716 return false;
717 #else
718 for (i = 0; i < ldev->ports; i++)
719 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
720 return false;
721 #endif
722 return true;
723 }
724
725 static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
726 {
727 int i;
728
729 for (i = 0; i < ldev->ports; i++) {
730 if (!ldev->pf[i].dev)
731 continue;
732
733 if (ldev->pf[i].dev->priv.flags &
734 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
735 continue;
736
737 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
738 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
739 }
740 }
741
742 static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
743 {
744 int i;
745
746 for (i = 0; i < ldev->ports; i++) {
747 if (!ldev->pf[i].dev)
748 continue;
749
750 if (ldev->pf[i].dev->priv.flags &
751 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
752 continue;
753
754 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
755 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
756 }
757 }
758
759 void mlx5_disable_lag(struct mlx5_lag *ldev)
760 {
761 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
762 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
763 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
764 bool roce_lag;
765 int err;
766 int i;
767
768 roce_lag = __mlx5_lag_is_roce(ldev);
769
770 if (shared_fdb) {
771 mlx5_lag_remove_devices(ldev);
772 } else if (roce_lag) {
773 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
774 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
775 mlx5_rescan_drivers_locked(dev0);
776 }
777 for (i = 1; i < ldev->ports; i++)
778 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
779 }
780
781 err = mlx5_deactivate_lag(ldev);
782 if (err)
783 return;
784
785 if (shared_fdb || roce_lag)
786 mlx5_lag_add_devices(ldev);
787
788 if (shared_fdb) {
789 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
790 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
791 if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
792 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
793 }
794 }
795
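/* Shared FDB requires both eswitches in switchdev mode with vport metadata matching,
 * a paired devcom, and the relevant FW/eswitch capabilities.
 */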
796 bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
797 {
798 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
799 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
800
801 if (is_mdev_switchdev_mode(dev0) &&
802 is_mdev_switchdev_mode(dev1) &&
803 mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
804 mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
805 mlx5_devcom_is_paired(dev0->priv.devcom,
806 MLX5_DEVCOM_ESW_OFFLOADS) &&
807 MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
808 MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
809 MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
810 return true;
811
812 return false;
813 }
814
815 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
816 {
817 bool roce_lag = true;
818 int i;
819
820 for (i = 0; i < ldev->ports; i++)
821 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
822
823 #ifdef CONFIG_MLX5_ESWITCH
824 for (i = 0; i < ldev->ports; i++)
825 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
826 #endif
827
828 return roce_lag;
829 }
830
831 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
832 {
833 return do_bond && __mlx5_lag_is_active(ldev) &&
834 ldev->mode != MLX5_LAG_MODE_MPESW;
835 }
836
837 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
838 {
839 return !do_bond && __mlx5_lag_is_active(ldev) &&
840 ldev->mode != MLX5_LAG_MODE_MPESW;
841 }
842
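/* Re-evaluate the bond state reported by the netdev tracker and activate, modify, or tear down the hardware LAG accordingly. */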
843 static void mlx5_do_bond(struct mlx5_lag *ldev)
844 {
845 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
846 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
847 struct lag_tracker tracker = { };
848 bool do_bond, roce_lag;
849 int err;
850 int i;
851
852 if (!mlx5_lag_is_ready(ldev)) {
853 do_bond = false;
854 } else {
855 /* VF LAG is in multipath mode, ignore bond change requests */
856 if (mlx5_lag_is_multipath(dev0))
857 return;
858
859 tracker = ldev->tracker;
860
861 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
862 }
863
864 if (do_bond && !__mlx5_lag_is_active(ldev)) {
865 bool shared_fdb = mlx5_shared_fdb_supported(ldev);
866
867 roce_lag = mlx5_lag_is_roce_lag(ldev);
868
869 if (shared_fdb || roce_lag)
870 mlx5_lag_remove_devices(ldev);
871
872 err = mlx5_activate_lag(ldev, &tracker,
873 roce_lag ? MLX5_LAG_MODE_ROCE :
874 MLX5_LAG_MODE_SRIOV,
875 shared_fdb);
876 if (err) {
877 if (shared_fdb || roce_lag)
878 mlx5_lag_add_devices(ldev);
879
880 return;
881 } else if (roce_lag) {
882 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
883 mlx5_rescan_drivers_locked(dev0);
884 for (i = 1; i < ldev->ports; i++)
885 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
886 } else if (shared_fdb) {
887 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
888 mlx5_rescan_drivers_locked(dev0);
889
890 err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
891 if (!err)
892 err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
893
894 if (err) {
895 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
896 mlx5_rescan_drivers_locked(dev0);
897 mlx5_deactivate_lag(ldev);
898 mlx5_lag_add_devices(ldev);
899 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
900 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
901 mlx5_core_err(dev0, "Failed to enable lag\n");
902 return;
903 }
904 }
905 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
906 mlx5_modify_lag(ldev, &tracker);
907 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
908 mlx5_disable_lag(ldev);
909 }
910 }
911
912 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
913 {
914 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
915 }
916
917 static void mlx5_do_bond_work(struct work_struct *work)
918 {
919 struct delayed_work *delayed_work = to_delayed_work(work);
920 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
921 bond_work);
922 int status;
923
924 status = mlx5_dev_list_trylock();
925 if (!status) {
926 mlx5_queue_bond_work(ldev, HZ);
927 return;
928 }
929
930 mutex_lock(&ldev->lock);
931 if (ldev->mode_changes_in_progress) {
932 mutex_unlock(&ldev->lock);
933 mlx5_dev_list_unlock();
934 mlx5_queue_bond_work(ldev, HZ);
935 return;
936 }
937
938 mlx5_do_bond(ldev);
939 mutex_unlock(&ldev->lock);
940 mlx5_dev_list_unlock();
941 }
942
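/* Track CHANGEUPPER events to decide whether our netdevs form a bond we can offload. */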
943 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
944 struct lag_tracker *tracker,
945 struct netdev_notifier_changeupper_info *info)
946 {
947 struct net_device *upper = info->upper_dev, *ndev_tmp;
948 struct netdev_lag_upper_info *lag_upper_info = NULL;
949 bool is_bonded, is_in_lag, mode_supported;
950 bool has_inactive = 0;
951 struct slave *slave;
952 u8 bond_status = 0;
953 int num_slaves = 0;
954 int changed = 0;
955 int idx;
956
957 if (!netif_is_lag_master(upper))
958 return 0;
959
960 if (info->linking)
961 lag_upper_info = info->upper_info;
962
963 /* The event may still be of interest if the slave does not belong to
964 * us, but is enslaved to a master which has one or more of our netdevs
965 * as slaves (e.g., if a new slave is added to a master that bonds two
966 * of our netdevs, we should unbond).
967 */
968 rcu_read_lock();
969 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
970 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
971 if (idx >= 0) {
972 slave = bond_slave_get_rcu(ndev_tmp);
973 if (slave)
974 has_inactive |= bond_is_slave_inactive(slave);
975 bond_status |= (1 << idx);
976 }
977
978 num_slaves++;
979 }
980 rcu_read_unlock();
981
982 /* None of this lagdev's netdevs are slaves of this master. */
983 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
984 return 0;
985
986 if (lag_upper_info) {
987 tracker->tx_type = lag_upper_info->tx_type;
988 tracker->hash_type = lag_upper_info->hash_type;
989 }
990
991 tracker->has_inactive = has_inactive;
992 /* Determine bonding status:
993 * A device is considered bonded if all of its physical ports are slaves
994 * of the same lag master, and only them.
995 */
996 is_in_lag = num_slaves == ldev->ports &&
997 bond_status == GENMASK(ldev->ports - 1, 0);
998
999 /* Lag mode must be activebackup or hash. */
1000 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1001 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1002
1003 is_bonded = is_in_lag && mode_supported;
1004 if (tracker->is_bonded != is_bonded) {
1005 tracker->is_bonded = is_bonded;
1006 changed = 1;
1007 }
1008
1009 if (!is_in_lag)
1010 return changed;
1011
1012 if (!mlx5_lag_is_ready(ldev))
1013 NL_SET_ERR_MSG_MOD(info->info.extack,
1014 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1015 else if (!mode_supported)
1016 NL_SET_ERR_MSG_MOD(info->info.extack,
1017 "Can't activate LAG offload, TX type isn't supported");
1018
1019 return changed;
1020 }
1021
1022 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1023 struct lag_tracker *tracker,
1024 struct net_device *ndev,
1025 struct netdev_notifier_changelowerstate_info *info)
1026 {
1027 struct netdev_lag_lower_state_info *lag_lower_info;
1028 int idx;
1029
1030 if (!netif_is_lag_port(ndev))
1031 return 0;
1032
1033 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1034 if (idx < 0)
1035 return 0;
1036
1037 /* This information is used to determine virtual to physical
1038 * port mapping.
1039 */
1040 lag_lower_info = info->lower_state_info;
1041 if (!lag_lower_info)
1042 return 0;
1043
1044 tracker->netdev_state[idx] = *lag_lower_info;
1045
1046 return 1;
1047 }
1048
1049 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1050 struct lag_tracker *tracker,
1051 struct net_device *ndev)
1052 {
1053 struct net_device *ndev_tmp;
1054 struct slave *slave;
1055 bool has_inactive = 0;
1056 int idx;
1057
1058 if (!netif_is_lag_master(ndev))
1059 return 0;
1060
1061 rcu_read_lock();
1062 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1063 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1064 if (idx < 0)
1065 continue;
1066
1067 slave = bond_slave_get_rcu(ndev_tmp);
1068 if (slave)
1069 has_inactive |= bond_is_slave_inactive(slave);
1070 }
1071 rcu_read_unlock();
1072
1073 if (tracker->has_inactive == has_inactive)
1074 return 0;
1075
1076 tracker->has_inactive = has_inactive;
1077
1078 return 1;
1079 }
1080
1081 /* this handler is always registered to netdev events */
1082 static int mlx5_lag_netdev_event(struct notifier_block *this,
1083 unsigned long event, void *ptr)
1084 {
1085 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1086 struct lag_tracker tracker;
1087 struct mlx5_lag *ldev;
1088 int changed = 0;
1089
1090 if (event != NETDEV_CHANGEUPPER &&
1091 event != NETDEV_CHANGELOWERSTATE &&
1092 event != NETDEV_CHANGEINFODATA)
1093 return NOTIFY_DONE;
1094
1095 ldev = container_of(this, struct mlx5_lag, nb);
1096
1097 tracker = ldev->tracker;
1098
1099 switch (event) {
1100 case NETDEV_CHANGEUPPER:
1101 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1102 break;
1103 case NETDEV_CHANGELOWERSTATE:
1104 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1105 ndev, ptr);
1106 break;
1107 case NETDEV_CHANGEINFODATA:
1108 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1109 break;
1110 }
1111
1112 ldev->tracker = tracker;
1113
1114 if (changed)
1115 mlx5_queue_bond_work(ldev, 0);
1116
1117 return NOTIFY_DONE;
1118 }
1119
1120 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1121 struct mlx5_core_dev *dev,
1122 struct net_device *netdev)
1123 {
1124 unsigned int fn = mlx5_get_dev_index(dev);
1125 unsigned long flags;
1126
1127 if (fn >= ldev->ports)
1128 return;
1129
1130 spin_lock_irqsave(&lag_lock, flags);
1131 ldev->pf[fn].netdev = netdev;
1132 ldev->tracker.netdev_state[fn].link_up = 0;
1133 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1134 spin_unlock_irqrestore(&lag_lock, flags);
1135 }
1136
1137 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1138 struct net_device *netdev)
1139 {
1140 unsigned long flags;
1141 int i;
1142
1143 spin_lock_irqsave(&lag_lock, flags);
1144 for (i = 0; i < ldev->ports; i++) {
1145 if (ldev->pf[i].netdev == netdev) {
1146 ldev->pf[i].netdev = NULL;
1147 break;
1148 }
1149 }
1150 spin_unlock_irqrestore(&lag_lock, flags);
1151 }
1152
1153 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1154 struct mlx5_core_dev *dev)
1155 {
1156 unsigned int fn = mlx5_get_dev_index(dev);
1157
1158 if (fn >= ldev->ports)
1159 return;
1160
1161 ldev->pf[fn].dev = dev;
1162 dev->priv.lag = ldev;
1163 }
1164
1165 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1166 struct mlx5_core_dev *dev)
1167 {
1168 int i;
1169
1170 for (i = 0; i < ldev->ports; i++)
1171 if (ldev->pf[i].dev == dev)
1172 break;
1173
1174 if (i == ldev->ports)
1175 return;
1176
1177 ldev->pf[i].dev = NULL;
1178 dev->priv.lag = NULL;
1179 }
1180
1181 /* Must be called with intf_mutex held */
1182 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1183 {
1184 struct mlx5_lag *ldev = NULL;
1185 struct mlx5_core_dev *tmp_dev;
1186
1187 tmp_dev = mlx5_get_next_phys_dev_lag(dev);
1188 if (tmp_dev)
1189 ldev = tmp_dev->priv.lag;
1190
1191 if (!ldev) {
1192 ldev = mlx5_lag_dev_alloc(dev);
1193 if (!ldev) {
1194 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1195 return 0;
1196 }
1197 mlx5_ldev_add_mdev(ldev, dev);
1198 return 0;
1199 }
1200
1201 mutex_lock(&ldev->lock);
1202 if (ldev->mode_changes_in_progress) {
1203 mutex_unlock(&ldev->lock);
1204 return -EAGAIN;
1205 }
1206 mlx5_ldev_get(ldev);
1207 mlx5_ldev_add_mdev(ldev, dev);
1208 mutex_unlock(&ldev->lock);
1209
1210 return 0;
1211 }
1212
1213 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1214 {
1215 struct mlx5_lag *ldev;
1216
1217 ldev = mlx5_lag_dev(dev);
1218 if (!ldev)
1219 return;
1220
1221 /* mdev is being removed, might as well remove debugfs
1222 * as early as possible.
1223 */
1224 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1225 recheck:
1226 mutex_lock(&ldev->lock);
1227 if (ldev->mode_changes_in_progress) {
1228 mutex_unlock(&ldev->lock);
1229 msleep(100);
1230 goto recheck;
1231 }
1232 mlx5_ldev_remove_mdev(ldev, dev);
1233 mutex_unlock(&ldev->lock);
1234 mlx5_ldev_put(ldev);
1235 }
1236
1237 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1238 {
1239 int err;
1240
1241 if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
1242 !MLX5_CAP_GEN(dev, lag_master) ||
1243 (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
1244 MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
1245 return;
1246
1247 recheck:
1248 mlx5_dev_list_lock();
1249 err = __mlx5_lag_dev_add_mdev(dev);
1250 mlx5_dev_list_unlock();
1251
1252 if (err) {
1253 msleep(100);
1254 goto recheck;
1255 }
1256 mlx5_ldev_add_debugfs(dev);
1257 }
1258
1259 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1260 struct net_device *netdev)
1261 {
1262 struct mlx5_lag *ldev;
1263 bool lag_is_active;
1264
1265 ldev = mlx5_lag_dev(dev);
1266 if (!ldev)
1267 return;
1268
1269 mutex_lock(&ldev->lock);
1270 mlx5_ldev_remove_netdev(ldev, netdev);
1271 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1272
1273 lag_is_active = __mlx5_lag_is_active(ldev);
1274 mutex_unlock(&ldev->lock);
1275
1276 if (lag_is_active)
1277 mlx5_queue_bond_work(ldev, 0);
1278 }
1279
1280 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1281 struct net_device *netdev)
1282 {
1283 struct mlx5_lag *ldev;
1284 int i;
1285
1286 ldev = mlx5_lag_dev(dev);
1287 if (!ldev)
1288 return;
1289
1290 mutex_lock(&ldev->lock);
1291 mlx5_ldev_add_netdev(ldev, dev, netdev);
1292
1293 for (i = 0; i < ldev->ports; i++)
1294 if (!ldev->pf[i].netdev)
1295 break;
1296
1297 if (i >= ldev->ports)
1298 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1299 mutex_unlock(&ldev->lock);
1300 mlx5_queue_bond_work(ldev, 0);
1301 }
1302
1303 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1304 {
1305 struct mlx5_lag *ldev;
1306 unsigned long flags;
1307 bool res;
1308
1309 spin_lock_irqsave(&lag_lock, flags);
1310 ldev = mlx5_lag_dev(dev);
1311 res = ldev && __mlx5_lag_is_roce(ldev);
1312 spin_unlock_irqrestore(&lag_lock, flags);
1313
1314 return res;
1315 }
1316 EXPORT_SYMBOL(mlx5_lag_is_roce);
1317
1318 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1319 {
1320 struct mlx5_lag *ldev;
1321 unsigned long flags;
1322 bool res;
1323
1324 spin_lock_irqsave(&lag_lock, flags);
1325 ldev = mlx5_lag_dev(dev);
1326 res = ldev && __mlx5_lag_is_active(ldev);
1327 spin_unlock_irqrestore(&lag_lock, flags);
1328
1329 return res;
1330 }
1331 EXPORT_SYMBOL(mlx5_lag_is_active);
1332
1333 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1334 {
1335 struct mlx5_lag *ldev;
1336 unsigned long flags;
1337 bool res = 0;
1338
1339 spin_lock_irqsave(&lag_lock, flags);
1340 ldev = mlx5_lag_dev(dev);
1341 if (ldev)
1342 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1343 spin_unlock_irqrestore(&lag_lock, flags);
1344
1345 return res;
1346 }
1347 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1348
1349 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1350 {
1351 struct mlx5_lag *ldev;
1352 unsigned long flags;
1353 bool res;
1354
1355 spin_lock_irqsave(&lag_lock, flags);
1356 ldev = mlx5_lag_dev(dev);
1357 res = ldev && __mlx5_lag_is_active(ldev) &&
1358 dev == ldev->pf[MLX5_LAG_P1].dev;
1359 spin_unlock_irqrestore(&lag_lock, flags);
1360
1361 return res;
1362 }
1363 EXPORT_SYMBOL(mlx5_lag_is_master);
1364
1365 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1366 {
1367 struct mlx5_lag *ldev;
1368 unsigned long flags;
1369 bool res;
1370
1371 spin_lock_irqsave(&lag_lock, flags);
1372 ldev = mlx5_lag_dev(dev);
1373 res = ldev && __mlx5_lag_is_sriov(ldev);
1374 spin_unlock_irqrestore(&lag_lock, flags);
1375
1376 return res;
1377 }
1378 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1379
1380 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1381 {
1382 struct mlx5_lag *ldev;
1383 unsigned long flags;
1384 bool res;
1385
1386 spin_lock_irqsave(&lag_lock, flags);
1387 ldev = mlx5_lag_dev(dev);
1388 res = ldev && __mlx5_lag_is_sriov(ldev) &&
1389 test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1390 spin_unlock_irqrestore(&lag_lock, flags);
1391
1392 return res;
1393 }
1394 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1395
1396 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1397 {
1398 struct mlx5_lag *ldev;
1399
1400 ldev = mlx5_lag_dev(dev);
1401 if (!ldev)
1402 return;
1403
1404 mlx5_dev_list_lock();
1405 mutex_lock(&ldev->lock);
1406
1407 ldev->mode_changes_in_progress++;
1408 if (__mlx5_lag_is_active(ldev))
1409 mlx5_disable_lag(ldev);
1410
1411 mutex_unlock(&ldev->lock);
1412 mlx5_dev_list_unlock();
1413 }
1414
1415 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1416 {
1417 struct mlx5_lag *ldev;
1418
1419 ldev = mlx5_lag_dev(dev);
1420 if (!ldev)
1421 return;
1422
1423 mutex_lock(&ldev->lock);
1424 ldev->mode_changes_in_progress--;
1425 mutex_unlock(&ldev->lock);
1426 mlx5_queue_bond_work(ldev, 0);
1427 }
1428
1429 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
1430 {
1431 struct net_device *ndev = NULL;
1432 struct mlx5_lag *ldev;
1433 unsigned long flags;
1434 int i;
1435
1436 spin_lock_irqsave(&lag_lock, flags);
1437 ldev = mlx5_lag_dev(dev);
1438
1439 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1440 goto unlock;
1441
1442 if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1443 for (i = 0; i < ldev->ports; i++)
1444 if (ldev->tracker.netdev_state[i].tx_enabled)
1445 ndev = ldev->pf[i].netdev;
1446 if (!ndev)
1447 ndev = ldev->pf[ldev->ports - 1].netdev;
1448 } else {
1449 ndev = ldev->pf[MLX5_LAG_P1].netdev;
1450 }
1451 if (ndev)
1452 dev_hold(ndev);
1453
1454 unlock:
1455 spin_unlock_irqrestore(&lag_lock, flags);
1456
1457 return ndev;
1458 }
1459 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
1460
1461 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1462 struct net_device *slave)
1463 {
1464 struct mlx5_lag *ldev;
1465 unsigned long flags;
1466 u8 port = 0;
1467 int i;
1468
1469 spin_lock_irqsave(&lag_lock, flags);
1470 ldev = mlx5_lag_dev(dev);
1471 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1472 goto unlock;
1473
1474 for (i = 0; i < ldev->ports; i++) {
1475 if (ldev->pf[MLX5_LAG_P1].netdev == slave) {
1476 port = i;
1477 break;
1478 }
1479 }
1480
1481 port = ldev->v2p_map[port * ldev->buckets];
1482
1483 unlock:
1484 spin_unlock_irqrestore(&lag_lock, flags);
1485 return port;
1486 }
1487 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1488
1489 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1490 {
1491 struct mlx5_lag *ldev;
1492
1493 ldev = mlx5_lag_dev(dev);
1494 if (!ldev)
1495 return 0;
1496
1497 return ldev->ports;
1498 }
1499 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1500
1501 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
1502 {
1503 struct mlx5_core_dev *peer_dev = NULL;
1504 struct mlx5_lag *ldev;
1505 unsigned long flags;
1506
1507 spin_lock_irqsave(&lag_lock, flags);
1508 ldev = mlx5_lag_dev(dev);
1509 if (!ldev)
1510 goto unlock;
1511
1512 peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
1513 ldev->pf[MLX5_LAG_P2].dev :
1514 ldev->pf[MLX5_LAG_P1].dev;
1515
1516 unlock:
1517 spin_unlock_irqrestore(&lag_lock, flags);
1518 return peer_dev;
1519 }
1520 EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
1521
1522 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1523 u64 *values,
1524 int num_counters,
1525 size_t *offsets)
1526 {
1527 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1528 struct mlx5_core_dev **mdev;
1529 struct mlx5_lag *ldev;
1530 unsigned long flags;
1531 int num_ports;
1532 int ret, i, j;
1533 void *out;
1534
1535 out = kvzalloc(outlen, GFP_KERNEL);
1536 if (!out)
1537 return -ENOMEM;
1538
1539 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1540 if (!mdev) {
1541 ret = -ENOMEM;
1542 goto free_out;
1543 }
1544
1545 memset(values, 0, sizeof(*values) * num_counters);
1546
1547 spin_lock_irqsave(&lag_lock, flags);
1548 ldev = mlx5_lag_dev(dev);
1549 if (ldev && __mlx5_lag_is_active(ldev)) {
1550 num_ports = ldev->ports;
1551 for (i = 0; i < ldev->ports; i++)
1552 mdev[i] = ldev->pf[i].dev;
1553 } else {
1554 num_ports = 1;
1555 mdev[MLX5_LAG_P1] = dev;
1556 }
1557 spin_unlock_irqrestore(&lag_lock, flags);
1558
1559 for (i = 0; i < num_ports; ++i) {
1560 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1561
1562 MLX5_SET(query_cong_statistics_in, in, opcode,
1563 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1564 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1565 out);
1566 if (ret)
1567 goto free_mdev;
1568
1569 for (j = 0; j < num_counters; ++j)
1570 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1571 }
1572
1573 free_mdev:
1574 kvfree(mdev);
1575 free_out:
1576 kvfree(out);
1577 return ret;
1578 }
1579 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1580