/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_MUTEX(lag_mutex);

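/* Firmware command wrappers for the LAG object. The tx_remap_affinity
 * fields tell the device which physical port carries the traffic of each
 * logical port.
 */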
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32   in[MLX5_ST_SZ_DW(create_lag_in)]   = {0};
	u32   out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32   in[MLX5_ST_SZ_DW(modify_lag_in)]   = {0};
	u32   out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
{
	u32  in[MLX5_ST_SZ_DW(destroy_lag_in)]  = {0};
	u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32  in[MLX5_ST_SZ_DW(create_vport_lag_in)]  = {0};
	u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32  in[MLX5_ST_SZ_DW(destroy_vport_lag_in)]  = {0};
	u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
				       bool reset, void *out, int out_size)
{
	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };

	MLX5_SET(query_cong_statistics_in, in, opcode,
		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
	MLX5_SET(query_cong_statistics_in, in, clear, reset);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -1;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

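/* Derive the virtual-to-physical port mapping from the tracked netdev
 * state: a port whose slave netdev is down or has TX disabled is remapped
 * onto its peer port.
 */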
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	*port1 = 1;
	*port2 = 2;
	if (!tracker->netdev_state[0].tx_enabled ||
	    !tracker->netdev_state[0].link_up) {
		*port1 = 2;
		return;
	}

	if (!tracker->netdev_state[1].tx_enabled ||
	    !tracker->netdev_state[1].link_up)
		*port2 = 1;
}

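/* Re-infer the port mapping and issue MODIFY_LAG only if the mapping
 * actually changed.
 */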
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[0] ||
	    v2p_port2 != ldev->v2p_map[1]) {
		ldev->v2p_map[0] = v2p_port1;
		ldev->v2p_map[1] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[0], ldev->v2p_map[1]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
				       &ldev->v2p_map[1]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
		       ldev->v2p_map[0], ldev->v2p_map[1]);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
	if (err)
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

	err = mlx5_cmd_destroy_lag(dev0);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

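/* LAG can only be configured when both PF devices are present and the
 * eswitch (or, without CONFIG_MLX5_ESWITCH, the SR-IOV state) on both
 * devices allows it.
 */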
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[1].dev));
#endif
}

static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			mlx5_add_dev_by_protocol(ldev->pf[i].dev,
						 MLX5_INTERFACE_PROTOCOL_IB);
}

static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
						    MLX5_INTERFACE_PROTOCOL_IB);
}

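/* Reconcile the hardware LAG with the bonding state reported by the netdev
 * notifiers: activate, modify or deactivate the LAG as needed, and for RoCE
 * LAG switch the IB devices between per-port and bonded representation.
 */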
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!dev0 || !dev1)
		return;

	mutex_lock(&lag_mutex);
	tracker = ldev->tracker;
	mutex_unlock(&lag_mutex);

	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (roce_lag)
			mlx5_lag_remove_ib_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
					MLX5_LAG_FLAG_SRIOV);
		if (err) {
			if (roce_lag)
				mlx5_lag_add_ib_devices(ldev);

			return;
		}

		if (roce_lag) {
			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
			mlx5_nic_vport_enable_roce(dev1);
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		roce_lag = __mlx5_lag_is_roce(ldev);

		if (roce_lag) {
			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
			mlx5_nic_vport_disable_roce(dev1);
		}

		err = mlx5_deactivate_lag(ldev);
		if (err)
			return;

		if (roce_lag)
			mlx5_lag_add_ib_devices(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		/* 1 sec delay. */
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mlx5_dev_list_unlock();
}

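/* Track CHANGEUPPER events to decide whether both of our netdevs are the
 * only slaves of the same bond in a supported TX mode. Returns 1 if the
 * bonding state changed and the bond work should be scheduled.
 */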
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx > -1)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 * Lag mode must be activebackup or hash.
	 */
	is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
		    (bond_status == 0x3) &&
		    ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
		     (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));

	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

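/* Track per-slave link and TX state; this feeds the TX affinity mapping. */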
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx == -1)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (!net_eq(dev_net(ndev), &init_net))
		return NOTIFY_DONE;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);
	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	mutex_lock(&lag_mutex);
	ldev->tracker = tracker;
	mutex_unlock(&lag_mutex);

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
	struct mlx5_lag *ldev;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	return ldev;
}

static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
				struct mlx5_core_dev *dev,
				struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	mutex_lock(&lag_mutex);
	ldev->pf[fn].dev    = dev;
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;

	dev->priv.lag = ldev;

	mutex_unlock(&lag_mutex);
}

static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
				   struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	mutex_lock(&lag_mutex);
	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));

	dev->priv.lag = NULL;
	mutex_unlock(&lag_mutex);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
		return;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc();
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return;
		}
	}

	mlx5_lag_dev_add_pf(ldev, dev, netdev);

	if (!ldev->nb.notifier_call) {
		ldev->nb.notifier_call = mlx5_lag_netdev_event;
		if (register_netdevice_notifier(&ldev->nb)) {
			ldev->nb.notifier_call = NULL;
			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
		}
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		return;

	if (__mlx5_lag_is_active(ldev))
		mlx5_deactivate_lag(ldev);

	mlx5_lag_dev_remove_pf(ldev, dev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			break;

	if (i == MLX5_MAX_PORTS) {
		if (ldev->nb.notifier_call)
			unregister_netdevice_notifier(&ldev->nb);
		mlx5_lag_mp_cleanup(ldev);
		cancel_delayed_work_sync(&ldev->bond_work);
		mlx5_lag_dev_free(ldev);
	}
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	mutex_unlock(&lag_mutex);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	mutex_unlock(&lag_mutex);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	mutex_unlock(&lag_mutex);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

void mlx5_lag_update(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();
	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		goto unlock;

	mlx5_do_bond(ldev);

unlock:
	mlx5_dev_list_unlock();
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[0].tx_enabled ?
		       ldev->pf[0].netdev : ldev->pf[1].netdev;
	} else {
		ndev = ldev->pf[0].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	mutex_unlock(&lag_mutex);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
						 priv);
	struct mlx5_lag *ldev;

	if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
		return true;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
		return true;

	/* If bonded, we do not add an IB device for PF1. */
	return false;
}

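/* Query congestion statistics, summing the counters of both ports when RoCE
 * LAG is active so the result covers the whole bond.
 */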
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);
	if (ldev && __mlx5_lag_is_roce(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[0] = ldev->pf[0].dev;
		mdev[1] = ldev->pf[1].dev;
	} else {
		num_ports = 1;
		mdev[0] = dev;
	}

	for (i = 0; i < num_ports; ++i) {
		ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
		if (ret)
			goto unlock;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

unlock:
	mutex_unlock(&lag_mutex);
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);