/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

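/* Firmware command wrappers: CREATE_LAG programs the initial Tx port
 * affinity and the FDB selection mode, while MODIFY_LAG updates only
 * the Tx affinity of an existing LAG object.
 */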
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

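/* Map each logical port to the physical port that should carry its
 * traffic: keep the identity mapping when both ports (or neither) can
 * transmit, otherwise steer both logical ports to the one active port.
 */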
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = 1;
	*port2 = 2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = 1;
	else
		*port1 = 2;
}

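/* Recompute the Tx affinity from the tracked bond state and push it to
 * firmware only when the mapping actually changed.
 */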
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

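/* Create the LAG object in firmware and, when requested, move both
 * eswitches to a single shared FDB. If shared FDB setup fails, the
 * just-created LAG is destroyed again.
 */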
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

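/* LAG offload requires both PF devices to be present and their
 * eswitch/SR-IOV configuration to permit it.
 */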
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

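/* Shared FDB mode needs both devices in switchdev mode with vport match
 * metadata enabled, paired eswitch offloads, and firmware support for
 * native FDB selection, a root FT on the other eswitch and a shared
 * ingress ACL.
 */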
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

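/* Evaluate the tracked bond state and activate, modify or tear down the
 * hardware LAG accordingly. RoCE LAG is chosen only when SR-IOV and the
 * eswitch are disabled on both ports; otherwise VF LAG is used.
 */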
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}

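/* Delayed work that re-evaluates the bond state. It is requeued while
 * the device list lock is contended or a mode change is in progress,
 * and runs mlx5_do_bond() with both eswitches locked.
 */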
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}

static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

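/* Netdev notifier: track bond-related events on our netdevs and kick
 * the bond work whenever the aggregated state changes.
 */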
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);

	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	int i;

	spin_lock(&lag_lock);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return 0;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

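/* Detach dev from its LAG object, waiting out any mode change in
 * progress, and drop the reference that may free the object.
 */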
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
		dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

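/* Keep the bond work from changing the LAG state and tear down an
 * active LAG; mlx5_lag_enable_change() lifts the block and re-evaluates
 * the bond.
 */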
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
			   ldev->pf[MLX5_LAG_P2].dev :
			   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock(&lag_lock);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);