// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
}

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set LAG port affinity
 * @ldev: lag device
 * @port: affinity port:
 *        0 - set normal affinity.
 *        1 - set affinity to port 1.
 *        2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case 0:
		tracker.netdev_state[0].tx_enabled = true;
		tracker.netdev_state[1].tx_enabled = true;
		tracker.netdev_state[0].link_up = true;
		tracker.netdev_state[1].link_up = true;
		break;
	case 1:
		tracker.netdev_state[0].tx_enabled = true;
		tracker.netdev_state[0].link_up = true;
		tracker.netdev_state[1].tx_enabled = false;
		tracker.netdev_state[1].link_up = false;
		break;
	case 2:
		tracker.netdev_state[0].tx_enabled = false;
		tracker.netdev_state[0].link_up = false;
		tracker.netdev_state[1].tx_enabled = true;
		tracker.netdev_state[1].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
			       port);
		return;
	}

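	/* Notify each PF whose port stays TX-enabled about the affinity
	 * change (e.g. so offloaded tunnel routes can follow the active
	 * port); the event carries no payload, hence the (void *)0.
	 */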
	if (tracker.netdev_state[0].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

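/* Flush callback handed to register_fib_notifier(); the FIB core invokes
 * it when it needs all of our queued FIB work to have finished running.
 */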
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);

	flush_workqueue(ldev->wq);
}

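/* FIB events arrive in atomic context, so each one is recorded in a work
 * item and processed later from the lag workqueue.
 */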
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop tracking the route if it is the one we follow */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			/* netdev index is 0-based, affinity port is 1-based;
			 * -1 (not found) becomes 0, i.e. normal affinity.
			 */
			mlx5_lag_set_port_affinity(ldev, ++i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify the next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
		mlx5_core_warn(ldev->pf[0].dev, "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, 0);
	mp->mfi = fi;
}

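/* A single next hop was added to or removed from the route we track;
 * move traffic to the remaining port on removal, and back to normal
 * affinity once the route has two next hops again.
 */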
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nh event is related to the route we track */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* 1-based index of peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, 0);
	}
}

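/* Work handler: FIB events recorded in atomic context are replayed here,
 * under RTNL, where it is safe to touch the lag state.
 */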
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev, fib_work->event, fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

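/* FIB notifiers run in atomic context, hence the GFP_ATOMIC allocation. */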
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

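/* FIB notifier callback: filter out events we do not care about, take a
 * reference on the fib_info so it stays alive, and defer the real work
 * to mlx5_lag_fib_update() on the lag workqueue.
 */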
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh) {
			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
			return notifier_from_errno(-EINVAL);
		}
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[0].netdev &&
		    fib_dev != ldev->pf[1].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take a reference on fib_info to prevent it from being
		 * freed while the work is queued. Released in the work
		 * handler.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(ldev->wq, &fib_work->work);

	return NOTIFY_DONE;
}

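/* Register for FIB events once per lag device; fib_nb.notifier_call
 * doubles as the "already registered" marker.
 */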
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&mp->fib_nb,
				    mlx5_lag_fib_event_flush);
	if (err)
		mp->fib_nb.notifier_call = NULL;

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&mp->fib_nb);
	mp->fib_nb.notifier_call = NULL;
}