// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */

#include <linux/netdevice.h>
#include <linux/list.h>
#include <net/lag.h>

#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "en_rep.h"
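
/* Per-uplink bonding state: a netdev notifier block plus the list of
 * mlx5e_rep_bond_metadata entries, one per bond (LAG) device that currently
 * has mlx5 representors enslaved to it.
 */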
struct mlx5e_rep_bond {
	struct notifier_block nb;
	struct netdev_net_notifier nn;
	struct list_head metadata_list;
};

struct mlx5e_rep_bond_slave_entry {
	struct list_head list;
	struct net_device *netdev;
};

struct mlx5e_rep_bond_metadata {
	struct list_head list; /* link to global list of rep_bond_metadata */
	struct mlx5_eswitch *esw;
	/* private of uplink holding rep bond metadata list */
	struct net_device *lag_dev;
	u32 metadata_reg_c_0;

	struct list_head slaves_list; /* slaves list */
	int slaves;
};
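
/* Return the metadata entry tracking @lag_dev, or NULL if the bond has no
 * mlx5 representor slaves yet.
 */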
static struct mlx5e_rep_bond_metadata *
mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
			       const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_metadata *found = NULL;
	struct mlx5e_rep_bond_metadata *cur;

	list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
		if (cur->lag_dev == lag_dev) {
			found = cur;
			break;
		}
	}

	return found;
}
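
/* Return the slave entry of @mdata matching @netdev, or NULL if @netdev is
 * not enslaved to this bond.
 */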
static struct mlx5e_rep_bond_slave_entry *
mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
				  const struct net_device *netdev)
{
	struct mlx5e_rep_bond_slave_entry *found = NULL;
	struct mlx5e_rep_bond_slave_entry *cur;

	list_for_each_entry(cur, &mdata->slaves_list, list) {
		if (cur->netdev == netdev) {
			found = cur;
			break;
		}
	}

	return found;
}
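
/* Unlink and free a metadata entry and return its metadata_reg_c_0 value to
 * the eswitch allocator. Expected to be called only once the entry's slave
 * list is empty; both callers hold the RTNL lock.
 */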
static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
{
	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
		   mdata->metadata_reg_c_0);
	list_del(&mdata->list);
	mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
	WARN_ON(!list_empty(&mdata->slaves_list));
	kfree(mdata);
}
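
/* Attach representor @netdev to bond @lag_dev: allocate a shared
 * metadata_reg_c_0 value for the bond on first enslavement and point the
 * vport's ingress ACL at it.
 */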
/* This must be called under rtnl_lock */
int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
			   struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;
	int err;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata) {
		/* First netdev is becoming a slave and no metadata represents
		 * this lag_dev yet. Create one.
		 */
		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
		if (!mdata)
			return -ENOMEM;

		mdata->lag_dev = lag_dev;
		mdata->esw = esw;
		INIT_LIST_HEAD(&mdata->slaves_list);
		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
		if (!mdata->metadata_reg_c_0) {
			kfree(mdata);
			return -ENOSPC;
		}
		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);

		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
			   mdata->metadata_reg_c_0);
	}

	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
	if (!s_entry) {
		err = -ENOMEM;
		goto entry_alloc_err;
	}

	s_entry->netdev = netdev;
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
						     mdata->metadata_reg_c_0);
	if (err)
		goto ingress_err;

	mdata->slaves++;
	list_add_tail(&s_entry->list, &mdata->slaves_list);
	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	return 0;

ingress_err:
	kfree(s_entry);
entry_alloc_err:
	if (!mdata->slaves)
		mlx5e_rep_bond_metadata_release(mdata);
	return err;
}
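
/* Detach representor @netdev from bond @lag_dev: restore the vport's
 * ingress metadata, egress ACL and rx rule to their stand-alone state, and
 * release the bond metadata when the last slave leaves.
 */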
/* This must be called under rtnl_lock */
void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
			    const struct net_device *netdev,
			    const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata)
		return;

	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
	if (!s_entry)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Reset the bond_metadata to zero first, then reset all ingress/egress
	 * ACLs and rx rules of the unslaved representor's vport.
	 */
	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
	mlx5e_rep_bond_update(priv, false);

	list_del(&s_entry->list);

	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	if (--mdata->slaves == 0)
		mlx5e_rep_bond_metadata_release(mdata);
	kfree(s_entry);
}
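
/* Bonding is handled only for non-uplink mlx5 representors that are already
 * lower devices of a LAG master.
 */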
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	/* The netdev is not a representor or is not a slave of a LAG
	 * configuration.
	 */
	if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
		return false;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Egress ACL forward to vport is supported only for non-uplink
	 * representors.
	 */
	return rpriv->rep->vport != MLX5_VPORT_UPLINK;
}
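
/* NETDEV_CHANGELOWERSTATE handler: when a representor becomes the bond's
 * active (tx-enabled) slave, redirect every other slave's egress ACL to the
 * active vport and install the rx rule on the active vport itself.
 */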
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changelowerstate_info *info;
	struct netdev_lag_lower_state_info *lag_info;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;
	struct list_head *iter;
	struct net_device *dev;
	u16 acl_vport_num;
	u16 fwd_vport_num;
	int err;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return;

	info = ptr;
	lag_info = info->lower_state_info;
	/* This is not an event of a representor becoming the active slave */
	if (!lag_info->tx_enabled)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	fwd_vport_num = rpriv->rep->vport;
	lag_dev = netdev_master_upper_dev_get(netdev);

	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));

	/* Point everyone's egress ACL to the vport of the active representor */
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		priv = netdev_priv(dev);
		rpriv = priv->ppriv;
		acl_vport_num = rpriv->rep->vport;
		if (acl_vport_num != fwd_vport_num) {
			/* Only a single rx_rule should exist per unique
			 * bond_metadata; delete it if it is saved as a passive
			 * vport's rx_rule with the passive vport's root_ft as
			 * destination.
			 */
			mlx5e_rep_bond_update(priv, true);
			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
							     fwd_vport_num,
							     acl_vport_num);
			if (err)
				netdev_warn(dev,
					    "configure slave vport(%d) egress fwd, err(%d)",
					    acl_vport_num, err);
		}
	}

	/* Insert a new rx_rule for the unique bond_metadata; save it as the
	 * active vport's rx_rule, with the active vport's root_ft as the new
	 * destination.
	 */
	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
	if (err)
		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
			    fwd_vport_num, err);
}
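
/* NETDEV_CHANGEUPPER handler: enslave or unslave the representor when it is
 * linked to or unlinked from a bond master.
 */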
static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	lag_dev = info->upper_dev;

	netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
		   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);

	if (info->linking)
		mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
	else
		mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
}

/* Bond devices of representors and netdev events are used here in a specific
 * way to support eswitch vport bonding and to perform failover of an eswitch
 * vport by modifying the egress ACLs of the lower-dev representors. This also
 * changes the traditional behavior of lower devs under a bond device.
 * Non-representor netdevs and representors of other vendors as lower devs of
 * a bond device are not supported.
 */
static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
				       unsigned long event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_CHANGELOWERSTATE:
		mlx5e_rep_changelowerstate_event(netdev, ptr);
		break;
	case NETDEV_CHANGEUPPER:
		mlx5e_rep_changeupper_event(netdev, ptr);
		break;
	}
	return NOTIFY_DONE;
}

/* If HW supports eswitch vport bonding, register a dedicated notifier to
 * handle it when two or more representors are bonded.
 */
int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv;
	int ret = 0;

	priv = netdev_priv(netdev);
	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
		goto out;

	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
	if (!uplink_priv->bond) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
	ret = register_netdevice_notifier_dev_net(netdev,
						  &uplink_priv->bond->nb,
						  &uplink_priv->bond->nn);
	if (ret) {
		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
		kvfree(uplink_priv->bond);
		uplink_priv->bond = NULL;
	}

out:
	return ret;
}
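
/* Unregister the bonding netevent notifier and free the per-uplink bond
 * state allocated by mlx5e_rep_bond_init(). A no-op when egress
 * forward-to-vport is unsupported or init did not allocate the state.
 */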
void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
	    !rpriv->uplink_priv.bond)
		return;

	unregister_netdevice_notifier_dev_net(rpriv->netdev,
					      &rpriv->uplink_priv.bond->nb,
					      &rpriv->uplink_priv.bond->nn);
	kvfree(rpriv->uplink_priv.bond);
}