1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12
13 enum {
14 MLX5E_ROUTE_ENTRY_VALID = BIT(0),
15 };
16
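/* When the route device is an OVS internal port, rewrite the encap
 * destination so traffic egresses through that internal port instead
 * of the uplink.
 */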
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18 struct mlx5_flow_attr *attr,
19 struct mlx5e_encap_entry *e,
20 int out_index)
21 {
22 struct net_device *route_dev;
23 int err = 0;
24
25 route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26
27 if (!route_dev || !netif_is_ovs_master(route_dev) ||
28 attr->parse_attr->filter_dev == e->out_dev)
29 goto out;
30
31 err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
32 MLX5E_TC_INT_PORT_EGRESS,
33 &attr->action, out_index);
34
35 out:
36 if (route_dev)
37 dev_put(route_dev);
38
39 return err;
40 }
41
42 struct mlx5e_route_key {
43 int ip_version;
44 union {
45 __be32 v4;
46 struct in6_addr v6;
47 } endpoint_ip;
48 };
49
50 struct mlx5e_route_entry {
51 struct mlx5e_route_key key;
52 struct list_head encap_entries;
53 struct list_head decap_flows;
54 u32 flags;
55 struct hlist_node hlist;
56 refcount_t refcnt;
57 int tunnel_dev_index;
58 struct rcu_head rcu;
59 };
60
61 struct mlx5e_tc_tun_encap {
62 struct mlx5e_priv *priv;
63 struct notifier_block fib_nb;
64 spinlock_t route_lock; /* protects route_tbl */
65 unsigned long route_tbl_last_update;
66 DECLARE_HASHTABLE(route_tbl, 8);
67 };
68
69 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
70 {
71 return r->flags & MLX5E_ROUTE_ENTRY_VALID;
72 }
73
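/* Copy the outer source/destination IP addresses from the match spec into
 * rx_tun_attr and, when both are present, mark the flow as tunnel RX so a
 * decap route can be established for it.
 */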
74 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
75 struct mlx5_flow_spec *spec)
76 {
77 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
78 struct mlx5_rx_tun_attr *tun_attr;
79 void *daddr, *saddr;
80 u8 ip_version;
81
82 tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
83 if (!tun_attr)
84 return -ENOMEM;
85
86 esw_attr->rx_tun_attr = tun_attr;
87 ip_version = mlx5e_tc_get_ip_version(spec, true);
88
89 if (ip_version == 4) {
90 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
92 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
93 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
94 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
95 tun_attr->src_ip.v4 = *(__be32 *)saddr;
96 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
97 return 0;
98 }
99 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
100 else if (ip_version == 6) {
101 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
102
103 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
104 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
105 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
106 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
107 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
108 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
109 if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
110 ipv6_addr_any(&tun_attr->src_ip.v6))
111 return 0;
112 }
113 #endif
114 /* Only set the flag if both src and dst ip addresses exist. They are
115 * required to establish routing.
116 */
117 flow_flag_set(flow, TUN_RX);
118 flow->attr->tun_ip_version = ip_version;
119 return 0;
120 }
121
122 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
123 {
124 bool all_flow_encaps_valid = true;
125 int i;
126
127 /* Flow can be associated with multiple encap entries.
128 * Before offloading the flow verify that all of them have
129 * a valid neighbour.
130 */
131 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
132 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
133 continue;
134 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
135 all_flow_encaps_valid = false;
136 break;
137 }
138 }
139
140 return all_flow_encaps_valid;
141 }
142
143 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
144 struct mlx5e_encap_entry *e,
145 struct list_head *flow_list)
146 {
147 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
148 struct mlx5_pkt_reformat_params reformat_params;
149 struct mlx5_esw_flow_attr *esw_attr;
150 struct mlx5_flow_handle *rule;
151 struct mlx5_flow_attr *attr;
152 struct mlx5_flow_spec *spec;
153 struct mlx5e_tc_flow *flow;
154 int err;
155
156 if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
157 return;
158
159 memset(&reformat_params, 0, sizeof(reformat_params));
160 reformat_params.type = e->reformat_type;
161 reformat_params.size = e->encap_size;
162 reformat_params.data = e->encap_header;
163 e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
164 &reformat_params,
165 MLX5_FLOW_NAMESPACE_FDB);
166 if (IS_ERR(e->pkt_reformat)) {
167 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
168 PTR_ERR(e->pkt_reformat));
169 return;
170 }
171 e->flags |= MLX5_ENCAP_ENTRY_VALID;
172 mlx5e_rep_queue_neigh_stats_work(priv);
173
174 list_for_each_entry(flow, flow_list, tmp_list) {
175 if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
176 continue;
177
178 spec = &flow->attr->parse_attr->spec;
179
180 attr = mlx5e_tc_get_encap_attr(flow);
181 esw_attr = attr->esw_attr;
182 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
183 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
184
185 /* Do not offload flows with unresolved neighbors */
186 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
187 continue;
188
189 err = mlx5e_tc_offload_flow_post_acts(flow);
190 if (err) {
191 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
192 err);
193 continue;
194 }
195
196 /* update from slow path rule to encap rule */
197 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
198 if (IS_ERR(rule)) {
199 mlx5e_tc_unoffload_flow_post_acts(flow);
200 err = PTR_ERR(rule);
201 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
202 err);
203 continue;
204 }
205
206 mlx5e_tc_unoffload_from_slow_path(esw, flow);
207 flow->rule[0] = rule;
208 /* was unset when slow path rule removed */
209 flow_flag_set(flow, OFFLOADED);
210 }
211 }
212
213 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
214 struct mlx5e_encap_entry *e,
215 struct list_head *flow_list)
216 {
217 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
218 struct mlx5_esw_flow_attr *esw_attr;
219 struct mlx5_flow_handle *rule;
220 struct mlx5_flow_attr *attr;
221 struct mlx5_flow_spec *spec;
222 struct mlx5e_tc_flow *flow;
223 int err;
224
225 list_for_each_entry(flow, flow_list, tmp_list) {
226 if (!mlx5e_is_offloaded_flow(flow))
227 continue;
228
229 attr = mlx5e_tc_get_encap_attr(flow);
230 esw_attr = attr->esw_attr;
231 /* mark the flow's encap dest as non-valid */
232 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
233 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
234
235 /* Clear pkt_reformat before checking the slow path flag: a flow
236 * seen again in a later iteration already has the slow path flag
237 * set, but its pkt_reformat still needs to be cleared.
238 */
239 if (flow_flag_test(flow, SLOW))
240 continue;
241
242 /* update from encap rule to slow path rule */
243 spec = &flow->attr->parse_attr->spec;
244 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
245
246 if (IS_ERR(rule)) {
247 err = PTR_ERR(rule);
248 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
249 err);
250 continue;
251 }
252
253 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
254 mlx5e_tc_unoffload_flow_post_acts(flow);
255 flow->rule[0] = rule;
256 /* was unset when fast path rule removed */
257 flow_flag_set(flow, OFFLOADED);
258 }
259
260 /* we know that the encap is valid */
261 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
262 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
263 e->pkt_reformat = NULL;
264 }
265
266 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
267 struct list_head *flow_list,
268 int index)
269 {
270 if (IS_ERR(mlx5e_flow_get(flow))) {
271 /* Flow is being deleted concurrently. Wait for it to be
272 * unoffloaded from hardware, otherwise deleting encap will
273 * fail.
274 */
275 wait_for_completion(&flow->del_hw_done);
276 return;
277 }
278 wait_for_completion(&flow->init_done);
279
280 flow->tmp_entry_index = index;
281 list_add(&flow->tmp_list, flow_list);
282 }
283
284 /* Takes reference to all flows attached to encap and adds the flows to
285 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
286 */
287 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
288 {
289 struct encap_flow_item *efi;
290 struct mlx5e_tc_flow *flow;
291
292 list_for_each_entry(efi, &e->flows, list) {
293 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
294 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
295 }
296 }
297
298 /* Takes reference to all flows attached to route and adds the flows to
299 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
300 */
301 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
302 struct list_head *flow_list)
303 {
304 struct mlx5e_tc_flow *flow;
305
306 list_for_each_entry(flow, &r->decap_flows, decap_routes)
307 mlx5e_take_tmp_flow(flow, flow_list, 0);
308 }
309
310 typedef bool (match_cb)(struct mlx5e_encap_entry *);
311
312 static struct mlx5e_encap_entry *
313 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
314 struct mlx5e_encap_entry *e,
315 match_cb match)
316 {
317 struct mlx5e_encap_entry *next = NULL;
318
319 retry:
320 rcu_read_lock();
321
322 /* find encap with non-zero reference counter value */
323 for (next = e ?
324 list_next_or_null_rcu(&nhe->encap_list,
325 &e->encap_list,
326 struct mlx5e_encap_entry,
327 encap_list) :
328 list_first_or_null_rcu(&nhe->encap_list,
329 struct mlx5e_encap_entry,
330 encap_list);
331 next;
332 next = list_next_or_null_rcu(&nhe->encap_list,
333 &next->encap_list,
334 struct mlx5e_encap_entry,
335 encap_list))
336 if (mlx5e_encap_take(next))
337 break;
338
339 rcu_read_unlock();
340
341 /* release starting encap */
342 if (e)
343 mlx5e_encap_put(netdev_priv(e->out_dev), e);
344 if (!next)
345 return next;
346
347 /* wait for encap to be fully initialized */
348 wait_for_completion(&next->res_ready);
349 /* continue searching if encap entry is not in valid state after completion */
350 if (!match(next)) {
351 e = next;
352 goto retry;
353 }
354
355 return next;
356 }
357
358 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
359 {
360 return e->flags & MLX5_ENCAP_ENTRY_VALID;
361 }
362
363 static struct mlx5e_encap_entry *
364 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
365 struct mlx5e_encap_entry *e)
366 {
367 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
368 }
369
370 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
371 {
372 return e->compl_result >= 0;
373 }
374
375 struct mlx5e_encap_entry *
376 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
377 struct mlx5e_encap_entry *e)
378 {
379 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
380 }
381
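/* Check whether any offloaded flow attached to this neigh entry passed
 * traffic since the last report and, if so, notify the neighbour so the
 * kernel keeps it resolved.
 */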
382 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
383 {
384 struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
385 struct mlx5e_encap_entry *e = NULL;
386 struct mlx5e_tc_flow *flow;
387 struct mlx5_fc *counter;
388 struct neigh_table *tbl;
389 bool neigh_used = false;
390 struct neighbour *n;
391 u64 lastuse;
392
393 if (m_neigh->family == AF_INET)
394 tbl = &arp_tbl;
395 #if IS_ENABLED(CONFIG_IPV6)
396 else if (m_neigh->family == AF_INET6)
397 tbl = ipv6_stub->nd_tbl;
398 #endif
399 else
400 return;
401
402 /* mlx5e_get_next_valid_encap() releases previous encap before returning
403 * next one.
404 */
405 while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
406 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
407 struct encap_flow_item *efi, *tmp;
408 struct mlx5_eswitch *esw;
409 LIST_HEAD(flow_list);
410
411 esw = priv->mdev->priv.eswitch;
412 mutex_lock(&esw->offloads.encap_tbl_lock);
413 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
414 flow = container_of(efi, struct mlx5e_tc_flow,
415 encaps[efi->index]);
416 if (IS_ERR(mlx5e_flow_get(flow)))
417 continue;
418 list_add(&flow->tmp_list, &flow_list);
419
420 if (mlx5e_is_offloaded_flow(flow)) {
421 counter = mlx5e_tc_get_counter(flow);
422 lastuse = mlx5_fc_query_lastuse(counter);
423 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
424 neigh_used = true;
425 break;
426 }
427 }
428 }
429 mutex_unlock(&esw->offloads.encap_tbl_lock);
430
431 mlx5e_put_flow_list(priv, &flow_list);
432 if (neigh_used) {
433 /* release current encap before breaking the loop */
434 mlx5e_encap_put(priv, e);
435 break;
436 }
437 }
438
439 trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
440
441 if (neigh_used) {
442 nhe->reported_lastuse = jiffies;
443
444 /* find the relevant neigh according to the cached device and
445 * dst ip pair
446 */
447 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
448 if (!n)
449 return;
450
451 neigh_event_send(n, NULL);
452 neigh_release(n);
453 }
454 }
455
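/* Final release of an encap entry: detach it from the representor, free the
 * offloaded reformat (if any) and free the entry after an RCU grace period.
 */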
456 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
457 {
458 WARN_ON(!list_empty(&e->flows));
459
460 if (e->compl_result > 0) {
461 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
462
463 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
464 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
465 }
466
467 kfree(e->tun_info);
468 kfree(e->encap_header);
469 kfree_rcu(e, rcu);
470 }
471
472 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
473 struct mlx5e_decap_entry *d)
474 {
475 WARN_ON(!list_empty(&d->flows));
476
477 if (!d->compl_result)
478 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
479
480 kfree_rcu(d, rcu);
481 }
482
483 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
484 {
485 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
486
487 if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
488 return;
489 list_del(&e->route_list);
490 hash_del_rcu(&e->encap_hlist);
491 mutex_unlock(&esw->offloads.encap_tbl_lock);
492
493 mlx5e_encap_dealloc(priv, e);
494 }
495
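/* Like mlx5e_encap_put(), but the caller already holds encap_tbl_lock. */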
496 static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
497 {
498 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
499
500 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
501
502 if (!refcount_dec_and_test(&e->refcnt))
503 return;
504 list_del(&e->route_list);
505 hash_del_rcu(&e->encap_hlist);
506 mlx5e_encap_dealloc(priv, e);
507 }
508
509 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
510 {
511 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
512
513 if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
514 return;
515 hash_del_rcu(&d->hlist);
516 mutex_unlock(&esw->offloads.decap_tbl_lock);
517
518 mlx5e_decap_dealloc(priv, d);
519 }
520
521 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
522 struct mlx5e_tc_flow *flow,
523 int out_index);
524
525 void mlx5e_detach_encap(struct mlx5e_priv *priv,
526 struct mlx5e_tc_flow *flow,
527 struct mlx5_flow_attr *attr,
528 int out_index)
529 {
530 struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
531 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
532
533 if (!mlx5e_is_eswitch_flow(flow))
534 return;
535
536 if (attr->esw_attr->dests[out_index].flags &
537 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
538 mlx5e_detach_encap_route(priv, flow, out_index);
539
540 /* flow wasn't fully initialized */
541 if (!e)
542 return;
543
544 mutex_lock(&esw->offloads.encap_tbl_lock);
545 list_del(&flow->encaps[out_index].list);
546 flow->encaps[out_index].e = NULL;
547 if (!refcount_dec_and_test(&e->refcnt)) {
548 mutex_unlock(&esw->offloads.encap_tbl_lock);
549 return;
550 }
551 list_del(&e->route_list);
552 hash_del_rcu(&e->encap_hlist);
553 mutex_unlock(&esw->offloads.encap_tbl_lock);
554
555 mlx5e_encap_dealloc(priv, e);
556 }
557
558 void mlx5e_detach_decap(struct mlx5e_priv *priv,
559 struct mlx5e_tc_flow *flow)
560 {
561 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
562 struct mlx5e_decap_entry *d = flow->decap_reformat;
563
564 if (!d)
565 return;
566
567 mutex_lock(&esw->offloads.decap_tbl_lock);
568 list_del(&flow->l3_to_l2_reformat);
569 flow->decap_reformat = NULL;
570
571 if (!refcount_dec_and_test(&d->refcnt)) {
572 mutex_unlock(&esw->offloads.decap_tbl_lock);
573 return;
574 }
575 hash_del_rcu(&d->hlist);
576 mutex_unlock(&esw->offloads.decap_tbl_lock);
577
578 mlx5e_decap_dealloc(priv, d);
579 }
580
581 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
582 struct mlx5e_encap_key *b)
583 {
584 return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
585 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
586 }
587
588 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
589 struct mlx5e_encap_key *b,
590 __be16 tun_flags)
591 {
592 struct ip_tunnel_info *a_info;
593 struct ip_tunnel_info *b_info;
594 bool a_has_opts, b_has_opts;
595
596 if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
597 return false;
598
599 a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
600 b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
601
602 /* keys are equal when both don't have any options attached */
603 if (!a_has_opts && !b_has_opts)
604 return true;
605
606 if (a_has_opts != b_has_opts)
607 return false;
608
609 /* options stored in memory next to ip_tunnel_info struct */
610 a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
611 b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
612
613 return a_info->options_len == b_info->options_len &&
614 !memcmp(ip_tunnel_info_opts(a_info),
615 ip_tunnel_info_opts(b_info),
616 a_info->options_len);
617 }
618
619 static int cmp_decap_info(struct mlx5e_decap_key *a,
620 struct mlx5e_decap_key *b)
621 {
622 return memcmp(&a->key, &b->key, sizeof(b->key));
623 }
624
625 static int hash_encap_info(struct mlx5e_encap_key *key)
626 {
627 return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
628 key->tc_tunnel->tunnel_type);
629 }
630
631 static int hash_decap_info(struct mlx5e_decap_key *key)
632 {
633 return jhash(&key->key, sizeof(key->key), 0);
634 }
635
636 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
637 {
638 return refcount_inc_not_zero(&e->refcnt);
639 }
640
641 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
642 {
643 return refcount_inc_not_zero(&e->refcnt);
644 }
645
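/* Look up an encap entry with a matching tunnel key and take a reference to
 * it; returns NULL if no live entry exists.
 */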
646 static struct mlx5e_encap_entry *
647 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
648 uintptr_t hash_key)
649 {
650 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
651 struct mlx5e_encap_key e_key;
652 struct mlx5e_encap_entry *e;
653
654 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
655 encap_hlist, hash_key) {
656 e_key.ip_tun_key = &e->tun_info->key;
657 e_key.tc_tunnel = e->tunnel;
658 if (e->tunnel->encap_info_equal(&e_key, key) &&
659 mlx5e_encap_take(e))
660 return e;
661 }
662
663 return NULL;
664 }
665
666 static struct mlx5e_decap_entry *
667 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
668 uintptr_t hash_key)
669 {
670 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
671 struct mlx5e_decap_key r_key;
672 struct mlx5e_decap_entry *e;
673
674 hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
675 hlist, hash_key) {
676 r_key = e->key;
677 if (!cmp_decap_info(&r_key, key) &&
678 mlx5e_decap_take(e))
679 return e;
680 }
681 return NULL;
682 }
683
684 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
685 {
686 size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
687
688 return kmemdup(tun_info, tun_size, GFP_KERNEL);
689 }
690
691 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
692 struct mlx5e_tc_flow *flow,
693 int out_index,
694 struct mlx5e_encap_entry *e,
695 struct netlink_ext_ack *extack)
696 {
697 int i;
698
699 for (i = 0; i < out_index; i++) {
700 if (flow->encaps[i].e != e)
701 continue;
702 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
703 netdev_err(priv->netdev, "can't duplicate encap action\n");
704 return true;
705 }
706
707 return false;
708 }
709
710 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
711 struct mlx5_flow_attr *attr,
712 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
713 struct net_device *out_dev,
714 int route_dev_ifindex,
715 int out_index)
716 {
717 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
718 struct net_device *route_dev;
719 u16 vport_num;
720 int err = 0;
721 u32 data;
722
723 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
724
725 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
726 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
727 goto out;
728
729 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
730 if (err)
731 goto out;
732
733 attr->dest_chain = 0;
734 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
735 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
736 data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
737 vport_num);
738 err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
739 MLX5_FLOW_NAMESPACE_FDB,
740 VPORT_TO_REG, data);
741 if (err >= 0) {
742 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
743 err = 0;
744 }
745
746 out:
747 if (route_dev)
748 dev_put(route_dev);
749 return err;
750 }
751
752 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
753 struct mlx5_esw_flow_attr *attr,
754 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
755 struct net_device *out_dev,
756 int route_dev_ifindex,
757 int out_index)
758 {
759 int act_id = attr->dests[out_index].src_port_rewrite_act_id;
760 struct net_device *route_dev;
761 u16 vport_num;
762 int err = 0;
763 u32 data;
764
765 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
766
767 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
768 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
769 err = -ENODEV;
770 goto out;
771 }
772
773 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
774 if (err)
775 goto out;
776
777 data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
778 vport_num);
779 mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
780
781 out:
782 if (route_dev)
783 dev_put(route_dev);
784 return err;
785 }
786
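/* Read the route table update stamp under route_lock; callers compare stamps
 * to detect FIB updates that raced with attaching a flow.
 */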
787 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
788 {
789 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
790 struct mlx5_rep_uplink_priv *uplink_priv;
791 struct mlx5e_rep_priv *uplink_rpriv;
792 struct mlx5e_tc_tun_encap *encap;
793 unsigned int ret;
794
795 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
796 uplink_priv = &uplink_rpriv->uplink_priv;
797 encap = uplink_priv->encap;
798
799 spin_lock_bh(&encap->route_lock);
800 ret = encap->route_tbl_last_update;
801 spin_unlock_bh(&encap->route_lock);
802 return ret;
803 }
804
805 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
806 struct mlx5e_tc_flow *flow,
807 struct mlx5_flow_attr *attr,
808 struct mlx5e_encap_entry *e,
809 bool new_encap_entry,
810 unsigned long tbl_time_before,
811 int out_index);
812
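/* Find or create the encap entry describing this tunnel destination and link
 * the flow to it; the flow stays in slow path until the entry has a valid
 * offloaded encap header.
 */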
813 int mlx5e_attach_encap(struct mlx5e_priv *priv,
814 struct mlx5e_tc_flow *flow,
815 struct mlx5_flow_attr *attr,
816 struct net_device *mirred_dev,
817 int out_index,
818 struct netlink_ext_ack *extack,
819 struct net_device **encap_dev)
820 {
821 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
822 struct mlx5e_tc_flow_parse_attr *parse_attr;
823 const struct ip_tunnel_info *tun_info;
824 const struct mlx5e_mpls_info *mpls_info;
825 unsigned long tbl_time_before = 0;
826 struct mlx5e_encap_entry *e;
827 struct mlx5e_encap_key key;
828 bool entry_created = false;
829 unsigned short family;
830 uintptr_t hash_key;
831 int err = 0;
832
833 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
834
835 parse_attr = attr->parse_attr;
836 tun_info = parse_attr->tun_info[out_index];
837 mpls_info = &parse_attr->mpls_info[out_index];
838 family = ip_tunnel_info_af(tun_info);
839 key.ip_tun_key = &tun_info->key;
840 key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
841 if (!key.tc_tunnel) {
842 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
843 return -EOPNOTSUPP;
844 }
845
846 hash_key = hash_encap_info(&key);
847
848 e = mlx5e_encap_get(priv, &key, hash_key);
849
850 /* must verify if encap is valid or not */
851 if (e) {
852 /* Check that entry was not already attached to this flow */
853 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
854 err = -EOPNOTSUPP;
855 goto out_err;
856 }
857
858 goto attach_flow;
859 }
860
861 e = kzalloc(sizeof(*e), GFP_KERNEL);
862 if (!e) {
863 err = -ENOMEM;
864 goto out_err;
865 }
866
867 refcount_set(&e->refcnt, 1);
868 init_completion(&e->res_ready);
869 entry_created = true;
870 INIT_LIST_HEAD(&e->route_list);
871
872 tun_info = mlx5e_dup_tun_info(tun_info);
873 if (!tun_info) {
874 err = -ENOMEM;
875 goto out_err_init;
876 }
877 e->tun_info = tun_info;
878 memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
879 err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
880 if (err)
881 goto out_err_init;
882
883 INIT_LIST_HEAD(&e->flows);
884 hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
885 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
886
887 if (family == AF_INET)
888 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
889 else if (family == AF_INET6)
890 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
891
892 complete_all(&e->res_ready);
893 if (err) {
894 e->compl_result = err;
895 goto out_err;
896 }
897 e->compl_result = 1;
898
899 attach_flow:
900 err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
901 tbl_time_before, out_index);
902 if (err)
903 goto out_err;
904
905 err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
906 if (err == -EOPNOTSUPP) {
907 /* If device doesn't support int port offload,
908 * redirect to uplink vport.
909 */
910 mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
911 err = 0;
912 } else if (err) {
913 goto out_err;
914 }
915
916 flow->encaps[out_index].e = e;
917 list_add(&flow->encaps[out_index].list, &e->flows);
918 flow->encaps[out_index].index = out_index;
919 *encap_dev = e->out_dev;
920 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
921 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
922 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
923 } else {
924 flow_flag_set(flow, SLOW);
925 }
926
927 return err;
928
929 out_err:
930 if (e)
931 mlx5e_encap_put_locked(priv, e);
932 return err;
933
934 out_err_init:
935 kfree(tun_info);
936 kfree(e);
937 return err;
938 }
939
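/* Find or create a shared L3-to-L2 decap reformat entry, keyed on the inner
 * Ethernet header, and attach the flow to it.
 */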
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941 struct mlx5e_tc_flow *flow,
942 struct netlink_ext_ack *extack)
943 {
944 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946 struct mlx5_pkt_reformat_params reformat_params;
947 struct mlx5e_decap_entry *d;
948 struct mlx5e_decap_key key;
949 uintptr_t hash_key;
950 int err = 0;
951
952 if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953 NL_SET_ERR_MSG_MOD(extack,
954 "encap header larger than max supported");
955 return -EOPNOTSUPP;
956 }
957
958 key.key = attr->eth;
959 hash_key = hash_decap_info(&key);
960 mutex_lock(&esw->offloads.decap_tbl_lock);
961 d = mlx5e_decap_get(priv, &key, hash_key);
962 if (d) {
963 mutex_unlock(&esw->offloads.decap_tbl_lock);
964 wait_for_completion(&d->res_ready);
965 mutex_lock(&esw->offloads.decap_tbl_lock);
966 if (d->compl_result) {
967 err = -EREMOTEIO;
968 goto out_free;
969 }
970 goto found;
971 }
972
973 d = kzalloc(sizeof(*d), GFP_KERNEL);
974 if (!d) {
975 err = -ENOMEM;
976 goto out_err;
977 }
978
979 d->key = key;
980 refcount_set(&d->refcnt, 1);
981 init_completion(&d->res_ready);
982 INIT_LIST_HEAD(&d->flows);
983 hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984 mutex_unlock(&esw->offloads.decap_tbl_lock);
985
986 memset(&reformat_params, 0, sizeof(reformat_params));
987 reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988 reformat_params.size = sizeof(attr->eth);
989 reformat_params.data = &attr->eth;
990 d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991 &reformat_params,
992 MLX5_FLOW_NAMESPACE_FDB);
993 if (IS_ERR(d->pkt_reformat)) {
994 err = PTR_ERR(d->pkt_reformat);
995 d->compl_result = err;
996 }
997 mutex_lock(&esw->offloads.decap_tbl_lock);
998 complete_all(&d->res_ready);
999 if (err)
1000 goto out_free;
1001
1002 found:
1003 flow->decap_reformat = d;
1004 attr->decap_pkt_reformat = d->pkt_reformat;
1005 list_add(&flow->l3_to_l2_reformat, &d->flows);
1006 mutex_unlock(&esw->offloads.decap_tbl_lock);
1007 return 0;
1008
1009 out_free:
1010 mutex_unlock(&esw->offloads.decap_tbl_lock);
1011 mlx5e_decap_put(priv, d);
1012 return err;
1013
1014 out_err:
1015 mutex_unlock(&esw->offloads.decap_tbl_lock);
1016 return err;
1017 }
1018
1019 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1020 struct mlx5e_tc_flow *flow,
1021 struct mlx5_flow_attr *attr,
1022 struct netlink_ext_ack *extack,
1023 bool *vf_tun)
1024 {
1025 struct mlx5e_tc_flow_parse_attr *parse_attr;
1026 struct mlx5_esw_flow_attr *esw_attr;
1027 struct net_device *encap_dev = NULL;
1028 struct mlx5e_rep_priv *rpriv;
1029 struct mlx5e_priv *out_priv;
1030 struct mlx5_eswitch *esw;
1031 int out_index;
1032 int err = 0;
1033
1034 parse_attr = attr->parse_attr;
1035 esw_attr = attr->esw_attr;
1036 *vf_tun = false;
1037
1038 esw = priv->mdev->priv.eswitch;
1039 mutex_lock(&esw->offloads.encap_tbl_lock);
1040 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1041 struct net_device *out_dev;
1042 int mirred_ifindex;
1043
1044 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1045 continue;
1046
1047 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1048 out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1049 if (!out_dev) {
1050 NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1051 err = -ENODEV;
1052 goto out;
1053 }
1054 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1055 extack, &encap_dev);
1056 dev_put(out_dev);
1057 if (err)
1058 goto out;
1059
1060 if (esw_attr->dests[out_index].flags &
1061 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1062 !esw_attr->dest_int_port)
1063 *vf_tun = true;
1064
1065 out_priv = netdev_priv(encap_dev);
1066 rpriv = out_priv->ppriv;
1067 esw_attr->dests[out_index].rep = rpriv->rep;
1068 esw_attr->dests[out_index].mdev = out_priv->mdev;
1069 }
1070
1071 if (*vf_tun && esw_attr->out_count > 1) {
1072 NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1073 err = -EOPNOTSUPP;
1074 goto out;
1075 }
1076
1077 out:
1078 mutex_unlock(&esw->offloads.encap_tbl_lock);
1079 return err;
1080 }
1081
1082 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1083 struct mlx5e_tc_flow *flow,
1084 struct mlx5_flow_attr *attr)
1085 {
1086 struct mlx5_esw_flow_attr *esw_attr;
1087 int out_index;
1088
1089 if (!mlx5e_is_eswitch_flow(flow))
1090 return;
1091
1092 esw_attr = attr->esw_attr;
1093
1094 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1095 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1096 continue;
1097
1098 mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1099 kfree(attr->parse_attr->tun_info[out_index]);
1100 }
1101 }
1102
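/* Compare two route keys; returns 0 only when both keys are the same IP
 * version and their endpoint addresses match.
 */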
1103 static int cmp_route_info(struct mlx5e_route_key *a,
1104 struct mlx5e_route_key *b)
1105 {
1106 if (a->ip_version == 4 && b->ip_version == 4)
1107 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1108 sizeof(a->endpoint_ip.v4));
1109 else if (a->ip_version == 6 && b->ip_version == 6)
1110 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1111 sizeof(a->endpoint_ip.v6));
1112 return 1;
1113 }
1114
1115 static u32 hash_route_info(struct mlx5e_route_key *key)
1116 {
1117 if (key->ip_version == 4)
1118 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1119 return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1120 }
1121
1122 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1123 struct mlx5e_route_entry *r)
1124 {
1125 WARN_ON(!list_empty(&r->decap_flows));
1126 WARN_ON(!list_empty(&r->encap_entries));
1127
1128 kfree_rcu(r, rcu);
1129 }
1130
1131 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1132 {
1133 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1134
1135 if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1136 return;
1137
1138 hash_del_rcu(&r->hlist);
1139 mutex_unlock(&esw->offloads.encap_tbl_lock);
1140
1141 mlx5e_route_dealloc(priv, r);
1142 }
1143
1144 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1145 {
1146 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1147
1148 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1149
1150 if (!refcount_dec_and_test(&r->refcnt))
1151 return;
1152 hash_del_rcu(&r->hlist);
1153 mlx5e_route_dealloc(priv, r);
1154 }
1155
1156 static struct mlx5e_route_entry *
1157 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1158 u32 hash_key)
1159 {
1160 struct mlx5e_route_key r_key;
1161 struct mlx5e_route_entry *r;
1162
1163 hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1164 r_key = r->key;
1165 if (!cmp_route_info(&r_key, key) &&
1166 refcount_inc_not_zero(&r->refcnt))
1167 return r;
1168 }
1169 return NULL;
1170 }
1171
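/* Return an existing valid route entry for the key, or allocate a new one
 * and record the route table update stamp so the caller can detect FIB
 * events that raced with the lookup.
 */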
1172 static struct mlx5e_route_entry *
1173 mlx5e_route_get_create(struct mlx5e_priv *priv,
1174 struct mlx5e_route_key *key,
1175 int tunnel_dev_index,
1176 unsigned long *route_tbl_change_time)
1177 {
1178 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1179 struct mlx5_rep_uplink_priv *uplink_priv;
1180 struct mlx5e_rep_priv *uplink_rpriv;
1181 struct mlx5e_tc_tun_encap *encap;
1182 struct mlx5e_route_entry *r;
1183 u32 hash_key;
1184
1185 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1186 uplink_priv = &uplink_rpriv->uplink_priv;
1187 encap = uplink_priv->encap;
1188
1189 hash_key = hash_route_info(key);
1190 spin_lock_bh(&encap->route_lock);
1191 r = mlx5e_route_get(encap, key, hash_key);
1192 spin_unlock_bh(&encap->route_lock);
1193 if (r) {
1194 if (!mlx5e_route_entry_valid(r)) {
1195 mlx5e_route_put_locked(priv, r);
1196 return ERR_PTR(-EINVAL);
1197 }
1198 return r;
1199 }
1200
1201 r = kzalloc(sizeof(*r), GFP_KERNEL);
1202 if (!r)
1203 return ERR_PTR(-ENOMEM);
1204
1205 r->key = *key;
1206 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1207 r->tunnel_dev_index = tunnel_dev_index;
1208 refcount_set(&r->refcnt, 1);
1209 INIT_LIST_HEAD(&r->decap_flows);
1210 INIT_LIST_HEAD(&r->encap_entries);
1211
1212 spin_lock_bh(&encap->route_lock);
1213 *route_tbl_change_time = encap->route_tbl_last_update;
1214 hash_add(encap->route_tbl, &r->hlist, hash_key);
1215 spin_unlock_bh(&encap->route_lock);
1216
1217 return r;
1218 }
1219
1220 static struct mlx5e_route_entry *
1221 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1222 {
1223 u32 hash_key = hash_route_info(key);
1224 struct mlx5e_route_entry *r;
1225
1226 spin_lock_bh(&encap->route_lock);
1227 encap->route_tbl_last_update = jiffies;
1228 r = mlx5e_route_get(encap, key, hash_key);
1229 spin_unlock_bh(&encap->route_lock);
1230
1231 return r;
1232 }
1233
1234 struct mlx5e_tc_fib_event_data {
1235 struct work_struct work;
1236 unsigned long event;
1237 struct mlx5e_route_entry *r;
1238 struct net_device *ul_dev;
1239 };
1240
1241 static void mlx5e_tc_fib_event_work(struct work_struct *work);
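/* Allocate a FIB event work item; the route entry reference and uplink
 * device hold are taken by the callers before queueing it.
 */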
1242 static struct mlx5e_tc_fib_event_data *
1243 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1244 {
1245 struct mlx5e_tc_fib_event_data *fib_work;
1246
1247 fib_work = kzalloc(sizeof(*fib_work), flags);
1248 if (WARN_ON(!fib_work))
1249 return NULL;
1250
1251 INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1252 fib_work->event = event;
1253 fib_work->ul_dev = ul_dev;
1254
1255 return fib_work;
1256 }
1257
1258 static int
1259 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1260 struct mlx5e_route_entry *r,
1261 unsigned long event)
1262 {
1263 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1264 struct mlx5e_tc_fib_event_data *fib_work;
1265 struct mlx5e_rep_priv *uplink_rpriv;
1266 struct net_device *ul_dev;
1267
1268 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1269 ul_dev = uplink_rpriv->netdev;
1270
1271 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1272 if (!fib_work)
1273 return -ENOMEM;
1274
1275 dev_hold(ul_dev);
1276 refcount_inc(&r->refcnt);
1277 fib_work->r = r;
1278 queue_work(priv->wq, &fib_work->work);
1279
1280 return 0;
1281 }
1282
1283 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1284 struct mlx5e_tc_flow *flow)
1285 {
1286 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1287 unsigned long tbl_time_before, tbl_time_after;
1288 struct mlx5e_tc_flow_parse_attr *parse_attr;
1289 struct mlx5_flow_attr *attr = flow->attr;
1290 struct mlx5_esw_flow_attr *esw_attr;
1291 struct mlx5e_route_entry *r;
1292 struct mlx5e_route_key key;
1293 int err = 0;
1294
1295 esw_attr = attr->esw_attr;
1296 parse_attr = attr->parse_attr;
1297 mutex_lock(&esw->offloads.encap_tbl_lock);
1298 if (!esw_attr->rx_tun_attr)
1299 goto out;
1300
1301 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1302 tbl_time_after = tbl_time_before;
1303 err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1304 if (err || !esw_attr->rx_tun_attr->decap_vport)
1305 goto out;
1306
1307 key.ip_version = attr->tun_ip_version;
1308 if (key.ip_version == 4)
1309 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1310 else
1311 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1312
1313 r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1314 &tbl_time_after);
1315 if (IS_ERR(r)) {
1316 err = PTR_ERR(r);
1317 goto out;
1318 }
1319 /* Routing changed concurrently. FIB event handler might have missed new
1320 * entry, schedule update.
1321 */
1322 if (tbl_time_before != tbl_time_after) {
1323 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1324 if (err) {
1325 mlx5e_route_put_locked(priv, r);
1326 goto out;
1327 }
1328 }
1329
1330 flow->decap_route = r;
1331 list_add(&flow->decap_routes, &r->decap_flows);
1332 mutex_unlock(&esw->offloads.encap_tbl_lock);
1333 return 0;
1334
1335 out:
1336 mutex_unlock(&esw->offloads.encap_tbl_lock);
1337 return err;
1338 }
1339
1340 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1341 struct mlx5e_tc_flow *flow,
1342 struct mlx5_flow_attr *attr,
1343 struct mlx5e_encap_entry *e,
1344 bool new_encap_entry,
1345 unsigned long tbl_time_before,
1346 int out_index)
1347 {
1348 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1349 unsigned long tbl_time_after = tbl_time_before;
1350 struct mlx5e_tc_flow_parse_attr *parse_attr;
1351 const struct ip_tunnel_info *tun_info;
1352 struct mlx5_esw_flow_attr *esw_attr;
1353 struct mlx5e_route_entry *r;
1354 struct mlx5e_route_key key;
1355 unsigned short family;
1356 int err = 0;
1357
1358 esw_attr = attr->esw_attr;
1359 parse_attr = attr->parse_attr;
1360 tun_info = parse_attr->tun_info[out_index];
1361 family = ip_tunnel_info_af(tun_info);
1362
1363 if (family == AF_INET) {
1364 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1365 key.ip_version = 4;
1366 } else if (family == AF_INET6) {
1367 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1368 key.ip_version = 6;
1369 }
1370
1371 err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1372 e->route_dev_ifindex, out_index);
1373 if (err || !(esw_attr->dests[out_index].flags &
1374 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1375 return err;
1376
1377 r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1378 &tbl_time_after);
1379 if (IS_ERR(r))
1380 return PTR_ERR(r);
1381 /* Routing changed concurrently. FIB event handler might have missed new
1382 * entry, schedule update.
1383 */
1384 if (tbl_time_before != tbl_time_after) {
1385 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1386 if (err) {
1387 mlx5e_route_put_locked(priv, r);
1388 return err;
1389 }
1390 }
1391
1392 flow->encap_routes[out_index].r = r;
1393 if (new_encap_entry)
1394 list_add(&e->route_list, &r->encap_entries);
1395 flow->encap_routes[out_index].index = out_index;
1396 return 0;
1397 }
1398
1399 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1400 struct mlx5e_tc_flow *flow)
1401 {
1402 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1403 struct mlx5e_route_entry *r = flow->decap_route;
1404
1405 if (!r)
1406 return;
1407
1408 mutex_lock(&esw->offloads.encap_tbl_lock);
1409 list_del(&flow->decap_routes);
1410 flow->decap_route = NULL;
1411
1412 if (!refcount_dec_and_test(&r->refcnt)) {
1413 mutex_unlock(&esw->offloads.encap_tbl_lock);
1414 return;
1415 }
1416 hash_del_rcu(&r->hlist);
1417 mutex_unlock(&esw->offloads.encap_tbl_lock);
1418
1419 mlx5e_route_dealloc(priv, r);
1420 }
1421
1422 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1423 struct mlx5e_tc_flow *flow,
1424 int out_index)
1425 {
1426 struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1427 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1428 struct mlx5e_encap_entry *e, *tmp;
1429
1430 if (!r)
1431 return;
1432
1433 mutex_lock(&esw->offloads.encap_tbl_lock);
1434 flow->encap_routes[out_index].r = NULL;
1435
1436 if (!refcount_dec_and_test(&r->refcnt)) {
1437 mutex_unlock(&esw->offloads.encap_tbl_lock);
1438 return;
1439 }
1440 list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1441 list_del_init(&e->route_list);
1442 hash_del_rcu(&r->hlist);
1443 mutex_unlock(&esw->offloads.encap_tbl_lock);
1444
1445 mlx5e_route_dealloc(priv, r);
1446 }
1447
1448 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1449 struct mlx5e_encap_entry *e,
1450 struct list_head *encap_flows)
1451 {
1452 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1453 struct mlx5e_tc_flow *flow;
1454
1455 list_for_each_entry(flow, encap_flows, tmp_list) {
1456 struct mlx5_esw_flow_attr *esw_attr;
1457 struct mlx5_flow_attr *attr;
1458
1459 if (!mlx5e_is_offloaded_flow(flow))
1460 continue;
1461
1462 attr = mlx5e_tc_get_encap_attr(flow);
1463 esw_attr = attr->esw_attr;
1464
1465 if (flow_flag_test(flow, SLOW)) {
1466 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1467 } else {
1468 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1469 mlx5e_tc_unoffload_flow_post_acts(flow);
1470 }
1471
1472 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1473 attr->modify_hdr = NULL;
1474
1475 esw_attr->dests[flow->tmp_entry_index].flags &=
1476 ~MLX5_ESW_DEST_ENCAP_VALID;
1477 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1478 }
1479
1480 e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1481 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1482 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1483 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1484 e->pkt_reformat = NULL;
1485 }
1486 }
1487
1488 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1489 struct net_device *tunnel_dev,
1490 struct mlx5e_encap_entry *e,
1491 struct list_head *encap_flows)
1492 {
1493 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1494 struct mlx5e_tc_flow *flow;
1495 int err;
1496
1497 err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1498 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1499 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1500 if (err)
1501 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1502 e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1503
1504 list_for_each_entry(flow, encap_flows, tmp_list) {
1505 struct mlx5e_tc_flow_parse_attr *parse_attr;
1506 struct mlx5_esw_flow_attr *esw_attr;
1507 struct mlx5_flow_handle *rule;
1508 struct mlx5_flow_attr *attr;
1509 struct mlx5_flow_spec *spec;
1510
1511 if (flow_flag_test(flow, FAILED))
1512 continue;
1513
1514 spec = &flow->attr->parse_attr->spec;
1515
1516 attr = mlx5e_tc_get_encap_attr(flow);
1517 esw_attr = attr->esw_attr;
1518 parse_attr = attr->parse_attr;
1519
1520 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1521 e->out_dev, e->route_dev_ifindex,
1522 flow->tmp_entry_index);
1523 if (err) {
1524 mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1525 continue;
1526 }
1527
1528 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1529 if (err) {
1530 mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1531 err);
1532 continue;
1533 }
1534
1535 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1536 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1537 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1538 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1539 goto offload_to_slow_path;
1540
1541 err = mlx5e_tc_offload_flow_post_acts(flow);
1542 if (err) {
1543 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1544 err);
1545 goto offload_to_slow_path;
1546 }
1547
1548 /* update from slow path rule to encap rule */
1549 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1550 if (IS_ERR(rule)) {
1551 mlx5e_tc_unoffload_flow_post_acts(flow);
1552 err = PTR_ERR(rule);
1553 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1554 err);
1555 } else {
1556 flow->rule[0] = rule;
1557 }
1558 } else {
1559 offload_to_slow_path:
1560 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1561 /* mark the flow's encap dest as non-valid */
1562 esw_attr->dests[flow->tmp_entry_index].flags &=
1563 ~MLX5_ESW_DEST_ENCAP_VALID;
1564
1565 if (IS_ERR(rule)) {
1566 err = PTR_ERR(rule);
1567 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1568 err);
1569 } else {
1570 flow->rule[0] = rule;
1571 }
1572 }
1573 flow_flag_set(flow, OFFLOADED);
1574 }
1575 }
1576
1577 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1578 struct mlx5e_route_entry *r,
1579 struct list_head *flow_list,
1580 bool replace)
1581 {
1582 struct net_device *tunnel_dev;
1583 struct mlx5e_encap_entry *e;
1584
1585 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1586 if (!tunnel_dev)
1587 return -ENODEV;
1588
1589 list_for_each_entry(e, &r->encap_entries, route_list) {
1590 LIST_HEAD(encap_flows);
1591
1592 mlx5e_take_all_encap_flows(e, &encap_flows);
1593 if (list_empty(&encap_flows))
1594 continue;
1595
1596 if (mlx5e_route_entry_valid(r))
1597 mlx5e_invalidate_encap(priv, e, &encap_flows);
1598
1599 if (!replace) {
1600 list_splice(&encap_flows, flow_list);
1601 continue;
1602 }
1603
1604 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1605 list_splice(&encap_flows, flow_list);
1606 }
1607
1608 return 0;
1609 }
1610
1611 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1612 struct list_head *flow_list)
1613 {
1614 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1615 struct mlx5e_tc_flow *flow;
1616
1617 list_for_each_entry(flow, flow_list, tmp_list)
1618 if (mlx5e_is_offloaded_flow(flow))
1619 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1620 }
1621
1622 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1623 struct list_head *decap_flows)
1624 {
1625 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1626 struct mlx5e_tc_flow *flow;
1627
1628 list_for_each_entry(flow, decap_flows, tmp_list) {
1629 struct mlx5e_tc_flow_parse_attr *parse_attr;
1630 struct mlx5_flow_attr *attr = flow->attr;
1631 struct mlx5_flow_handle *rule;
1632 struct mlx5_flow_spec *spec;
1633 int err;
1634
1635 if (flow_flag_test(flow, FAILED))
1636 continue;
1637
1638 parse_attr = attr->parse_attr;
1639 spec = &parse_attr->spec;
1640 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1641 if (err) {
1642 mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1643 err);
1644 continue;
1645 }
1646
1647 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1648 if (IS_ERR(rule)) {
1649 err = PTR_ERR(rule);
1650 mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1651 err);
1652 } else {
1653 flow->rule[0] = rule;
1654 flow_flag_set(flow, OFFLOADED);
1655 }
1656 }
1657 }
1658
1659 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1660 struct mlx5e_route_entry *r,
1661 struct list_head *flow_list,
1662 bool replace)
1663 {
1664 struct net_device *tunnel_dev;
1665 LIST_HEAD(decap_flows);
1666
1667 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1668 if (!tunnel_dev)
1669 return -ENODEV;
1670
1671 mlx5e_take_all_route_decap_flows(r, &decap_flows);
1672 if (mlx5e_route_entry_valid(r))
1673 mlx5e_unoffload_flow_list(priv, &decap_flows);
1674 if (replace)
1675 mlx5e_reoffload_decap(priv, &decap_flows);
1676
1677 list_splice(&decap_flows, flow_list);
1678
1679 return 0;
1680 }
1681
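/* Work item handling a FIB replace/del event: invalidates or re-offloads all
 * encap and decap flows that depend on the affected route entry.
 */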
1682 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1683 {
1684 struct mlx5e_tc_fib_event_data *event_data =
1685 container_of(work, struct mlx5e_tc_fib_event_data, work);
1686 struct net_device *ul_dev = event_data->ul_dev;
1687 struct mlx5e_priv *priv = netdev_priv(ul_dev);
1688 struct mlx5e_route_entry *r = event_data->r;
1689 struct mlx5_eswitch *esw;
1690 LIST_HEAD(flow_list);
1691 bool replace;
1692 int err;
1693
1694 /* sync with concurrent neigh updates */
1695 rtnl_lock();
1696 esw = priv->mdev->priv.eswitch;
1697 mutex_lock(&esw->offloads.encap_tbl_lock);
1698 replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1699
1700 if (!mlx5e_route_entry_valid(r) && !replace)
1701 goto out;
1702
1703 err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1704 if (err)
1705 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1706 err);
1707
1708 err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1709 if (err)
1710 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1711 err);
1712
1713 if (replace)
1714 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1715 out:
1716 mutex_unlock(&esw->offloads.encap_tbl_lock);
1717 rtnl_unlock();
1718
1719 mlx5e_put_flow_list(priv, &flow_list);
1720 mlx5e_route_put(priv, event_data->r);
1721 dev_put(event_data->ul_dev);
1722 kfree(event_data);
1723 }
1724
1725 static struct mlx5e_tc_fib_event_data *
1726 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1727 struct net_device *ul_dev,
1728 struct mlx5e_tc_tun_encap *encap,
1729 unsigned long event,
1730 struct fib_notifier_info *info)
1731 {
1732 struct fib_entry_notifier_info *fen_info;
1733 struct mlx5e_tc_fib_event_data *fib_work;
1734 struct mlx5e_route_entry *r;
1735 struct mlx5e_route_key key;
1736 struct net_device *fib_dev;
1737
1738 fen_info = container_of(info, struct fib_entry_notifier_info, info);
1739 if (fen_info->fi->nh)
1740 return NULL;
1741 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1742 if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1743 fen_info->dst_len != 32)
1744 return NULL;
1745
1746 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1747 if (!fib_work)
1748 return ERR_PTR(-ENOMEM);
1749
1750 key.endpoint_ip.v4 = htonl(fen_info->dst);
1751 key.ip_version = 4;
1752
1753 /* Can't fail after this point because releasing reference to r
1754 * requires obtaining sleeping mutex which we can't do in atomic
1755 * context.
1756 */
1757 r = mlx5e_route_lookup_for_update(encap, &key);
1758 if (!r)
1759 goto out;
1760 fib_work->r = r;
1761 dev_hold(ul_dev);
1762
1763 return fib_work;
1764
1765 out:
1766 kfree(fib_work);
1767 return NULL;
1768 }
1769
1770 static struct mlx5e_tc_fib_event_data *
1771 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1772 struct net_device *ul_dev,
1773 struct mlx5e_tc_tun_encap *encap,
1774 unsigned long event,
1775 struct fib_notifier_info *info)
1776 {
1777 struct fib6_entry_notifier_info *fen_info;
1778 struct mlx5e_tc_fib_event_data *fib_work;
1779 struct mlx5e_route_entry *r;
1780 struct mlx5e_route_key key;
1781 struct net_device *fib_dev;
1782
1783 fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1784 fib_dev = fib6_info_nh_dev(fen_info->rt);
1785 if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1786 fen_info->rt->fib6_dst.plen != 128)
1787 return NULL;
1788
1789 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1790 if (!fib_work)
1791 return ERR_PTR(-ENOMEM);
1792
1793 memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1794 sizeof(fen_info->rt->fib6_dst.addr));
1795 key.ip_version = 6;
1796
1797 /* Can't fail after this point because releasing reference to r
1798 * requires obtaining sleeping mutex which we can't do in atomic
1799 * context.
1800 */
1801 r = mlx5e_route_lookup_for_update(encap, &key);
1802 if (!r)
1803 goto out;
1804 fib_work->r = r;
1805 dev_hold(ul_dev);
1806
1807 return fib_work;
1808
1809 out:
1810 kfree(fib_work);
1811 return NULL;
1812 }
1813
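/* FIB notifier callback; runs in atomic context, so it only prepares a work
 * item and defers the actual flow updates to the driver workqueue.
 */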
1814 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1815 {
1816 struct mlx5e_tc_fib_event_data *fib_work;
1817 struct fib_notifier_info *info = ptr;
1818 struct mlx5e_tc_tun_encap *encap;
1819 struct net_device *ul_dev;
1820 struct mlx5e_priv *priv;
1821
1822 encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1823 priv = encap->priv;
1824 ul_dev = priv->netdev;
1825 priv = netdev_priv(ul_dev);
1826
1827 switch (event) {
1828 case FIB_EVENT_ENTRY_REPLACE:
1829 case FIB_EVENT_ENTRY_DEL:
1830 if (info->family == AF_INET)
1831 fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1832 else if (info->family == AF_INET6)
1833 fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1834 else
1835 return NOTIFY_DONE;
1836
1837 if (!IS_ERR_OR_NULL(fib_work)) {
1838 queue_work(priv->wq, &fib_work->work);
1839 } else if (IS_ERR(fib_work)) {
1840 NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1841 mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1842 PTR_ERR(fib_work));
1843 }
1844
1845 break;
1846 default:
1847 return NOTIFY_DONE;
1848 }
1849
1850 return NOTIFY_DONE;
1851 }
1852
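/* Allocate the tunnel encap context for the uplink and register the FIB
 * notifier used to track route changes affecting offloaded tunnels.
 */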
1853 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1854 {
1855 struct mlx5e_tc_tun_encap *encap;
1856 int err;
1857
1858 encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1859 if (!encap)
1860 return ERR_PTR(-ENOMEM);
1861
1862 encap->priv = priv;
1863 encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1864 spin_lock_init(&encap->route_lock);
1865 hash_init(encap->route_tbl);
1866 err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1867 NULL, NULL);
1868 if (err) {
1869 kvfree(encap);
1870 return ERR_PTR(err);
1871 }
1872
1873 return encap;
1874 }
1875
1876 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1877 {
1878 if (!encap)
1879 return;
1880
1881 unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1882 flush_workqueue(encap->priv->wq); /* flush fib event works */
1883 kvfree(encap);
1884 }
1885