1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include "tc_tun_encap.h"
7 #include "en_tc.h"
8 #include "tc_tun.h"
9 #include "rep/tc.h"
10 #include "diag/en_tc_tracepoint.h"
11 
/* Bits stored in the 'flags' member of struct mlx5e_route_entry. */
enum {
	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
};
15 
mlx5e_set_int_port_tunnel(struct mlx5e_priv * priv,struct mlx5_flow_attr * attr,struct mlx5e_encap_entry * e,int out_index)16 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
17 				     struct mlx5_flow_attr *attr,
18 				     struct mlx5e_encap_entry *e,
19 				     int out_index)
20 {
21 	struct net_device *route_dev;
22 	int err = 0;
23 
24 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
25 
26 	if (!route_dev || !netif_is_ovs_master(route_dev))
27 		goto out;
28 
29 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
30 						MLX5E_TC_INT_PORT_EGRESS,
31 						&attr->action, out_index);
32 
33 out:
34 	if (route_dev)
35 		dev_put(route_dev);
36 
37 	return err;
38 }
39 
/* Lookup key of a route entry: an IP version plus the matching address. */
struct mlx5e_route_key {
	int ip_version; /* 4 or 6; selects which endpoint_ip member is compared */
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};
47 
/* Per-route bookkeeping for tunnel flows. */
struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries; /* presumably encap entries using this route - TODO confirm */
	struct list_head decap_flows; /* flows linked through mlx5e_tc_flow::decap_routes */
	u32 flags; /* MLX5E_ROUTE_ENTRY_* bits */
	struct hlist_node hlist; /* presumably the node in mlx5e_tc_tun_encap::route_tbl - TODO confirm */
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};
58 
/* Tunnel encap route-tracking state; reached in this file through the uplink
 * representor's uplink_priv.encap pointer.
 */
struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update; /* read under route_lock */
	DECLARE_HASHTABLE(route_tbl, 8);
};
66 
mlx5e_route_entry_valid(struct mlx5e_route_entry * r)67 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
68 {
69 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
70 }
71 
mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow * flow,struct mlx5_flow_spec * spec)72 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
73 			     struct mlx5_flow_spec *spec)
74 {
75 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
76 	struct mlx5_rx_tun_attr *tun_attr;
77 	void *daddr, *saddr;
78 	u8 ip_version;
79 
80 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
81 	if (!tun_attr)
82 		return -ENOMEM;
83 
84 	esw_attr->rx_tun_attr = tun_attr;
85 	ip_version = mlx5e_tc_get_ip_version(spec, true);
86 
87 	if (ip_version == 4) {
88 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
89 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
90 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
92 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
93 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
94 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
95 			return 0;
96 	}
97 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
98 	else if (ip_version == 6) {
99 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
100 		struct in6_addr zerov6 = {};
101 
102 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108 		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
109 		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
110 			return 0;
111 	}
112 #endif
113 	/* Only set the flag if both src and dst ip addresses exist. They are
114 	 * required to establish routing.
115 	 */
116 	flow_flag_set(flow, TUN_RX);
117 	flow->attr->tun_ip_version = ip_version;
118 	return 0;
119 }
120 
mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr * esw_attr)121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123 	bool all_flow_encaps_valid = true;
124 	int i;
125 
126 	/* Flow can be associated with multiple encap entries.
127 	 * Before offloading the flow verify that all of them have
128 	 * a valid neighbour.
129 	 */
130 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132 			continue;
133 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134 			all_flow_encaps_valid = false;
135 			break;
136 		}
137 	}
138 
139 	return all_flow_encaps_valid;
140 }
141 
mlx5e_tc_encap_flows_add(struct mlx5e_priv * priv,struct mlx5e_encap_entry * e,struct list_head * flow_list)142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143 			      struct mlx5e_encap_entry *e,
144 			      struct list_head *flow_list)
145 {
146 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147 	struct mlx5_pkt_reformat_params reformat_params;
148 	struct mlx5_esw_flow_attr *esw_attr;
149 	struct mlx5_flow_handle *rule;
150 	struct mlx5_flow_attr *attr;
151 	struct mlx5_flow_spec *spec;
152 	struct mlx5e_tc_flow *flow;
153 	int err;
154 
155 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156 		return;
157 
158 	memset(&reformat_params, 0, sizeof(reformat_params));
159 	reformat_params.type = e->reformat_type;
160 	reformat_params.size = e->encap_size;
161 	reformat_params.data = e->encap_header;
162 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163 						     &reformat_params,
164 						     MLX5_FLOW_NAMESPACE_FDB);
165 	if (IS_ERR(e->pkt_reformat)) {
166 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
167 			       PTR_ERR(e->pkt_reformat));
168 		return;
169 	}
170 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
171 	mlx5e_rep_queue_neigh_stats_work(priv);
172 
173 	list_for_each_entry(flow, flow_list, tmp_list) {
174 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175 			continue;
176 
177 		spec = &flow->attr->parse_attr->spec;
178 
179 		attr = mlx5e_tc_get_encap_attr(flow);
180 		esw_attr = attr->esw_attr;
181 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183 
184 		/* Do not offload flows with unresolved neighbors */
185 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186 			continue;
187 
188 		err = mlx5e_tc_offload_flow_post_acts(flow);
189 		if (err) {
190 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191 				       err);
192 			continue;
193 		}
194 
195 		/* update from slow path rule to encap rule */
196 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197 		if (IS_ERR(rule)) {
198 			mlx5e_tc_unoffload_flow_post_acts(flow);
199 			err = PTR_ERR(rule);
200 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201 				       err);
202 			continue;
203 		}
204 
205 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
206 		flow->rule[0] = rule;
207 		/* was unset when slow path rule removed */
208 		flow_flag_set(flow, OFFLOADED);
209 	}
210 }
211 
/* Move the flows on @flow_list from their encap (fast path) rule to a slow
 * path rule and release the packet reformat object of @e.
 *
 * @priv:      device private data used for the reformat release and logs
 * @e:         encap entry being invalidated
 * @flow_list: flows linked through mlx5e_tc_flow::tmp_list; each flow's
 *             tmp_entry_index selects the destination slot that uses @e
 *
 * Flows that are not offloaded or are already in slow path are skipped. If
 * the slow path rule cannot be installed for a flow, a warning is logged and
 * that flow keeps its current rule. After the loop the entry's VALID flag is
 * cleared and its reformat object is deallocated unconditionally.
 */
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;

		/* update from encap rule to slow path rule */
		spec = &flow->attr->parse_attr->spec;
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		/* the fast path rule is removed only after the slow path rule exists */
		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5e_tc_unoffload_flow_post_acts(flow);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	e->pkt_reformat = NULL;
}
257 
mlx5e_take_tmp_flow(struct mlx5e_tc_flow * flow,struct list_head * flow_list,int index)258 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
259 				struct list_head *flow_list,
260 				int index)
261 {
262 	if (IS_ERR(mlx5e_flow_get(flow))) {
263 		/* Flow is being deleted concurrently. Wait for it to be
264 		 * unoffloaded from hardware, otherwise deleting encap will
265 		 * fail.
266 		 */
267 		wait_for_completion(&flow->del_hw_done);
268 		return;
269 	}
270 	wait_for_completion(&flow->init_done);
271 
272 	flow->tmp_entry_index = index;
273 	list_add(&flow->tmp_list, flow_list);
274 }
275 
276 /* Takes reference to all flows attached to encap and adds the flows to
277  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
278  */
mlx5e_take_all_encap_flows(struct mlx5e_encap_entry * e,struct list_head * flow_list)279 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
280 {
281 	struct encap_flow_item *efi;
282 	struct mlx5e_tc_flow *flow;
283 
284 	list_for_each_entry(efi, &e->flows, list) {
285 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
286 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
287 	}
288 }
289 
290 /* Takes reference to all flows attached to route and adds the flows to
291  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
292  */
mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry * r,struct list_head * flow_list)293 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
294 					     struct list_head *flow_list)
295 {
296 	struct mlx5e_tc_flow *flow;
297 
298 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
299 		mlx5e_take_tmp_flow(flow, flow_list, 0);
300 }
301 
/* Predicate applied to an encap entry by mlx5e_get_next_matching_encap(). */
typedef bool (match_cb)(struct mlx5e_encap_entry *);
303 
/* Return the next encap entry on nhe->encap_list, after @e, that can be
 * referenced and satisfies @match.
 *
 * @nhe:   neigh hash entry whose encap_list is walked
 * @e:     entry to continue from, or NULL to start from the head of the
 *         list; the caller's reference on @e is released here
 * @match: predicate evaluated once the candidate's res_ready completion has
 *         fired
 *
 * The returned entry carries a reference taken with mlx5e_encap_take(); the
 * caller releases it either by passing the entry back in as @e or by calling
 * mlx5e_encap_put(). Returns NULL when the list is exhausted.
 */
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		/* 'next' becomes the starting entry; its reference is dropped on the next pass */
		e = next;
		goto retry;
	}

	return next;
}
349 
mlx5e_encap_valid(struct mlx5e_encap_entry * e)350 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
351 {
352 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
353 }
354 
355 static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry * nhe,struct mlx5e_encap_entry * e)356 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
357 			   struct mlx5e_encap_entry *e)
358 {
359 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
360 }
361 
mlx5e_encap_initialized(struct mlx5e_encap_entry * e)362 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
363 {
364 	return e->compl_result >= 0;
365 }
366 
367 struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry * nhe,struct mlx5e_encap_entry * e)368 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
369 			  struct mlx5e_encap_entry *e)
370 {
371 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
372 }
373 
/* Check whether any offloaded flow that uses neighbour @nhe has passed
 * traffic since nhe->reported_lastuse and, if so, send a neigh event for the
 * matching kernel neighbour.
 *
 * Every valid encap entry of @nhe is visited. For each one its flows are
 * scanned under encap_tbl_lock, and the counter's last-use time of every
 * offloaded flow is compared against nhe->reported_lastuse. The scan stops
 * at the first flow found to be newer. Only AF_INET, and AF_INET6 when
 * CONFIG_IPV6 is enabled, are handled; other families return immediately.
 */
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			/* skip flows whose reference can no longer be taken */
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			/* referenced flows are released after the lock is dropped */
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}
447 
mlx5e_encap_dealloc(struct mlx5e_priv * priv,struct mlx5e_encap_entry * e)448 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
449 {
450 	WARN_ON(!list_empty(&e->flows));
451 
452 	if (e->compl_result > 0) {
453 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
454 
455 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
456 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
457 	}
458 
459 	kfree(e->tun_info);
460 	kfree(e->encap_header);
461 	kfree_rcu(e, rcu);
462 }
463 
mlx5e_decap_dealloc(struct mlx5e_priv * priv,struct mlx5e_decap_entry * d)464 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
465 				struct mlx5e_decap_entry *d)
466 {
467 	WARN_ON(!list_empty(&d->flows));
468 
469 	if (!d->compl_result)
470 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
471 
472 	kfree_rcu(d, rcu);
473 }
474 
mlx5e_encap_put(struct mlx5e_priv * priv,struct mlx5e_encap_entry * e)475 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
476 {
477 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
478 
479 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
480 		return;
481 	list_del(&e->route_list);
482 	hash_del_rcu(&e->encap_hlist);
483 	mutex_unlock(&esw->offloads.encap_tbl_lock);
484 
485 	mlx5e_encap_dealloc(priv, e);
486 }
487 
mlx5e_decap_put(struct mlx5e_priv * priv,struct mlx5e_decap_entry * d)488 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
489 {
490 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
491 
492 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
493 		return;
494 	hash_del_rcu(&d->hlist);
495 	mutex_unlock(&esw->offloads.decap_tbl_lock);
496 
497 	mlx5e_decap_dealloc(priv, d);
498 }
499 
/* Forward declaration: used by mlx5e_detach_encap() ahead of the definition. */
static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);
503 
/* Detach @flow from the encap entry stored in slot @out_index and drop the
 * flow's reference on that entry.
 *
 * Does nothing for non-eswitch flows. If the destination carries the
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE flag, the encap route is detached
 * first. When the last reference is dropped, the entry is unlinked from the
 * route list and the encap hash table under encap_tbl_lock and deallocated
 * after the lock is released.
 */
void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_attr *attr,
			int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	if (attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		/* other users still hold the entry */
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}
536 
mlx5e_detach_decap(struct mlx5e_priv * priv,struct mlx5e_tc_flow * flow)537 void mlx5e_detach_decap(struct mlx5e_priv *priv,
538 			struct mlx5e_tc_flow *flow)
539 {
540 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
541 	struct mlx5e_decap_entry *d = flow->decap_reformat;
542 
543 	if (!d)
544 		return;
545 
546 	mutex_lock(&esw->offloads.decap_tbl_lock);
547 	list_del(&flow->l3_to_l2_reformat);
548 	flow->decap_reformat = NULL;
549 
550 	if (!refcount_dec_and_test(&d->refcnt)) {
551 		mutex_unlock(&esw->offloads.decap_tbl_lock);
552 		return;
553 	}
554 	hash_del_rcu(&d->hlist);
555 	mutex_unlock(&esw->offloads.decap_tbl_lock);
556 
557 	mlx5e_decap_dealloc(priv, d);
558 }
559 
mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key * a,struct mlx5e_encap_key * b)560 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
561 					   struct mlx5e_encap_key *b)
562 {
563 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
564 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
565 }
566 
cmp_decap_info(struct mlx5e_decap_key * a,struct mlx5e_decap_key * b)567 static int cmp_decap_info(struct mlx5e_decap_key *a,
568 			  struct mlx5e_decap_key *b)
569 {
570 	return memcmp(&a->key, &b->key, sizeof(b->key));
571 }
572 
hash_encap_info(struct mlx5e_encap_key * key)573 static int hash_encap_info(struct mlx5e_encap_key *key)
574 {
575 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
576 		     key->tc_tunnel->tunnel_type);
577 }
578 
hash_decap_info(struct mlx5e_decap_key * key)579 static int hash_decap_info(struct mlx5e_decap_key *key)
580 {
581 	return jhash(&key->key, sizeof(key->key), 0);
582 }
583 
mlx5e_encap_take(struct mlx5e_encap_entry * e)584 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
585 {
586 	return refcount_inc_not_zero(&e->refcnt);
587 }
588 
mlx5e_decap_take(struct mlx5e_decap_entry * e)589 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
590 {
591 	return refcount_inc_not_zero(&e->refcnt);
592 }
593 
594 static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv * priv,struct mlx5e_encap_key * key,uintptr_t hash_key)595 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
596 		uintptr_t hash_key)
597 {
598 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
599 	struct mlx5e_encap_key e_key;
600 	struct mlx5e_encap_entry *e;
601 
602 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
603 				   encap_hlist, hash_key) {
604 		e_key.ip_tun_key = &e->tun_info->key;
605 		e_key.tc_tunnel = e->tunnel;
606 		if (e->tunnel->encap_info_equal(&e_key, key) &&
607 		    mlx5e_encap_take(e))
608 			return e;
609 	}
610 
611 	return NULL;
612 }
613 
614 static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv * priv,struct mlx5e_decap_key * key,uintptr_t hash_key)615 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
616 		uintptr_t hash_key)
617 {
618 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
619 	struct mlx5e_decap_key r_key;
620 	struct mlx5e_decap_entry *e;
621 
622 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
623 				   hlist, hash_key) {
624 		r_key = e->key;
625 		if (!cmp_decap_info(&r_key, key) &&
626 		    mlx5e_decap_take(e))
627 			return e;
628 	}
629 	return NULL;
630 }
631 
mlx5e_dup_tun_info(const struct ip_tunnel_info * tun_info)632 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
633 {
634 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
635 
636 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
637 }
638 
is_duplicated_encap_entry(struct mlx5e_priv * priv,struct mlx5e_tc_flow * flow,int out_index,struct mlx5e_encap_entry * e,struct netlink_ext_ack * extack)639 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
640 				      struct mlx5e_tc_flow *flow,
641 				      int out_index,
642 				      struct mlx5e_encap_entry *e,
643 				      struct netlink_ext_ack *extack)
644 {
645 	int i;
646 
647 	for (i = 0; i < out_index; i++) {
648 		if (flow->encaps[i].e != e)
649 			continue;
650 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
651 		netdev_err(priv->netdev, "can't duplicate encap action\n");
652 		return true;
653 	}
654 
655 	return false;
656 }
657 
mlx5e_set_vf_tunnel(struct mlx5_eswitch * esw,struct mlx5_flow_attr * attr,struct mlx5e_tc_mod_hdr_acts * mod_hdr_acts,struct net_device * out_dev,int route_dev_ifindex,int out_index)658 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
659 			       struct mlx5_flow_attr *attr,
660 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
661 			       struct net_device *out_dev,
662 			       int route_dev_ifindex,
663 			       int out_index)
664 {
665 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
666 	struct net_device *route_dev;
667 	u16 vport_num;
668 	int err = 0;
669 	u32 data;
670 
671 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
672 
673 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
674 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
675 		goto out;
676 
677 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
678 	if (err)
679 		goto out;
680 
681 	attr->dest_chain = 0;
682 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
683 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
684 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
685 						       vport_num);
686 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
687 						   MLX5_FLOW_NAMESPACE_FDB,
688 						   VPORT_TO_REG, data);
689 	if (err >= 0) {
690 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
691 		err = 0;
692 	}
693 
694 out:
695 	if (route_dev)
696 		dev_put(route_dev);
697 	return err;
698 }
699 
mlx5e_update_vf_tunnel(struct mlx5_eswitch * esw,struct mlx5_esw_flow_attr * attr,struct mlx5e_tc_mod_hdr_acts * mod_hdr_acts,struct net_device * out_dev,int route_dev_ifindex,int out_index)700 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
701 				  struct mlx5_esw_flow_attr *attr,
702 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
703 				  struct net_device *out_dev,
704 				  int route_dev_ifindex,
705 				  int out_index)
706 {
707 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
708 	struct net_device *route_dev;
709 	u16 vport_num;
710 	int err = 0;
711 	u32 data;
712 
713 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
714 
715 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
716 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
717 		err = -ENODEV;
718 		goto out;
719 	}
720 
721 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
722 	if (err)
723 		goto out;
724 
725 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
726 						       vport_num);
727 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
728 
729 out:
730 	if (route_dev)
731 		dev_put(route_dev);
732 	return err;
733 }
734 
mlx5e_route_tbl_get_last_update(struct mlx5e_priv * priv)735 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
736 {
737 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
738 	struct mlx5_rep_uplink_priv *uplink_priv;
739 	struct mlx5e_rep_priv *uplink_rpriv;
740 	struct mlx5e_tc_tun_encap *encap;
741 	unsigned int ret;
742 
743 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
744 	uplink_priv = &uplink_rpriv->uplink_priv;
745 	encap = uplink_priv->encap;
746 
747 	spin_lock_bh(&encap->route_lock);
748 	ret = encap->route_tbl_last_update;
749 	spin_unlock_bh(&encap->route_lock);
750 	return ret;
751 }
752 
/* Forward declaration: used by mlx5e_attach_encap() ahead of the definition. */
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);
760 
/* Attach @flow to an encap entry for destination slot @out_index, creating
 * the entry if no matching one exists in the eswitch encap table.
 *
 * @priv:       device private data
 * @flow:       flow being attached
 * @attr:       flow attributes; parse_attr->tun_info[out_index] provides the
 *              tunnel key
 * @mirred_dev: tunnel device of the redirect action
 * @out_index:  destination slot within the flow
 * @extack:     netlink extended ack for error reporting
 * @encap_dev:  output; set to the entry's out_dev on success
 *
 * An existing entry is reused once its res_ready completion has fired; a new
 * entry is added to the hash table first and its encap header is built with
 * encap_tbl_lock released. If the entry is not yet flagged valid, the flow
 * is marked SLOW instead of being given the packet reformat object.
 *
 * Return: 0 on success; -EOPNOTSUPP for an unsupported tunnel or a duplicate
 * encap action, -ENOMEM on allocation failure, -EREMOTEIO when an existing
 * entry failed to initialize, or the error from a helper.
 */
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	const struct mlx5e_mpls_info *mpls_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	mpls_info = &parse_attr->mpls_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		/* the lock is dropped while waiting for the entry to finish initializing */
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	/* NOTE(review): for any other family err stays 0 and the entry is
	 * marked initialized without a header having been created - confirm
	 * no other family can reach this point.
	 */
	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
				       tbl_time_before, out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
	} else {
		flow_flag_set(flow, SLOW);
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	/* entry, if any, is already in the hash table: release it through its refcount */
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	/* entry was never added to the hash table: free it directly */
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}
901 
/* Attach @flow to an L3-tunnel-to-L2 decap reformat entry keyed by the
 * flow's rebuilt ethernet header (esw_attr->eth), creating the entry if no
 * matching one exists in the eswitch decap table.
 *
 * An existing entry is reused once its res_ready completion has fired. A new
 * entry is added to the hash table first and its packet reformat object is
 * allocated with decap_tbl_lock released.
 *
 * Return: 0 on success; -EOPNOTSUPP when the header is larger than the
 * device's max_encap_header_size capability, -ENOMEM on allocation failure,
 * -EREMOTEIO when an existing entry failed to initialize, or the error from
 * the reformat allocation.
 */
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		/* the lock is dropped while waiting for the entry to finish initializing */
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(attr->eth);
	reformat_params.data = &attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		/* a non-zero compl_result marks the entry as failed */
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	/* drop the reference held on the entry */
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}
980 
cmp_route_info(struct mlx5e_route_key * a,struct mlx5e_route_key * b)981 static int cmp_route_info(struct mlx5e_route_key *a,
982 			  struct mlx5e_route_key *b)
983 {
984 	if (a->ip_version == 4 && b->ip_version == 4)
985 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
986 			      sizeof(a->endpoint_ip.v4));
987 	else if (a->ip_version == 6 && b->ip_version == 6)
988 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
989 			      sizeof(a->endpoint_ip.v6));
990 	return 1;
991 }
992 
hash_route_info(struct mlx5e_route_key * key)993 static u32 hash_route_info(struct mlx5e_route_key *key)
994 {
995 	if (key->ip_version == 4)
996 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
997 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
998 }
999 
/* Free route entry @r.
 *
 * Warns if any decap flow or encap entry is still linked on the entry, then
 * frees it with kfree_rcu(). @priv is not used.
 */
static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}
1008 
mlx5e_route_put(struct mlx5e_priv * priv,struct mlx5e_route_entry * r)1009 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1010 {
1011 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1012 
1013 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1014 		return;
1015 
1016 	hash_del_rcu(&r->hlist);
1017 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1018 
1019 	mlx5e_route_dealloc(priv, r);
1020 }
1021 
mlx5e_route_put_locked(struct mlx5e_priv * priv,struct mlx5e_route_entry * r)1022 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1023 {
1024 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1025 
1026 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1027 
1028 	if (!refcount_dec_and_test(&r->refcnt))
1029 		return;
1030 	hash_del_rcu(&r->hlist);
1031 	mlx5e_route_dealloc(priv, r);
1032 }
1033 
1034 static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap * encap,struct mlx5e_route_key * key,u32 hash_key)1035 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1036 		u32 hash_key)
1037 {
1038 	struct mlx5e_route_key r_key;
1039 	struct mlx5e_route_entry *r;
1040 
1041 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1042 		r_key = r->key;
1043 		if (!cmp_route_info(&r_key, key) &&
1044 		    refcount_inc_not_zero(&r->refcnt))
1045 			return r;
1046 	}
1047 	return NULL;
1048 }
1049 
1050 static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv * priv,struct mlx5e_route_key * key,int tunnel_dev_index,unsigned long * route_tbl_change_time)1051 mlx5e_route_get_create(struct mlx5e_priv *priv,
1052 		       struct mlx5e_route_key *key,
1053 		       int tunnel_dev_index,
1054 		       unsigned long *route_tbl_change_time)
1055 {
1056 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1057 	struct mlx5_rep_uplink_priv *uplink_priv;
1058 	struct mlx5e_rep_priv *uplink_rpriv;
1059 	struct mlx5e_tc_tun_encap *encap;
1060 	struct mlx5e_route_entry *r;
1061 	u32 hash_key;
1062 
1063 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1064 	uplink_priv = &uplink_rpriv->uplink_priv;
1065 	encap = uplink_priv->encap;
1066 
1067 	hash_key = hash_route_info(key);
1068 	spin_lock_bh(&encap->route_lock);
1069 	r = mlx5e_route_get(encap, key, hash_key);
1070 	spin_unlock_bh(&encap->route_lock);
1071 	if (r) {
1072 		if (!mlx5e_route_entry_valid(r)) {
1073 			mlx5e_route_put_locked(priv, r);
1074 			return ERR_PTR(-EINVAL);
1075 		}
1076 		return r;
1077 	}
1078 
1079 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1080 	if (!r)
1081 		return ERR_PTR(-ENOMEM);
1082 
1083 	r->key = *key;
1084 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1085 	r->tunnel_dev_index = tunnel_dev_index;
1086 	refcount_set(&r->refcnt, 1);
1087 	INIT_LIST_HEAD(&r->decap_flows);
1088 	INIT_LIST_HEAD(&r->encap_entries);
1089 
1090 	spin_lock_bh(&encap->route_lock);
1091 	*route_tbl_change_time = encap->route_tbl_last_update;
1092 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1093 	spin_unlock_bh(&encap->route_lock);
1094 
1095 	return r;
1096 }
1097 
1098 static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap * encap,struct mlx5e_route_key * key)1099 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1100 {
1101 	u32 hash_key = hash_route_info(key);
1102 	struct mlx5e_route_entry *r;
1103 
1104 	spin_lock_bh(&encap->route_lock);
1105 	encap->route_tbl_last_update = jiffies;
1106 	r = mlx5e_route_get(encap, key, hash_key);
1107 	spin_unlock_bh(&encap->route_lock);
1108 
1109 	return r;
1110 }
1111 
/* Context of one deferred FIB event for a tracked route entry. Allocated by
 * mlx5e_tc_init_fib_work() and freed by the work handler.
 */
struct mlx5e_tc_fib_event_data {
	struct work_struct work; /* runs mlx5e_tc_fib_event_work() */
	unsigned long event; /* FIB notifier event, e.g. FIB_EVENT_ENTRY_REPLACE */
	struct mlx5e_route_entry *r; /* route entry to update; put by the handler */
	struct net_device *ul_dev; /* uplink netdev; dev_put() by the handler */
};
1118 
1119 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1120 static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event,struct net_device * ul_dev,gfp_t flags)1121 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1122 {
1123 	struct mlx5e_tc_fib_event_data *fib_work;
1124 
1125 	fib_work = kzalloc(sizeof(*fib_work), flags);
1126 	if (WARN_ON(!fib_work))
1127 		return NULL;
1128 
1129 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1130 	fib_work->event = event;
1131 	fib_work->ul_dev = ul_dev;
1132 
1133 	return fib_work;
1134 }
1135 
1136 static int
mlx5e_route_enqueue_update(struct mlx5e_priv * priv,struct mlx5e_route_entry * r,unsigned long event)1137 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1138 			   struct mlx5e_route_entry *r,
1139 			   unsigned long event)
1140 {
1141 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1142 	struct mlx5e_tc_fib_event_data *fib_work;
1143 	struct mlx5e_rep_priv *uplink_rpriv;
1144 	struct net_device *ul_dev;
1145 
1146 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1147 	ul_dev = uplink_rpriv->netdev;
1148 
1149 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1150 	if (!fib_work)
1151 		return -ENOMEM;
1152 
1153 	dev_hold(ul_dev);
1154 	refcount_inc(&r->refcnt);
1155 	fib_work->r = r;
1156 	queue_work(priv->wq, &fib_work->work);
1157 
1158 	return 0;
1159 }
1160 
mlx5e_attach_decap_route(struct mlx5e_priv * priv,struct mlx5e_tc_flow * flow)1161 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1162 			     struct mlx5e_tc_flow *flow)
1163 {
1164 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1165 	unsigned long tbl_time_before, tbl_time_after;
1166 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1167 	struct mlx5_flow_attr *attr = flow->attr;
1168 	struct mlx5_esw_flow_attr *esw_attr;
1169 	struct mlx5e_route_entry *r;
1170 	struct mlx5e_route_key key;
1171 	int err = 0;
1172 
1173 	esw_attr = attr->esw_attr;
1174 	parse_attr = attr->parse_attr;
1175 	mutex_lock(&esw->offloads.encap_tbl_lock);
1176 	if (!esw_attr->rx_tun_attr)
1177 		goto out;
1178 
1179 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1180 	tbl_time_after = tbl_time_before;
1181 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1182 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1183 		goto out;
1184 
1185 	key.ip_version = attr->tun_ip_version;
1186 	if (key.ip_version == 4)
1187 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1188 	else
1189 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1190 
1191 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1192 				   &tbl_time_after);
1193 	if (IS_ERR(r)) {
1194 		err = PTR_ERR(r);
1195 		goto out;
1196 	}
1197 	/* Routing changed concurrently. FIB event handler might have missed new
1198 	 * entry, schedule update.
1199 	 */
1200 	if (tbl_time_before != tbl_time_after) {
1201 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1202 		if (err) {
1203 			mlx5e_route_put_locked(priv, r);
1204 			goto out;
1205 		}
1206 	}
1207 
1208 	flow->decap_route = r;
1209 	list_add(&flow->decap_routes, &r->decap_flows);
1210 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1211 	return 0;
1212 
1213 out:
1214 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1215 	return err;
1216 }
1217 
/* Attach encap destination @out_index of @flow to the route entry of the
 * tunnel source address.
 *
 * mlx5e_set_vf_tunnel() is called first; a route entry is only needed when
 * it leaves MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE set on the destination.
 * In that case the entry keyed by the tunnel source IP is looked up or
 * created, an update is scheduled if the route table changed since
 * @tbl_time_before, and for a new encap entry (@new_encap_entry) @e is
 * linked on the route's encap_entries list.
 *
 * mlx5e_route_put_locked() is used on the error path, so the caller must
 * hold encap_tbl_lock.
 *
 * Return: 0 on success or when no route entry is needed, a negative errno
 * otherwise.
 */
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	const struct ip_tunnel_info *tun_info;
	struct mlx5e_route_entry *route;
	struct mlx5e_route_key key;
	int err;

	tun_info = parse_attr->tun_info[out_index];

	switch (ip_tunnel_info_af(tun_info)) {
	case AF_INET:
		key.ip_version = 4;
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		break;
	case AF_INET6:
		key.ip_version = 6;
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		break;
	default:
		break;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err)
		return err;
	if (!(esw_attr->dests[out_index].flags &
	      MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return 0;

	route = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				       &tbl_time_after);
	if (IS_ERR(route))
		return PTR_ERR(route);

	/* Routing changed concurrently. FIB event handler might have missed
	 * the new entry, so schedule an update for it.
	 */
	if (tbl_time_after != tbl_time_before) {
		err = mlx5e_route_enqueue_update(priv, route, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, route);
			return err;
		}
	}

	flow->encap_routes[out_index].r = route;
	if (new_encap_entry)
		list_add(&e->route_list, &route->encap_entries);
	flow->encap_routes[out_index].index = out_index;

	return 0;
}
1276 
mlx5e_detach_decap_route(struct mlx5e_priv * priv,struct mlx5e_tc_flow * flow)1277 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1278 			      struct mlx5e_tc_flow *flow)
1279 {
1280 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1281 	struct mlx5e_route_entry *r = flow->decap_route;
1282 
1283 	if (!r)
1284 		return;
1285 
1286 	mutex_lock(&esw->offloads.encap_tbl_lock);
1287 	list_del(&flow->decap_routes);
1288 	flow->decap_route = NULL;
1289 
1290 	if (!refcount_dec_and_test(&r->refcnt)) {
1291 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1292 		return;
1293 	}
1294 	hash_del_rcu(&r->hlist);
1295 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1296 
1297 	mlx5e_route_dealloc(priv, r);
1298 }
1299 
mlx5e_detach_encap_route(struct mlx5e_priv * priv,struct mlx5e_tc_flow * flow,int out_index)1300 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1301 				     struct mlx5e_tc_flow *flow,
1302 				     int out_index)
1303 {
1304 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1305 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1306 	struct mlx5e_encap_entry *e, *tmp;
1307 
1308 	if (!r)
1309 		return;
1310 
1311 	mutex_lock(&esw->offloads.encap_tbl_lock);
1312 	flow->encap_routes[out_index].r = NULL;
1313 
1314 	if (!refcount_dec_and_test(&r->refcnt)) {
1315 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1316 		return;
1317 	}
1318 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1319 		list_del_init(&e->route_list);
1320 	hash_del_rcu(&r->hlist);
1321 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1322 
1323 	mlx5e_route_dealloc(priv, r);
1324 }
1325 
/* Remove the offloaded rules of every flow in @encap_flows and mark encap
 * entry @e as having no route.
 *
 * For each flow that is currently offloaded, the installed rule (slow path
 * or FDB) is removed, the flow's modify header is deallocated and the encap
 * destination at flow->tmp_entry_index is marked invalid. Afterwards @e gets
 * MLX5_ENCAP_ENTRY_NO_ROUTE and, if it was valid, its packet reformat object
 * is released.
 */
static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		/* Remove whichever rule is installed for this flow. */
		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		/* This destination no longer has a usable encap reformat. */
		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	/* Only an entry flagged valid has its packet reformat released here. */
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}
1360 
mlx5e_reoffload_encap(struct mlx5e_priv * priv,struct net_device * tunnel_dev,struct mlx5e_encap_entry * e,struct list_head * encap_flows)1361 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1362 				  struct net_device *tunnel_dev,
1363 				  struct mlx5e_encap_entry *e,
1364 				  struct list_head *encap_flows)
1365 {
1366 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1367 	struct mlx5e_tc_flow *flow;
1368 	int err;
1369 
1370 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1371 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1372 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1373 	if (err)
1374 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1375 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1376 
1377 	list_for_each_entry(flow, encap_flows, tmp_list) {
1378 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1379 		struct mlx5_esw_flow_attr *esw_attr;
1380 		struct mlx5_flow_handle *rule;
1381 		struct mlx5_flow_attr *attr;
1382 		struct mlx5_flow_spec *spec;
1383 
1384 		if (flow_flag_test(flow, FAILED))
1385 			continue;
1386 
1387 		spec = &flow->attr->parse_attr->spec;
1388 
1389 		attr = mlx5e_tc_get_encap_attr(flow);
1390 		esw_attr = attr->esw_attr;
1391 		parse_attr = attr->parse_attr;
1392 
1393 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1394 					     e->out_dev, e->route_dev_ifindex,
1395 					     flow->tmp_entry_index);
1396 		if (err) {
1397 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1398 			continue;
1399 		}
1400 
1401 		err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1402 		if (err) {
1403 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1404 				       err);
1405 			continue;
1406 		}
1407 
1408 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1409 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1410 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1411 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1412 				goto offload_to_slow_path;
1413 
1414 			err = mlx5e_tc_offload_flow_post_acts(flow);
1415 			if (err) {
1416 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1417 					       err);
1418 				goto offload_to_slow_path;
1419 			}
1420 
1421 			/* update from slow path rule to encap rule */
1422 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1423 			if (IS_ERR(rule)) {
1424 				mlx5e_tc_unoffload_flow_post_acts(flow);
1425 				err = PTR_ERR(rule);
1426 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1427 					       err);
1428 			} else {
1429 				flow->rule[0] = rule;
1430 			}
1431 		} else {
1432 offload_to_slow_path:
1433 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1434 			/* mark the flow's encap dest as non-valid */
1435 			esw_attr->dests[flow->tmp_entry_index].flags &=
1436 				~MLX5_ESW_DEST_ENCAP_VALID;
1437 
1438 			if (IS_ERR(rule)) {
1439 				err = PTR_ERR(rule);
1440 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1441 					       err);
1442 			} else {
1443 				flow->rule[0] = rule;
1444 			}
1445 		}
1446 		flow_flag_set(flow, OFFLOADED);
1447 	}
1448 }
1449 
/* Update every encap entry attached to route entry @r after a FIB event.
 *
 * For each encap entry that has flows: the flows are taken onto a temporary
 * list, invalidated if the route entry is currently valid, re-offloaded if
 * @replace is set, and finally spliced onto @flow_list for the caller to
 * release.
 *
 * Return: 0 on success, -ENODEV when the route's tunnel device cannot be
 * found in the netns of priv->netdev.
 */
static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct mlx5e_encap_entry *encap;
	struct net_device *tun_dev;

	tun_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tun_dev)
		return -ENODEV;

	list_for_each_entry(encap, &r->encap_entries, route_list) {
		LIST_HEAD(taken);

		mlx5e_take_all_encap_flows(encap, &taken);
		if (list_empty(&taken))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, encap, &taken);
		if (replace)
			mlx5e_reoffload_encap(priv, tun_dev, encap, &taken);

		list_splice(&taken, flow_list);
	}

	return 0;
}
1483 
mlx5e_unoffload_flow_list(struct mlx5e_priv * priv,struct list_head * flow_list)1484 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1485 				      struct list_head *flow_list)
1486 {
1487 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1488 	struct mlx5e_tc_flow *flow;
1489 
1490 	list_for_each_entry(flow, flow_list, tmp_list)
1491 		if (mlx5e_is_offloaded_flow(flow))
1492 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1493 }
1494 
mlx5e_reoffload_decap(struct mlx5e_priv * priv,struct list_head * decap_flows)1495 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1496 				  struct list_head *decap_flows)
1497 {
1498 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1499 	struct mlx5e_tc_flow *flow;
1500 
1501 	list_for_each_entry(flow, decap_flows, tmp_list) {
1502 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1503 		struct mlx5_flow_attr *attr = flow->attr;
1504 		struct mlx5_flow_handle *rule;
1505 		struct mlx5_flow_spec *spec;
1506 		int err;
1507 
1508 		if (flow_flag_test(flow, FAILED))
1509 			continue;
1510 
1511 		parse_attr = attr->parse_attr;
1512 		spec = &parse_attr->spec;
1513 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1514 		if (err) {
1515 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1516 				       err);
1517 			continue;
1518 		}
1519 
1520 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1521 		if (IS_ERR(rule)) {
1522 			err = PTR_ERR(rule);
1523 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1524 				       err);
1525 		} else {
1526 			flow->rule[0] = rule;
1527 			flow_flag_set(flow, OFFLOADED);
1528 		}
1529 	}
1530 }
1531 
/* Update the decap flows attached to route entry @r after a FIB event.
 *
 * The flows are taken onto a temporary list, un-offloaded if the route entry
 * is currently valid, re-offloaded if @replace is set, and then spliced onto
 * @flow_list for the caller to release.
 *
 * Return: 0 on success, -ENODEV when the route's tunnel device cannot be
 * found in the netns of priv->netdev.
 */
static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	LIST_HEAD(taken);

	/* The device itself is not used; it only has to exist. */
	if (!__dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index))
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &taken);

	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &taken);
	if (replace)
		mlx5e_reoffload_decap(priv, &taken);

	list_splice(&taken, flow_list);

	return 0;
}
1554 
/* Work handler for a deferred FIB event on a tracked route entry.
 *
 * With RTNL and encap_tbl_lock held, updates the encap entries and the decap
 * flows attached to the route entry; a REPLACE event additionally marks the
 * entry valid. Errors from the update helpers are only logged. After the
 * locks are dropped, the collected flows, the route entry reference and the
 * uplink netdev reference carried by the work item are released and the work
 * item is freed.
 */
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	/* A non-replace event for an entry that is already invalid needs no
	 * work.
	 */
	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	/* Releases happen only after both locks are dropped;
	 * mlx5e_route_put() may take encap_tbl_lock itself.
	 */
	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}
1597 
1598 static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv * priv,struct net_device * ul_dev,struct mlx5e_tc_tun_encap * encap,unsigned long event,struct fib_notifier_info * info)1599 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1600 			 struct net_device *ul_dev,
1601 			 struct mlx5e_tc_tun_encap *encap,
1602 			 unsigned long event,
1603 			 struct fib_notifier_info *info)
1604 {
1605 	struct fib_entry_notifier_info *fen_info;
1606 	struct mlx5e_tc_fib_event_data *fib_work;
1607 	struct mlx5e_route_entry *r;
1608 	struct mlx5e_route_key key;
1609 	struct net_device *fib_dev;
1610 
1611 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1612 	if (fen_info->fi->nh)
1613 		return NULL;
1614 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1615 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1616 	    fen_info->dst_len != 32)
1617 		return NULL;
1618 
1619 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1620 	if (!fib_work)
1621 		return ERR_PTR(-ENOMEM);
1622 
1623 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1624 	key.ip_version = 4;
1625 
1626 	/* Can't fail after this point because releasing reference to r
1627 	 * requires obtaining sleeping mutex which we can't do in atomic
1628 	 * context.
1629 	 */
1630 	r = mlx5e_route_lookup_for_update(encap, &key);
1631 	if (!r)
1632 		goto out;
1633 	fib_work->r = r;
1634 	dev_hold(ul_dev);
1635 
1636 	return fib_work;
1637 
1638 out:
1639 	kfree(fib_work);
1640 	return NULL;
1641 }
1642 
1643 static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv * priv,struct net_device * ul_dev,struct mlx5e_tc_tun_encap * encap,unsigned long event,struct fib_notifier_info * info)1644 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1645 			 struct net_device *ul_dev,
1646 			 struct mlx5e_tc_tun_encap *encap,
1647 			 unsigned long event,
1648 			 struct fib_notifier_info *info)
1649 {
1650 	struct fib6_entry_notifier_info *fen_info;
1651 	struct mlx5e_tc_fib_event_data *fib_work;
1652 	struct mlx5e_route_entry *r;
1653 	struct mlx5e_route_key key;
1654 	struct net_device *fib_dev;
1655 
1656 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1657 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1658 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1659 	    fen_info->rt->fib6_dst.plen != 128)
1660 		return NULL;
1661 
1662 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1663 	if (!fib_work)
1664 		return ERR_PTR(-ENOMEM);
1665 
1666 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1667 	       sizeof(fen_info->rt->fib6_dst.addr));
1668 	key.ip_version = 6;
1669 
1670 	/* Can't fail after this point because releasing reference to r
1671 	 * requires obtaining sleeping mutex which we can't do in atomic
1672 	 * context.
1673 	 */
1674 	r = mlx5e_route_lookup_for_update(encap, &key);
1675 	if (!r)
1676 		goto out;
1677 	fib_work->r = r;
1678 	dev_hold(ul_dev);
1679 
1680 	return fib_work;
1681 
1682 out:
1683 	kfree(fib_work);
1684 	return NULL;
1685 }
1686 
mlx5e_tc_tun_fib_event(struct notifier_block * nb,unsigned long event,void * ptr)1687 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1688 {
1689 	struct mlx5e_tc_fib_event_data *fib_work;
1690 	struct fib_notifier_info *info = ptr;
1691 	struct mlx5e_tc_tun_encap *encap;
1692 	struct net_device *ul_dev;
1693 	struct mlx5e_priv *priv;
1694 
1695 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1696 	priv = encap->priv;
1697 	ul_dev = priv->netdev;
1698 	priv = netdev_priv(ul_dev);
1699 
1700 	switch (event) {
1701 	case FIB_EVENT_ENTRY_REPLACE:
1702 	case FIB_EVENT_ENTRY_DEL:
1703 		if (info->family == AF_INET)
1704 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1705 		else if (info->family == AF_INET6)
1706 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1707 		else
1708 			return NOTIFY_DONE;
1709 
1710 		if (!IS_ERR_OR_NULL(fib_work)) {
1711 			queue_work(priv->wq, &fib_work->work);
1712 		} else if (IS_ERR(fib_work)) {
1713 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1714 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1715 				       PTR_ERR(fib_work));
1716 		}
1717 
1718 		break;
1719 	default:
1720 		return NOTIFY_DONE;
1721 	}
1722 
1723 	return NOTIFY_DONE;
1724 }
1725 
mlx5e_tc_tun_init(struct mlx5e_priv * priv)1726 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1727 {
1728 	struct mlx5e_tc_tun_encap *encap;
1729 	int err;
1730 
1731 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1732 	if (!encap)
1733 		return ERR_PTR(-ENOMEM);
1734 
1735 	encap->priv = priv;
1736 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1737 	spin_lock_init(&encap->route_lock);
1738 	hash_init(encap->route_tbl);
1739 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1740 				    NULL, NULL);
1741 	if (err) {
1742 		kvfree(encap);
1743 		return ERR_PTR(err);
1744 	}
1745 
1746 	return encap;
1747 }
1748 
mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap * encap)1749 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1750 {
1751 	if (!encap)
1752 		return;
1753 
1754 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1755 	flush_workqueue(encap->priv->wq); /* flush fib event works */
1756 	kvfree(encap);
1757 }
1758