1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 
18 #include "lib/fs_chains.h"
19 #include "en/tc_ct.h"
20 #include "en/mod_hdr.h"
21 #include "en/mapping.h"
22 #include "en/tc/post_act.h"
23 #include "en.h"
24 #include "en_tc.h"
25 #include "en_rep.h"
26 
27 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
28 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
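/* Software-defined ct_state flag bits written to (and matched against) the
 * CTSTATE register. In the pre_ct tables they share metadata_reg_c_2 with the
 * 16-bit zone: zone in the low bits, state bits shifted up by 16 (see
 * mlx5_tc_ct_alloc_pre_ct()).
 */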
29 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
30 #define MLX5_CT_STATE_TRK_BIT BIT(2)
31 #define MLX5_CT_STATE_NAT_BIT BIT(3)
32 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
33 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
34 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
35 
36 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
37 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
38 
39 #define ct_dbg(fmt, args...)\
40 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
41 
42 struct mlx5_tc_ct_priv {
43 	struct mlx5_core_dev *dev;
44 	const struct net_device *netdev;
45 	struct mod_hdr_tbl *mod_hdr_tbl;
46 	struct xarray tuple_ids;
47 	struct rhashtable zone_ht;
48 	struct rhashtable ct_tuples_ht;
49 	struct rhashtable ct_tuples_nat_ht;
50 	struct mlx5_flow_table *ct;
51 	struct mlx5_flow_table *ct_nat;
52 	struct mlx5e_post_act *post_act;
53 	struct mutex control_lock; /* guards parallel adds/dels */
54 	struct mapping_ctx *zone_mapping;
55 	struct mapping_ctx *labels_mapping;
56 	enum mlx5_flow_namespace_type ns_type;
57 	struct mlx5_fs_chains *chains;
58 	spinlock_t ht_lock; /* protects ft entries */
59 };
60 
61 struct mlx5_ct_flow {
62 	struct mlx5_flow_attr *pre_ct_attr;
63 	struct mlx5_flow_handle *pre_ct_rule;
64 	struct mlx5e_post_act_handle *post_act_handle;
65 	struct mlx5_ct_ft *ft;
66 	u32 chain_mapping;
67 };
68 
69 struct mlx5_ct_zone_rule {
70 	struct mlx5_flow_handle *rule;
71 	struct mlx5e_mod_hdr_handle *mh;
72 	struct mlx5_flow_attr *attr;
73 	bool nat;
74 };
75 
76 struct mlx5_tc_ct_pre {
77 	struct mlx5_flow_table *ft;
78 	struct mlx5_flow_group *flow_grp;
79 	struct mlx5_flow_group *miss_grp;
80 	struct mlx5_flow_handle *flow_rule;
81 	struct mlx5_flow_handle *miss_rule;
82 	struct mlx5_modify_hdr *modify_hdr;
83 };
84 
85 struct mlx5_ct_ft {
86 	struct rhash_head node;
87 	u16 zone;
88 	u32 zone_restore_id;
89 	refcount_t refcount;
90 	struct nf_flowtable *nf_ft;
91 	struct mlx5_tc_ct_priv *ct_priv;
92 	struct rhashtable ct_entries_ht;
93 	struct mlx5_tc_ct_pre pre_ct;
94 	struct mlx5_tc_ct_pre pre_ct_nat;
95 };
96 
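/* Connection tuple used as the key of ct_tuples_ht/ct_tuples_nat_ht:
 * L3 addresses, L4 ports and the conntrack zone.
 */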
97 struct mlx5_ct_tuple {
98 	u16 addr_type;
99 	__be16 n_proto;
100 	u8 ip_proto;
101 	struct {
102 		union {
103 			__be32 src_v4;
104 			struct in6_addr src_v6;
105 		};
106 		union {
107 			__be32 dst_v4;
108 			struct in6_addr dst_v6;
109 		};
110 	} ip;
111 	struct {
112 		__be16 src;
113 		__be16 dst;
114 	} port;
115 
116 	u16 zone;
117 };
118 
119 struct mlx5_ct_counter {
120 	struct mlx5_fc *counter;
121 	refcount_t refcount;
122 	bool is_shared;
123 };
124 
125 enum {
126 	MLX5_CT_ENTRY_FLAG_VALID,
127 };
128 
129 struct mlx5_ct_entry {
130 	struct rhash_head node;
131 	struct rhash_head tuple_node;
132 	struct rhash_head tuple_nat_node;
133 	struct mlx5_ct_counter *counter;
134 	unsigned long cookie;
135 	unsigned long restore_cookie;
136 	struct mlx5_ct_tuple tuple;
137 	struct mlx5_ct_tuple tuple_nat;
138 	struct mlx5_ct_zone_rule zone_rules[2];
139 
140 	struct mlx5_tc_ct_priv *ct_priv;
141 	struct work_struct work;
142 
143 	refcount_t refcnt;
144 	unsigned long flags;
145 };
146 
147 static void
148 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
149 				 struct mlx5_flow_attr *attr,
150 				 struct mlx5e_mod_hdr_handle *mh);
151 
152 static const struct rhashtable_params cts_ht_params = {
153 	.head_offset = offsetof(struct mlx5_ct_entry, node),
154 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
155 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
156 	.automatic_shrinking = true,
157 	.min_size = 16 * 1024,
158 };
159 
160 static const struct rhashtable_params zone_params = {
161 	.head_offset = offsetof(struct mlx5_ct_ft, node),
162 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
163 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
164 	.automatic_shrinking = true,
165 };
166 
167 static const struct rhashtable_params tuples_ht_params = {
168 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
169 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
170 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
171 	.automatic_shrinking = true,
172 	.min_size = 16 * 1024,
173 };
174 
175 static const struct rhashtable_params tuples_nat_ht_params = {
176 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
177 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
178 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
179 	.automatic_shrinking = true,
180 	.min_size = 16 * 1024,
181 };
182 
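/* An entry is linked into ct_tuples_nat_ht only when its NATed tuple differs
 * from the original tuple; a linked tuple_nat_node (non-NULL next) is how we
 * detect that.
 */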
183 static bool
184 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
185 {
186 	return !!(entry->tuple_nat_node.next);
187 }
188 
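/* Conntrack labels are 128 bits wide but only MLX5_CT_LABELS_BITS fit in the
 * register, so non-zero labels are compressed through a mapping table and the
 * resulting id is used instead. Id 0 is reserved for all-zero labels.
 */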
189 static int
190 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
191 		       u32 *labels, u32 *id)
192 {
193 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
194 		*id = 0;
195 		return 0;
196 	}
197 
198 	if (mapping_add(ct_priv->labels_mapping, labels, id))
199 		return -EOPNOTSUPP;
200 
201 	return 0;
202 }
203 
204 static void
205 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
206 {
207 	if (id)
208 		mapping_remove(ct_priv->labels_mapping, id);
209 }
210 
211 static int
212 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
213 {
214 	struct flow_match_control control;
215 	struct flow_match_basic basic;
216 
217 	flow_rule_match_basic(rule, &basic);
218 	flow_rule_match_control(rule, &control);
219 
220 	tuple->n_proto = basic.key->n_proto;
221 	tuple->ip_proto = basic.key->ip_proto;
222 	tuple->addr_type = control.key->addr_type;
223 
224 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
225 		struct flow_match_ipv4_addrs match;
226 
227 		flow_rule_match_ipv4_addrs(rule, &match);
228 		tuple->ip.src_v4 = match.key->src;
229 		tuple->ip.dst_v4 = match.key->dst;
230 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
231 		struct flow_match_ipv6_addrs match;
232 
233 		flow_rule_match_ipv6_addrs(rule, &match);
234 		tuple->ip.src_v6 = match.key->src;
235 		tuple->ip.dst_v6 = match.key->dst;
236 	} else {
237 		return -EOPNOTSUPP;
238 	}
239 
240 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
241 		struct flow_match_ports match;
242 
243 		flow_rule_match_ports(rule, &match);
244 		switch (tuple->ip_proto) {
245 		case IPPROTO_TCP:
246 		case IPPROTO_UDP:
247 			tuple->port.src = match.key->src;
248 			tuple->port.dst = match.key->dst;
249 			break;
250 		default:
251 			return -EOPNOTSUPP;
252 		}
253 	} else {
254 		return -EOPNOTSUPP;
255 	}
256 
257 	return 0;
258 }
259 
260 static int
261 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
262 			     struct flow_rule *rule)
263 {
264 	struct flow_action *flow_action = &rule->action;
265 	struct flow_action_entry *act;
266 	u32 offset, val, ip6_offset;
267 	int i;
268 
269 	flow_action_for_each(i, act, flow_action) {
270 		if (act->id != FLOW_ACTION_MANGLE)
271 			continue;
272 
273 		offset = act->mangle.offset;
274 		val = act->mangle.val;
275 		switch (act->mangle.htype) {
276 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
277 			if (offset == offsetof(struct iphdr, saddr))
278 				tuple->ip.src_v4 = cpu_to_be32(val);
279 			else if (offset == offsetof(struct iphdr, daddr))
280 				tuple->ip.dst_v4 = cpu_to_be32(val);
281 			else
282 				return -EOPNOTSUPP;
283 			break;
284 
285 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
286 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
287 			ip6_offset /= 4;
288 			if (ip6_offset < 4)
289 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
290 			else if (ip6_offset < 8)
291 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
292 			else
293 				return -EOPNOTSUPP;
294 			break;
295 
296 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
297 			if (offset == offsetof(struct tcphdr, source))
298 				tuple->port.src = cpu_to_be16(val);
299 			else if (offset == offsetof(struct tcphdr, dest))
300 				tuple->port.dst = cpu_to_be16(val);
301 			else
302 				return -EOPNOTSUPP;
303 			break;
304 
305 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
306 			if (offset == offsetof(struct udphdr, source))
307 				tuple->port.src = cpu_to_be16(val);
308 			else if (offset == offsetof(struct udphdr, dest))
309 				tuple->port.dst = cpu_to_be16(val);
310 			else
311 				return -EOPNOTSUPP;
312 			break;
313 
314 		default:
315 			return -EOPNOTSUPP;
316 		}
317 	}
318 
319 	return 0;
320 }
321 
322 static int
323 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
324 			   struct flow_rule *rule)
325 {
326 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
327 				       outer_headers);
328 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
329 				       outer_headers);
330 	u16 addr_type = 0;
331 	u8 ip_proto = 0;
332 
333 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
334 		struct flow_match_basic match;
335 
336 		flow_rule_match_basic(rule, &match);
337 
338 		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
339 				       headers_v);
340 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
341 			 match.mask->ip_proto);
342 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
343 			 match.key->ip_proto);
344 
345 		ip_proto = match.key->ip_proto;
346 	}
347 
348 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
349 		struct flow_match_control match;
350 
351 		flow_rule_match_control(rule, &match);
352 		addr_type = match.key->addr_type;
353 	}
354 
355 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
356 		struct flow_match_ipv4_addrs match;
357 
358 		flow_rule_match_ipv4_addrs(rule, &match);
359 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
360 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
361 		       &match.mask->src, sizeof(match.mask->src));
362 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
363 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
364 		       &match.key->src, sizeof(match.key->src));
365 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
366 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
367 		       &match.mask->dst, sizeof(match.mask->dst));
368 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
369 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
370 		       &match.key->dst, sizeof(match.key->dst));
371 	}
372 
373 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
374 		struct flow_match_ipv6_addrs match;
375 
376 		flow_rule_match_ipv6_addrs(rule, &match);
377 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
378 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
379 		       &match.mask->src, sizeof(match.mask->src));
380 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
381 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
382 		       &match.key->src, sizeof(match.key->src));
383 
384 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
385 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
386 		       &match.mask->dst, sizeof(match.mask->dst));
387 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
388 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
389 		       &match.key->dst, sizeof(match.key->dst));
390 	}
391 
392 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
393 		struct flow_match_ports match;
394 
395 		flow_rule_match_ports(rule, &match);
396 		switch (ip_proto) {
397 		case IPPROTO_TCP:
398 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
399 				 tcp_sport, ntohs(match.mask->src));
400 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
401 				 tcp_sport, ntohs(match.key->src));
402 
403 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
404 				 tcp_dport, ntohs(match.mask->dst));
405 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
406 				 tcp_dport, ntohs(match.key->dst));
407 			break;
408 
409 		case IPPROTO_UDP:
410 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
411 				 udp_sport, ntohs(match.mask->src));
412 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
413 				 udp_sport, ntohs(match.key->src));
414 
415 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
416 				 udp_dport, ntohs(match.mask->dst));
417 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
418 				 udp_dport, ntohs(match.key->dst));
419 			break;
420 		default:
421 			break;
422 		}
423 	}
424 
425 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
426 		struct flow_match_tcp match;
427 
428 		flow_rule_match_tcp(rule, &match);
429 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
430 			 ntohs(match.mask->flags));
431 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
432 			 ntohs(match.key->flags));
433 	}
434 
435 	return 0;
436 }
437 
438 static void
439 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
440 {
441 	if (entry->counter->is_shared &&
442 	    !refcount_dec_and_test(&entry->counter->refcount))
443 		return;
444 
445 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
446 	kfree(entry->counter);
447 }
448 
449 static void
450 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
451 			  struct mlx5_ct_entry *entry,
452 			  bool nat)
453 {
454 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
455 	struct mlx5_flow_attr *attr = zone_rule->attr;
456 
457 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
458 
459 	mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
460 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
461 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
462 	kfree(attr);
463 }
464 
465 static void
466 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
467 			   struct mlx5_ct_entry *entry)
468 {
469 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
470 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
471 }
472 
473 static struct flow_action_entry *
474 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
475 {
476 	struct flow_action *flow_action = &flow_rule->action;
477 	struct flow_action_entry *act;
478 	int i;
479 
480 	flow_action_for_each(i, act, flow_action) {
481 		if (act->id == FLOW_ACTION_CT_METADATA)
482 			return act;
483 	}
484 
485 	return NULL;
486 }
487 
488 static int
489 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
490 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
491 			       u8 ct_state,
492 			       u32 mark,
493 			       u32 labels_id,
494 			       u8 zone_restore_id)
495 {
496 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
497 	struct mlx5_core_dev *dev = ct_priv->dev;
498 	int err;
499 
500 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
501 					CTSTATE_TO_REG, ct_state);
502 	if (err)
503 		return err;
504 
505 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
506 					MARK_TO_REG, mark);
507 	if (err)
508 		return err;
509 
510 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
511 					LABELS_TO_REG, labels_id);
512 	if (err)
513 		return err;
514 
515 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
516 					ZONE_RESTORE_TO_REG, zone_restore_id);
517 	if (err)
518 		return err;
519 
520 	/* Make another copy of zone id in reg_b for
521 	 * NIC rx flows since we don't copy reg_c1 to
522 	 * reg_b upon miss.
523 	 */
524 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
525 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
526 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
527 		if (err)
528 			return err;
529 	}
530 	return 0;
531 }
532 
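/* Translate a single pedit mangle action (the NAT rewrites of an offloaded
 * conntrack entry) into a hardware set_action_in modify-header action.
 * A length of 0 encodes a full 32-bit field write.
 */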
533 static int
534 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
535 				   char *modact)
536 {
537 	u32 offset = act->mangle.offset, field;
538 
539 	switch (act->mangle.htype) {
540 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
541 		MLX5_SET(set_action_in, modact, length, 0);
542 		if (offset == offsetof(struct iphdr, saddr))
543 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
544 		else if (offset == offsetof(struct iphdr, daddr))
545 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
546 		else
547 			return -EOPNOTSUPP;
548 		break;
549 
550 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
551 		MLX5_SET(set_action_in, modact, length, 0);
552 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
553 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
554 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
555 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
556 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
557 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
558 		else if (offset == offsetof(struct ipv6hdr, saddr))
559 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
560 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
561 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
562 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
563 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
564 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
565 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
566 		else if (offset == offsetof(struct ipv6hdr, daddr))
567 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
568 		else
569 			return -EOPNOTSUPP;
570 		break;
571 
572 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
573 		MLX5_SET(set_action_in, modact, length, 16);
574 		if (offset == offsetof(struct tcphdr, source))
575 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
576 		else if (offset == offsetof(struct tcphdr, dest))
577 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
578 		else
579 			return -EOPNOTSUPP;
580 		break;
581 
582 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
583 		MLX5_SET(set_action_in, modact, length, 16);
584 		if (offset == offsetof(struct udphdr, source))
585 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
586 		else if (offset == offsetof(struct udphdr, dest))
587 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
588 		else
589 			return -EOPNOTSUPP;
590 		break;
591 
592 	default:
593 		return -EOPNOTSUPP;
594 	}
595 
596 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
597 	MLX5_SET(set_action_in, modact, offset, 0);
598 	MLX5_SET(set_action_in, modact, field, field);
599 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
600 
601 	return 0;
602 }
603 
604 static int
605 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
606 			    struct flow_rule *flow_rule,
607 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
608 {
609 	struct flow_action *flow_action = &flow_rule->action;
610 	struct mlx5_core_dev *mdev = ct_priv->dev;
611 	struct flow_action_entry *act;
612 	size_t action_size;
613 	char *modact;
614 	int err, i;
615 
616 	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
617 
618 	flow_action_for_each(i, act, flow_action) {
619 		switch (act->id) {
620 		case FLOW_ACTION_MANGLE: {
621 			err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
622 						    mod_acts);
623 			if (err)
624 				return err;
625 
626 			modact = mod_acts->actions +
627 				 mod_acts->num_actions * action_size;
628 
629 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
630 			if (err)
631 				return err;
632 
633 			mod_acts->num_actions++;
634 		}
635 		break;
636 
637 		case FLOW_ACTION_CT_METADATA:
638 			/* Handled earlier */
639 			continue;
640 		default:
641 			return -EOPNOTSUPP;
642 		}
643 	}
644 
645 	return 0;
646 }
647 
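/* Build the modify header used by an offloaded ct entry rule: set the ct
 * metadata registers (ct_state, mark, labels id, zone restore id) and, for
 * the NAT direction, append the header rewrite actions. NAT modify headers
 * are allocated directly rather than through the shared mod_hdr table since
 * their rewrites are tuple-specific.
 */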
648 static int
649 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
650 				struct mlx5_flow_attr *attr,
651 				struct flow_rule *flow_rule,
652 				struct mlx5e_mod_hdr_handle **mh,
653 				u8 zone_restore_id, bool nat)
654 {
655 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
656 	struct flow_action_entry *meta;
657 	u16 ct_state = 0;
658 	int err;
659 
660 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
661 	if (!meta)
662 		return -EOPNOTSUPP;
663 
664 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
665 				     &attr->ct_attr.ct_labels_id);
666 	if (err)
667 		return -EOPNOTSUPP;
668 	if (nat) {
669 		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
670 						  &mod_acts);
671 		if (err)
672 			goto err_mapping;
673 
674 		ct_state |= MLX5_CT_STATE_NAT_BIT;
675 	}
676 
677 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
678 	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
679 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
680 					     ct_state,
681 					     meta->ct_metadata.mark,
682 					     attr->ct_attr.ct_labels_id,
683 					     zone_restore_id);
684 	if (err)
685 		goto err_mapping;
686 
687 	if (nat) {
688 		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
689 							    mod_acts.num_actions,
690 							    mod_acts.actions);
691 		if (IS_ERR(attr->modify_hdr)) {
692 			err = PTR_ERR(attr->modify_hdr);
693 			goto err_mapping;
694 		}
695 
696 		*mh = NULL;
697 	} else {
698 		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
699 					   ct_priv->mod_hdr_tbl,
700 					   ct_priv->ns_type,
701 					   &mod_acts);
702 		if (IS_ERR(*mh)) {
703 			err = PTR_ERR(*mh);
704 			goto err_mapping;
705 		}
706 		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
707 	}
708 
709 	dealloc_mod_hdr_actions(&mod_acts);
710 	return 0;
711 
712 err_mapping:
713 	dealloc_mod_hdr_actions(&mod_acts);
714 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
715 	return err;
716 }
717 
718 static void
719 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
720 				 struct mlx5_flow_attr *attr,
721 				 struct mlx5e_mod_hdr_handle *mh)
722 {
723 	if (mh)
724 		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
725 	else
726 		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
727 }
728 
729 static int
730 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
731 			  struct flow_rule *flow_rule,
732 			  struct mlx5_ct_entry *entry,
733 			  bool nat, u8 zone_restore_id)
734 {
735 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
736 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
737 	struct mlx5_flow_spec *spec = NULL;
738 	struct mlx5_flow_attr *attr;
739 	int err;
740 
741 	zone_rule->nat = nat;
742 
743 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
744 	if (!spec)
745 		return -ENOMEM;
746 
747 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
748 	if (!attr) {
749 		err = -ENOMEM;
750 		goto err_attr;
751 	}
752 
753 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
754 					      &zone_rule->mh,
755 					      zone_restore_id, nat);
756 	if (err) {
757 		ct_dbg("Failed to create ct entry mod hdr");
758 		goto err_mod_hdr;
759 	}
760 
761 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
762 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
763 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
764 	attr->dest_chain = 0;
765 	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
766 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
767 	attr->outer_match_level = MLX5_MATCH_L4;
768 	attr->counter = entry->counter->counter;
769 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
770 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
771 		attr->esw_attr->in_mdev = priv->mdev;
772 
773 	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
774 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
775 
776 	zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
777 	if (IS_ERR(zone_rule->rule)) {
778 		err = PTR_ERR(zone_rule->rule);
779 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
780 		goto err_rule;
781 	}
782 
783 	zone_rule->attr = attr;
784 
785 	kvfree(spec);
786 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
787 
788 	return 0;
789 
790 err_rule:
791 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
792 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
793 err_mod_hdr:
794 	kfree(attr);
795 err_attr:
796 	kvfree(spec);
797 	return err;
798 }
799 
800 static bool
801 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
802 {
803 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
804 }
805 
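/* Look up an offloaded entry by tuple, first in the original tuples table and
 * then in the NAT tuples table. Only a valid entry whose refcount could be
 * taken is returned; a matching but unusable entry yields ERR_PTR(-EINVAL).
 */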
806 static struct mlx5_ct_entry *
807 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
808 {
809 	struct mlx5_ct_entry *entry;
810 
811 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
812 				       tuples_ht_params);
813 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
814 	    refcount_inc_not_zero(&entry->refcnt)) {
815 		return entry;
816 	} else if (!entry) {
817 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
818 					       tuple, tuples_nat_ht_params);
819 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
820 		    refcount_inc_not_zero(&entry->refcnt))
821 			return entry;
822 	}
823 
824 	return entry ? ERR_PTR(-EINVAL) : NULL;
825 }
826 
827 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
828 {
829 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
830 
831 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
832 			       &entry->tuple_nat_node,
833 			       tuples_nat_ht_params);
834 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
835 			       tuples_ht_params);
836 }
837 
838 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
839 {
840 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
841 
842 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
843 
844 	spin_lock_bh(&ct_priv->ht_lock);
845 	mlx5_tc_ct_entry_remove_from_tuples(entry);
846 	spin_unlock_bh(&ct_priv->ht_lock);
847 
848 	mlx5_tc_ct_counter_put(ct_priv, entry);
849 	kfree(entry);
850 }
851 
852 static void
853 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
854 {
855 	if (!refcount_dec_and_test(&entry->refcnt))
856 		return;
857 
858 	mlx5_tc_ct_entry_del(entry);
859 }
860 
861 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
862 {
863 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
864 
865 	mlx5_tc_ct_entry_del(entry);
866 }
867 
868 static void
869 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
870 {
871 	struct mlx5e_priv *priv;
872 
873 	if (!refcount_dec_and_test(&entry->refcnt))
874 		return;
875 
876 	priv = netdev_priv(entry->ct_priv->netdev);
877 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
878 	queue_work(priv->wq, &entry->work);
879 }
880 
881 static struct mlx5_ct_counter *
882 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
883 {
884 	struct mlx5_ct_counter *counter;
885 	int ret;
886 
887 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
888 	if (!counter)
889 		return ERR_PTR(-ENOMEM);
890 
891 	counter->is_shared = false;
892 	counter->counter = mlx5_fc_create(ct_priv->dev, true);
893 	if (IS_ERR(counter->counter)) {
894 		ct_dbg("Failed to create counter for ct entry");
895 		ret = PTR_ERR(counter->counter);
896 		kfree(counter);
897 		return ERR_PTR(ret);
898 	}
899 
900 	return counter;
901 }
902 
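/* When conntrack accounting is disabled, both directions of a connection can
 * share a single flow counter: build the reverse tuple and reuse the counter
 * of the reverse entry if it is already offloaded, otherwise create a new
 * shared counter.
 */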
903 static struct mlx5_ct_counter *
904 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
905 			      struct mlx5_ct_entry *entry)
906 {
907 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
908 	struct mlx5_ct_counter *shared_counter;
909 	struct mlx5_ct_entry *rev_entry;
910 	__be16 tmp_port;
911 
912 	/* get the reversed tuple */
913 	tmp_port = rev_tuple.port.src;
914 	rev_tuple.port.src = rev_tuple.port.dst;
915 	rev_tuple.port.dst = tmp_port;
916 
917 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
918 		__be32 tmp_addr = rev_tuple.ip.src_v4;
919 
920 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
921 		rev_tuple.ip.dst_v4 = tmp_addr;
922 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
923 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
924 
925 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
926 		rev_tuple.ip.dst_v6 = tmp_addr;
927 	} else {
928 		return ERR_PTR(-EOPNOTSUPP);
929 	}
930 
931 	/* Use the same counter as the reverse direction */
932 	spin_lock_bh(&ct_priv->ht_lock);
933 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
934 
935 	if (IS_ERR(rev_entry)) {
936 		spin_unlock_bh(&ct_priv->ht_lock);
937 		goto create_counter;
938 	}
939 
940 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
941 		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
942 		shared_counter = rev_entry->counter;
943 		spin_unlock_bh(&ct_priv->ht_lock);
944 
945 		mlx5_tc_ct_entry_put(rev_entry);
946 		return shared_counter;
947 	}
948 
949 	spin_unlock_bh(&ct_priv->ht_lock);
950 
951 create_counter:
952 
953 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
954 	if (IS_ERR(shared_counter))
955 		return shared_counter;
956 
957 	shared_counter->is_shared = true;
958 	refcount_set(&shared_counter->refcount, 1);
959 	return shared_counter;
960 }
961 
962 static int
963 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
964 			   struct flow_rule *flow_rule,
965 			   struct mlx5_ct_entry *entry,
966 			   u8 zone_restore_id)
967 {
968 	int err;
969 
970 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
971 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
972 	else
973 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
974 
975 	if (IS_ERR(entry->counter)) {
976 		err = PTR_ERR(entry->counter);
977 		return err;
978 	}
979 
980 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
981 					zone_restore_id);
982 	if (err)
983 		goto err_orig;
984 
985 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
986 					zone_restore_id);
987 	if (err)
988 		goto err_nat;
989 
990 	return 0;
991 
992 err_nat:
993 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
994 err_orig:
995 	mlx5_tc_ct_counter_put(ct_priv, entry);
996 	return err;
997 }
998 
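/* FLOW_CLS_REPLACE callback from the nf flowtable: parse the offloaded
 * conntrack entry into its original and NATed tuples, link it into the hash
 * tables, and install the entry's rules in both the CT and CT-NAT tables.
 */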
999 static int
1000 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1001 				  struct flow_cls_offload *flow)
1002 {
1003 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
1004 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1005 	struct flow_action_entry *meta_action;
1006 	unsigned long cookie = flow->cookie;
1007 	struct mlx5_ct_entry *entry;
1008 	int err;
1009 
1010 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1011 	if (!meta_action)
1012 		return -EOPNOTSUPP;
1013 
1014 	spin_lock_bh(&ct_priv->ht_lock);
1015 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1016 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
1017 		spin_unlock_bh(&ct_priv->ht_lock);
1018 		mlx5_tc_ct_entry_put(entry);
1019 		return -EEXIST;
1020 	}
1021 	spin_unlock_bh(&ct_priv->ht_lock);
1022 
1023 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1024 	if (!entry)
1025 		return -ENOMEM;
1026 
1027 	entry->tuple.zone = ft->zone;
1028 	entry->cookie = flow->cookie;
1029 	entry->restore_cookie = meta_action->ct_metadata.cookie;
1030 	refcount_set(&entry->refcnt, 2);
1031 	entry->ct_priv = ct_priv;
1032 
1033 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1034 	if (err)
1035 		goto err_set;
1036 
1037 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1038 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1039 	if (err)
1040 		goto err_set;
1041 
1042 	spin_lock_bh(&ct_priv->ht_lock);
1043 
1044 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1045 					    cts_ht_params);
1046 	if (err)
1047 		goto err_entries;
1048 
1049 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1050 					    &entry->tuple_node,
1051 					    tuples_ht_params);
1052 	if (err)
1053 		goto err_tuple;
1054 
1055 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1056 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1057 						    &entry->tuple_nat_node,
1058 						    tuples_nat_ht_params);
1059 		if (err)
1060 			goto err_tuple_nat;
1061 	}
1062 	spin_unlock_bh(&ct_priv->ht_lock);
1063 
1064 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1065 					 ft->zone_restore_id);
1066 	if (err)
1067 		goto err_rules;
1068 
1069 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1070 	mlx5_tc_ct_entry_put(entry); /* drop the reference taken for this function */
1071 
1072 	return 0;
1073 
1074 err_rules:
1075 	spin_lock_bh(&ct_priv->ht_lock);
1076 	if (mlx5_tc_ct_entry_has_nat(entry))
1077 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1078 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1079 err_tuple_nat:
1080 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1081 			       &entry->tuple_node,
1082 			       tuples_ht_params);
1083 err_tuple:
1084 	rhashtable_remove_fast(&ft->ct_entries_ht,
1085 			       &entry->node,
1086 			       cts_ht_params);
1087 err_entries:
1088 	spin_unlock_bh(&ct_priv->ht_lock);
1089 err_set:
1090 	kfree(entry);
1091 	if (err != -EEXIST)
1092 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1093 	return err;
1094 }
1095 
1096 static int
1097 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1098 				  struct flow_cls_offload *flow)
1099 {
1100 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1101 	unsigned long cookie = flow->cookie;
1102 	struct mlx5_ct_entry *entry;
1103 
1104 	spin_lock_bh(&ct_priv->ht_lock);
1105 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1106 	if (!entry) {
1107 		spin_unlock_bh(&ct_priv->ht_lock);
1108 		return -ENOENT;
1109 	}
1110 
1111 	if (!mlx5_tc_ct_entry_valid(entry)) {
1112 		spin_unlock_bh(&ct_priv->ht_lock);
1113 		return -EINVAL;
1114 	}
1115 
1116 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1117 	mlx5_tc_ct_entry_remove_from_tuples(entry);
1118 	spin_unlock_bh(&ct_priv->ht_lock);
1119 
1120 	mlx5_tc_ct_entry_put(entry);
1121 
1122 	return 0;
1123 }
1124 
1125 static int
1126 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1127 				    struct flow_cls_offload *f)
1128 {
1129 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1130 	unsigned long cookie = f->cookie;
1131 	struct mlx5_ct_entry *entry;
1132 	u64 lastuse, packets, bytes;
1133 
1134 	spin_lock_bh(&ct_priv->ht_lock);
1135 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1136 	if (!entry) {
1137 		spin_unlock_bh(&ct_priv->ht_lock);
1138 		return -ENOENT;
1139 	}
1140 
1141 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1142 		spin_unlock_bh(&ct_priv->ht_lock);
1143 		return -EINVAL;
1144 	}
1145 
1146 	spin_unlock_bh(&ct_priv->ht_lock);
1147 
1148 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1149 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1150 			  FLOW_ACTION_HW_STATS_DELAYED);
1151 
1152 	mlx5_tc_ct_entry_put(entry);
1153 	return 0;
1154 }
1155 
1156 static int
1157 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1158 			      void *cb_priv)
1159 {
1160 	struct flow_cls_offload *f = type_data;
1161 	struct mlx5_ct_ft *ft = cb_priv;
1162 
1163 	if (type != TC_SETUP_CLSFLOWER)
1164 		return -EOPNOTSUPP;
1165 
1166 	switch (f->command) {
1167 	case FLOW_CLS_REPLACE:
1168 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1169 	case FLOW_CLS_DESTROY:
1170 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1171 	case FLOW_CLS_STATS:
1172 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1173 	default:
1174 		break;
1175 	}
1176 
1177 	return -EOPNOTSUPP;
1178 }
1179 
1180 static bool
1181 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1182 			u16 zone)
1183 {
1184 	struct flow_keys flow_keys;
1185 
1186 	skb_reset_network_header(skb);
1187 	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
1188 
1189 	tuple->zone = zone;
1190 
1191 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1192 	    flow_keys.basic.ip_proto != IPPROTO_UDP)
1193 		return false;
1194 
1195 	tuple->port.src = flow_keys.ports.src;
1196 	tuple->port.dst = flow_keys.ports.dst;
1197 	tuple->n_proto = flow_keys.basic.n_proto;
1198 	tuple->ip_proto = flow_keys.basic.ip_proto;
1199 
1200 	switch (flow_keys.basic.n_proto) {
1201 	case htons(ETH_P_IP):
1202 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1203 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1204 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1205 		break;
1206 
1207 	case htons(ETH_P_IPV6):
1208 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1209 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1210 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1211 		break;
1212 	default:
1213 		goto out;
1214 	}
1215 
1216 	return true;
1217 
1218 out:
1219 	return false;
1220 }
1221 
1222 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1223 {
1224 	u32 ctstate = 0, ctstate_mask = 0;
1225 
1226 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1227 					&ctstate, &ctstate_mask);
1228 
1229 	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1230 		return -EOPNOTSUPP;
1231 
1232 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1233 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1234 				    ctstate, ctstate_mask);
1235 
1236 	return 0;
1237 }
1238 
1239 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1240 {
1241 	if (!priv || !ct_attr->ct_labels_id)
1242 		return;
1243 
1244 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1245 }
1246 
1247 int
1248 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1249 		     struct mlx5_flow_spec *spec,
1250 		     struct flow_cls_offload *f,
1251 		     struct mlx5_ct_attr *ct_attr,
1252 		     struct netlink_ext_ack *extack)
1253 {
1254 	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1255 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1256 	struct flow_dissector_key_ct *mask, *key;
1257 	u32 ctstate = 0, ctstate_mask = 0;
1258 	u16 ct_state_on, ct_state_off;
1259 	u16 ct_state, ct_state_mask;
1260 	struct flow_match_ct match;
1261 	u32 ct_labels[4];
1262 
1263 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1264 		return 0;
1265 
1266 	if (!priv) {
1267 		NL_SET_ERR_MSG_MOD(extack,
1268 				   "offload of ct matching isn't available");
1269 		return -EOPNOTSUPP;
1270 	}
1271 
1272 	flow_rule_match_ct(rule, &match);
1273 
1274 	key = match.key;
1275 	mask = match.mask;
1276 
1277 	ct_state = key->ct_state;
1278 	ct_state_mask = mask->ct_state;
1279 
1280 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1281 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1282 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
1283 			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1284 			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1285 			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1286 		NL_SET_ERR_MSG_MOD(extack,
1287 				   "only ct_state trk, est, new and rpl are supported for offload");
1288 		return -EOPNOTSUPP;
1289 	}
1290 
1291 	ct_state_on = ct_state & ct_state_mask;
1292 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1293 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1294 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1295 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1296 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1297 	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1298 	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1299 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1300 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1301 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1302 	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1303 	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1304 
1305 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1306 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1307 	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1308 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1309 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1310 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1311 	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1312 	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1313 
1314 	if (rel) {
1315 		NL_SET_ERR_MSG_MOD(extack,
1316 				   "matching on ct_state +rel isn't supported");
1317 		return -EOPNOTSUPP;
1318 	}
1319 
1320 	if (inv) {
1321 		NL_SET_ERR_MSG_MOD(extack,
1322 				   "matching on ct_state +inv isn't supported");
1323 		return -EOPNOTSUPP;
1324 	}
1325 
1326 	if (new) {
1327 		NL_SET_ERR_MSG_MOD(extack,
1328 				   "matching on ct_state +new isn't supported");
1329 		return -EOPNOTSUPP;
1330 	}
1331 
1332 	if (mask->ct_zone)
1333 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1334 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1335 	if (ctstate_mask)
1336 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1337 					    ctstate, ctstate_mask);
1338 	if (mask->ct_mark)
1339 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1340 					    key->ct_mark, mask->ct_mark);
1341 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1342 	    mask->ct_labels[3]) {
1343 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1344 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1345 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1346 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1347 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1348 			return -EOPNOTSUPP;
1349 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1350 					    MLX5_CT_LABELS_MASK);
1351 	}
1352 
1353 	return 0;
1354 }
1355 
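/* Illustrative example (not taken from this file): a pair of tc flower rules
 * that exercise these hooks might look like
 *
 *   tc filter add dev $REP ingress chain 0 proto ip flower ct_state -trk \
 *     action ct zone 5 pipe action goto chain 1
 *   tc filter add dev $REP ingress chain 1 proto ip flower ct_zone 5 \
 *     ct_state +trk+est action mirred egress redirect dev $REP2
 *
 * The "action ct" rule is parsed by mlx5_tc_ct_parse_action() and offloaded
 * through __mlx5_tc_ct_flow_offload(); the ct_state/ct_zone match in chain 1
 * is translated by mlx5_tc_ct_match_add().
 */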
1356 int
1357 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1358 			struct mlx5_flow_attr *attr,
1359 			const struct flow_action_entry *act,
1360 			struct netlink_ext_ack *extack)
1361 {
1362 	if (!priv) {
1363 		NL_SET_ERR_MSG_MOD(extack,
1364 				   "offload of ct action isn't available");
1365 		return -EOPNOTSUPP;
1366 	}
1367 
1368 	attr->ct_attr.zone = act->ct.zone;
1369 	attr->ct_attr.ct_action = act->ct.action;
1370 	attr->ct_attr.nf_ft = act->ct.flow_table;
1371 
1372 	return 0;
1373 }
1374 
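/* Populate a pre_ct/pre_ct_nat table with its two rules. The first matches
 * packets whose zone register already carries this zone with +trk (and, for
 * the NAT table, +nat) state and forwards them straight to post_act, skipping
 * the CT table. The miss rule sets the zone register and sends everything
 * else to the CT (or CT-NAT) table.
 */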
1375 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1376 				  struct mlx5_tc_ct_pre *pre_ct,
1377 				  bool nat)
1378 {
1379 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1380 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1381 	struct mlx5_core_dev *dev = ct_priv->dev;
1382 	struct mlx5_flow_table *ft = pre_ct->ft;
1383 	struct mlx5_flow_destination dest = {};
1384 	struct mlx5_flow_act flow_act = {};
1385 	struct mlx5_modify_hdr *mod_hdr;
1386 	struct mlx5_flow_handle *rule;
1387 	struct mlx5_flow_spec *spec;
1388 	u32 ctstate;
1389 	u16 zone;
1390 	int err;
1391 
1392 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1393 	if (!spec)
1394 		return -ENOMEM;
1395 
1396 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1397 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1398 					ZONE_TO_REG, zone);
1399 	if (err) {
1400 		ct_dbg("Failed to set zone register mapping");
1401 		goto err_mapping;
1402 	}
1403 
1404 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1405 					   pre_mod_acts.num_actions,
1406 					   pre_mod_acts.actions);
1407 
1408 	if (IS_ERR(mod_hdr)) {
1409 		err = PTR_ERR(mod_hdr);
1410 		ct_dbg("Failed to create pre ct mod hdr");
1411 		goto err_mapping;
1412 	}
1413 	pre_ct->modify_hdr = mod_hdr;
1414 
1415 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1416 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1417 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1418 	flow_act.modify_hdr = mod_hdr;
1419 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1420 
1421 	/* add flow rule */
1422 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1423 				    zone, MLX5_CT_ZONE_MASK);
1424 	ctstate = MLX5_CT_STATE_TRK_BIT;
1425 	if (nat)
1426 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1427 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1428 
1429 	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1430 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1431 	if (IS_ERR(rule)) {
1432 		err = PTR_ERR(rule);
1433 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1434 		goto err_flow_rule;
1435 	}
1436 	pre_ct->flow_rule = rule;
1437 
1438 	/* add miss rule */
1439 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1440 	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1441 	if (IS_ERR(rule)) {
1442 		err = PTR_ERR(rule);
1443 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1444 		goto err_miss_rule;
1445 	}
1446 	pre_ct->miss_rule = rule;
1447 
1448 	dealloc_mod_hdr_actions(&pre_mod_acts);
1449 	kvfree(spec);
1450 	return 0;
1451 
1452 err_miss_rule:
1453 	mlx5_del_flow_rules(pre_ct->flow_rule);
1454 err_flow_rule:
1455 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1456 err_mapping:
1457 	dealloc_mod_hdr_actions(&pre_mod_acts);
1458 	kvfree(spec);
1459 	return err;
1460 }
1461 
1462 static void
1463 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1464 		       struct mlx5_tc_ct_pre *pre_ct)
1465 {
1466 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1467 	struct mlx5_core_dev *dev = ct_priv->dev;
1468 
1469 	mlx5_del_flow_rules(pre_ct->flow_rule);
1470 	mlx5_del_flow_rules(pre_ct->miss_rule);
1471 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1472 }
1473 
1474 static int
1475 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1476 			struct mlx5_tc_ct_pre *pre_ct,
1477 			bool nat)
1478 {
1479 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1480 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1481 	struct mlx5_core_dev *dev = ct_priv->dev;
1482 	struct mlx5_flow_table_attr ft_attr = {};
1483 	struct mlx5_flow_namespace *ns;
1484 	struct mlx5_flow_table *ft;
1485 	struct mlx5_flow_group *g;
1486 	u32 metadata_reg_c_2_mask;
1487 	u32 *flow_group_in;
1488 	void *misc;
1489 	int err;
1490 
1491 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1492 	if (!ns) {
1493 		err = -EOPNOTSUPP;
1494 		ct_dbg("Failed to get flow namespace");
1495 		return err;
1496 	}
1497 
1498 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1499 	if (!flow_group_in)
1500 		return -ENOMEM;
1501 
1502 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1503 	ft_attr.prio =  ct_priv->ns_type ==  MLX5_FLOW_NAMESPACE_FDB ?
1504 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1505 	ft_attr.max_fte = 2;
1506 	ft_attr.level = 1;
1507 	ft = mlx5_create_flow_table(ns, &ft_attr);
1508 	if (IS_ERR(ft)) {
1509 		err = PTR_ERR(ft);
1510 		ct_dbg("Failed to create pre ct table");
1511 		goto out_free;
1512 	}
1513 	pre_ct->ft = ft;
1514 
1515 	/* create flow group */
1516 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1517 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1518 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1519 		 MLX5_MATCH_MISC_PARAMETERS_2);
1520 
1521 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1522 			    match_criteria.misc_parameters_2);
1523 
1524 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1525 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1526 	if (nat)
1527 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1528 
1529 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1530 		 metadata_reg_c_2_mask);
1531 
1532 	g = mlx5_create_flow_group(ft, flow_group_in);
1533 	if (IS_ERR(g)) {
1534 		err = PTR_ERR(g);
1535 		ct_dbg("Failed to create pre ct group");
1536 		goto err_flow_grp;
1537 	}
1538 	pre_ct->flow_grp = g;
1539 
1540 	/* create miss group */
1541 	memset(flow_group_in, 0, inlen);
1542 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1543 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1544 	g = mlx5_create_flow_group(ft, flow_group_in);
1545 	if (IS_ERR(g)) {
1546 		err = PTR_ERR(g);
1547 		ct_dbg("Failed to create pre ct miss group");
1548 		goto err_miss_grp;
1549 	}
1550 	pre_ct->miss_grp = g;
1551 
1552 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1553 	if (err)
1554 		goto err_add_rules;
1555 
1556 	kvfree(flow_group_in);
1557 	return 0;
1558 
1559 err_add_rules:
1560 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1561 err_miss_grp:
1562 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1563 err_flow_grp:
1564 	mlx5_destroy_flow_table(ft);
1565 out_free:
1566 	kvfree(flow_group_in);
1567 	return err;
1568 }
1569 
1570 static void
1571 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1572 		       struct mlx5_tc_ct_pre *pre_ct)
1573 {
1574 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1575 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1576 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1577 	mlx5_destroy_flow_table(pre_ct->ft);
1578 }
1579 
1580 static int
1581 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1582 {
1583 	int err;
1584 
1585 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1586 	if (err)
1587 		return err;
1588 
1589 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1590 	if (err)
1591 		goto err_pre_ct_nat;
1592 
1593 	return 0;
1594 
1595 err_pre_ct_nat:
1596 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1597 	return err;
1598 }
1599 
1600 static void
1601 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1602 {
1603 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1604 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1605 }
1606 
1607 /* To avoid a false lockdep warning, give ct_entries_ht a lock class
1608  * different from that of the flow group's ftes hash table: when the last
1609  * flow in a group is deleted and the group is then deleted, we get into
1610  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash;
1611  * that takes ht->mutex, but it is a different ht->mutex than the one here.
1612  */
1613 static struct lock_class_key ct_entries_ht_lock_key;
1614 
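/* Get (or create) the per-zone ct flow table context: allocate a zone restore
 * id, build the pre_ct/pre_ct_nat tables and register the block offload
 * callback with the nf flowtable so that conntrack entries in this zone are
 * offered for offload.
 */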
1615 static struct mlx5_ct_ft *
1616 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1617 		     struct nf_flowtable *nf_ft)
1618 {
1619 	struct mlx5_ct_ft *ft;
1620 	int err;
1621 
1622 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1623 	if (ft) {
1624 		refcount_inc(&ft->refcount);
1625 		return ft;
1626 	}
1627 
1628 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1629 	if (!ft)
1630 		return ERR_PTR(-ENOMEM);
1631 
1632 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1633 	if (err)
1634 		goto err_mapping;
1635 
1636 	ft->zone = zone;
1637 	ft->nf_ft = nf_ft;
1638 	ft->ct_priv = ct_priv;
1639 	refcount_set(&ft->refcount, 1);
1640 
1641 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1642 	if (err)
1643 		goto err_alloc_pre_ct;
1644 
1645 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1646 	if (err)
1647 		goto err_init;
1648 
1649 	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1650 
1651 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1652 				     zone_params);
1653 	if (err)
1654 		goto err_insert;
1655 
1656 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1657 					   mlx5_tc_ct_block_flow_offload, ft);
1658 	if (err)
1659 		goto err_add_cb;
1660 
1661 	return ft;
1662 
1663 err_add_cb:
1664 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1665 err_insert:
1666 	rhashtable_destroy(&ft->ct_entries_ht);
1667 err_init:
1668 	mlx5_tc_ct_free_pre_ct_tables(ft);
1669 err_alloc_pre_ct:
1670 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1671 err_mapping:
1672 	kfree(ft);
1673 	return ERR_PTR(err);
1674 }
1675 
1676 static void
1677 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1678 {
1679 	struct mlx5_ct_entry *entry = ptr;
1680 
1681 	mlx5_tc_ct_entry_put(entry);
1682 }
1683 
1684 static void
1685 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1686 {
1687 	if (!refcount_dec_and_test(&ft->refcount))
1688 		return;
1689 
1690 	nf_flow_table_offload_del_cb(ft->nf_ft,
1691 				     mlx5_tc_ct_block_flow_offload, ft);
1692 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1693 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1694 				    mlx5_tc_ct_flush_ft_entry,
1695 				    ct_priv);
1696 	mlx5_tc_ct_free_pre_ct_tables(ft);
1697 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1698 	kfree(ft);
1699 }

/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + ft prio (tc chain)  +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +-----------------------+
 * + zone+nat match      +---------------->+ post_act (see below)  +
 * +---------------------+  set zone       +-----------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +----------------+
 * + post_act       +  original filter actions
 * + fte_id match   +------------------------>
 * +----------------+
 */
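/* Purely illustrative (hypothetical) example of a filter that goes through
 * this translation; the device, zone and chain numbers are not taken from
 * this file:
 *
 *   tc filter add dev $DEV ingress protocol ip chain 0 prio 1 flower \
 *      ct_state -trk action ct zone 1 pipe action goto chain 1
 *
 * The flower match becomes the pre-CT rule in the tc chain table, the
 * "action ct" part is realized by the pre_ct and CT tables above, and the
 * trailing "goto chain 1" runs from post_act once the fte_id is matched back.
 */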
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5e_tc_flow *flow,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_flow_attr *attr)
{
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5e_post_act_handle *handle;
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		ct_dbg("Failed to allocate post action handle");
		goto err_post_act_handle;
	}
	ct_flow->post_act_handle = handle;

	/* Base the flow attributes of both rules on the original rule's attributes */
	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!ct_flow->pre_ct_attr) {
		err = -ENOMEM;
		goto err_alloc_pre;
	}

	pre_ct_attr = ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, attr_sz);

	/* Modify the original rule's action to fwd and modify, keep decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write the chain miss tag for a miss in the ct table, as we don't go
	 * through all prios of this chain the way a normal tc rule miss does.
	 */
	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
					    &chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
	if (err) {
		ct_dbg("Failed to set post action handle");
		goto err_mapping;
	}

	/* If the original flow decaps, the decap is done before going into the
	 * ct table, so add a rewrite of the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);

		err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
						ct_priv->ns_type,
						TUNNEL_TO_REG,
						tun_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* Point the original rule at the ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
						   pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	dealloc_mod_hdr_actions(&pre_mod_acts);

	return ct_flow->pre_ct_rule;

err_insert_orig:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
	kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
	mlx5e_tc_post_act_del(ct_priv->post_act, handle);
err_post_act_handle:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}

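/* Offload a rule whose CT action is "ct clear": no CT tables are involved.
 * A modify-header is built that zeroes the CT-related registers (ct state,
 * mark, label and zone-restore ids) and the original rule is inserted with
 * that rewrite added to its actions.
 */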
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_spec *orig_spec,
				struct mlx5_flow_attr *attr,
				struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int err;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Base the flow attributes on the original rule's attributes */
	pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!pre_ct_attr) {
		err = -ENOMEM;
		goto err_attr;
	}

	memcpy(pre_ct_attr, attr, attr_sz);

	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
	if (err) {
		ct_dbg("Failed to set registers for ct clear");
		goto err_set_registers;
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   mod_acts->num_actions,
					   mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create ct clear mod hdr");
		goto err_set_registers;
	}

	pre_ct_attr->modify_hdr = mod_hdr;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add ct clear rule");
		goto err_insert;
	}

	attr->ct_attr.ct_flow = ct_flow;
	ct_flow->pre_ct_attr = pre_ct_attr;
	ct_flow->pre_ct_rule = rule;
	return rule;

err_insert:
	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
err_set_registers:
	netdev_warn(priv->netdev,
		    "Failed to offload ct clear flow, err %d\n", err);
	kfree(pre_ct_attr);
err_attr:
	kfree(ct_flow);

	return ERR_PTR(err);
}

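/* Entry point from tc flow parsing for rules carrying a CT action. Dispatches
 * to the "ct clear" or full CT offload path under control_lock and returns the
 * handle of the pre-CT rule that replaces the original one.
 */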
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_spec *spec,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
	struct mlx5_flow_handle *rule;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->control_lock);

	if (clear_action)
		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
	else
		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
	mutex_unlock(&priv->control_lock);

	return rule;
}

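/* Tear down what __mlx5_tc_ct_flow_offload()/_clear() set up. Only the full
 * offload path holds a post_act handle, a chain mapping and a zone ft
 * reference, so those are released conditionally.
 */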
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5e_tc_flow *flow,
			 struct mlx5_ct_flow *ct_flow)
{
	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
			    pre_ct_attr);
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

	if (ct_flow->post_act_handle) {
		mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
		mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
	}

	kfree(ct_flow->pre_ct_attr);
	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr)
{
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We may also be called from the error path of flow parsing, before
	 * any HW state was created; in that case there is nothing to clean up.
	 */
	if (!ct_flow)
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
	mutex_unlock(&priv->control_lock);
}

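/* FDB (switchdev) mode prerequisites for CT offload: the legacy vlan-action
 * workaround must not be needed, the FW must support forwarding to a table
 * after a modify-header action, and reg_c1 loopback must be enabled so that
 * CT metadata written to the register is still available to the driver after
 * a HW miss.
 */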
static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround must be avoided for multi-chain rules.
		 * This is just a sanity check, as the pop vlan action should
		 * be supported by any FW that supports ignore_flow_level.
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which are mod header actions.
		 * Therefore, mod header + goto table is required.
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      struct mlx5e_post_act *post_act,
			      const char **err_msg)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	*err_msg = "tc skb extension missing";
	return -EOPNOTSUPP;
#endif
	if (IS_ERR_OR_NULL(post_act)) {
		*err_msg = "tc ct offload not supported, post action is missing";
		return -EOPNOTSUPP;
	}

	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
	return 0;
}

#define INIT_ERR_PREFIX "tc ct offload init failed"

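/* Allocate the CT offload context for a NIC or FDB namespace: the zone and
 * label mapping contexts, the two global CT tables (plain and NAT) and the
 * zone and tuple hash tables. Returns NULL when CT offload cannot be
 * supported, in which case callers simply run without it.
 */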
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type,
		struct mlx5e_post_act *post_act)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	const char *msg;
	u64 mapping_id;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg);
	if (err) {
		mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg);
		goto err_support;
	}

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
						      sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
							sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_act = post_act;
	mutex_init(&ct_priv->control_lock);
	rhashtable_init(&ct_priv->zone_ht, &zone_params);
	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

	return ct_priv;

err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return NULL;
}

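/* Undo mlx5_tc_ct_init(). Safe to call with a NULL ct_priv (i.e. when CT
 * offload was never initialized).
 */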
void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	chains = ct_priv->chains;

	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	kfree(ct_priv);
}

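/* Called on a HW miss to restore conntrack state to the skb: map the
 * zone_restore_id (read back from the CT register) to its zone, rebuild the
 * tuple from the packet, look up the offloaded entry and reattach its
 * conntrack info via tcf_ct_flow_table_restore_skb(). Returns false if the
 * flow cannot be restored.
 */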
bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		return false;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		return false;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (IS_ERR_OR_NULL(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		return false;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;
}