1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/arp.h>
43 #include <net/ipv6_stubs.h>
44 #include <net/bareudp.h>
45 #include <net/bonding.h>
46 #include "en.h"
47 #include "en/tc/post_act.h"
48 #include "en_rep.h"
49 #include "en/rep/tc.h"
50 #include "en/rep/neigh.h"
51 #include "en_tc.h"
52 #include "eswitch.h"
53 #include "fs_core.h"
54 #include "en/port.h"
55 #include "en/tc_tun.h"
56 #include "en/mapping.h"
57 #include "en/tc_ct.h"
58 #include "en/mod_hdr.h"
59 #include "en/tc_tun_encap.h"
60 #include "en/tc/sample.h"
61 #include "en/tc/act/act.h"
62 #include "en/tc/post_meter.h"
63 #include "lib/devcom.h"
64 #include "lib/geneve.h"
65 #include "lib/fs_chains.h"
66 #include "diag/en_tc_tracepoint.h"
67 #include <asm/div64.h>
68 #include "lag/lag.h"
69 #include "lag/mp.h"
70 
71 #define MLX5E_TC_TABLE_NUM_GROUPS 4
72 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
73 
74 struct mlx5e_tc_table {
75 	/* Protects the dynamic assignment of the t parameter
76 	 * which is the nic tc root table.
77 	 */
78 	struct mutex			t_lock;
79 	struct mlx5e_priv		*priv;
80 	struct mlx5_flow_table		*t;
81 	struct mlx5_flow_table		*miss_t;
82 	struct mlx5_fs_chains           *chains;
83 	struct mlx5e_post_act		*post_act;
84 
85 	struct rhashtable               ht;
86 
87 	struct mod_hdr_tbl mod_hdr;
88 	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
89 	DECLARE_HASHTABLE(hairpin_tbl, 8);
90 
91 	struct notifier_block     netdevice_nb;
92 	struct netdev_net_notifier	netdevice_nn;
93 
94 	struct mlx5_tc_ct_priv         *ct;
95 	struct mapping_ctx             *mapping;
96 };
97 
98 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
99 	[CHAIN_TO_REG] = {
100 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
101 		.moffset = 0,
102 		.mlen = 16,
103 	},
104 	[VPORT_TO_REG] = {
105 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
106 		.moffset = 16,
107 		.mlen = 16,
108 	},
109 	[TUNNEL_TO_REG] = {
110 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
111 		.moffset = 8,
112 		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
113 		.soffset = MLX5_BYTE_OFF(fte_match_param,
114 					 misc_parameters_2.metadata_reg_c_1),
115 	},
116 	[ZONE_TO_REG] = zone_to_reg_ct,
117 	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
118 	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
119 	[MARK_TO_REG] = mark_to_reg_ct,
120 	[LABELS_TO_REG] = labels_to_reg_ct,
121 	[FTEID_TO_REG] = fteid_to_reg_ct,
122 	/* For NIC rules we store the restore metadata directly
123 	 * into reg_b that is passed to SW since we don't
124 	 * jump between steering domains.
125 	 */
126 	[NIC_CHAIN_TO_REG] = {
127 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
128 		.moffset = 0,
129 		.mlen = 16,
130 	},
131 	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
132 	[PACKET_COLOR_TO_REG] = packet_color_to_reg,
133 };
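/* Typical usage of a mapping entry (illustrative): a driver-internal id is
 * written to the mapped register through a mod_hdr SET action, e.g.
 *   mlx5e_tc_match_to_reg_set(mdev, &mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
 *                             CHAIN_TO_REG, chain_id);
 * and mappings that also define .soffset (e.g. TUNNEL_TO_REG) can be matched
 * back with mlx5e_tc_match_to_reg_match()/mlx5e_tc_match_to_reg_get_match().
 */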
134 
135 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
136 {
137 	struct mlx5e_tc_table *tc;
138 
139 	tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
140 	return tc ? tc : ERR_PTR(-ENOMEM);
141 }
142 
143 void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
144 {
145 	kvfree(tc);
146 }
147 
148 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
149 {
150 	return tc->chains;
151 }
152 
153 /* To avoid a false lock dependency warning, set the tc_ht lock
154  * class different from the lock class of the ht being used when deleting
155  * the last flow from a group and then deleting the group: we get into
156  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash.
157  * That takes ht->mutex, but it is a different mutex than the tc_ht one here.
158  */
159 static struct lock_class_key tc_ht_lock_key;
160 
161 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
162 static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
163 
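/* OR the given value/mask pair into the metadata register match of the spec
 * at the offset/length described by the given mapping type, preserving any
 * bits already being matched in the same 32-bit register.
 */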
164 void
165 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
166 			    enum mlx5e_tc_attr_to_reg type,
167 			    u32 val,
168 			    u32 mask)
169 {
170 	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
171 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
172 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
173 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
174 	u32 max_mask = GENMASK(match_len - 1, 0);
175 	__be32 curr_mask_be, curr_val_be;
176 	u32 curr_mask, curr_val;
177 
178 	fmask = headers_c + soffset;
179 	fval = headers_v + soffset;
180 
181 	memcpy(&curr_mask_be, fmask, 4);
182 	memcpy(&curr_val_be, fval, 4);
183 
184 	curr_mask = be32_to_cpu(curr_mask_be);
185 	curr_val = be32_to_cpu(curr_val_be);
186 
187 	/* move to correct offset */
188 	WARN_ON(mask > max_mask);
189 	mask <<= moffset;
190 	val <<= moffset;
191 	max_mask <<= moffset;
192 
193 	/* zero val and mask */
194 	curr_mask &= ~max_mask;
195 	curr_val &= ~max_mask;
196 
197 	/* fold the new val and mask into the current ones */
198 	curr_mask |= mask;
199 	curr_val |= val;
200 
201 	/* back to be32 and write */
202 	curr_mask_be = cpu_to_be32(curr_mask);
203 	curr_val_be = cpu_to_be32(curr_val);
204 
205 	memcpy(fmask, &curr_mask_be, 4);
206 	memcpy(fval, &curr_val_be, 4);
207 
208 	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
209 }
210 
211 void
212 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
213 				enum mlx5e_tc_attr_to_reg type,
214 				u32 *val,
215 				u32 *mask)
216 {
217 	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
218 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
219 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
220 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
221 	u32 max_mask = GENMASK(match_len - 1, 0);
222 	__be32 curr_mask_be, curr_val_be;
223 	u32 curr_mask, curr_val;
224 
225 	fmask = headers_c + soffset;
226 	fval = headers_v + soffset;
227 
228 	memcpy(&curr_mask_be, fmask, 4);
229 	memcpy(&curr_val_be, fval, 4);
230 
231 	curr_mask = be32_to_cpu(curr_mask_be);
232 	curr_val = be32_to_cpu(curr_val_be);
233 
234 	*mask = (curr_mask >> moffset) & max_mask;
235 	*val = (curr_val >> moffset) & max_mask;
236 }
237 
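/* Append a mod_hdr SET action that writes the given data into the register
 * described by the mapping type and return the index of the new action, so
 * callers can later rewrite it with mlx5e_tc_match_to_reg_mod_hdr_change().
 */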
238 int
239 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
240 				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
241 				     enum mlx5_flow_namespace_type ns,
242 				     enum mlx5e_tc_attr_to_reg type,
243 				     u32 data)
244 {
245 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
246 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
247 	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
248 	char *modact;
249 	int err;
250 
251 	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
252 	if (IS_ERR(modact))
253 		return PTR_ERR(modact);
254 
255 	/* Firmware has a 5-bit length field; 0 means 32 bits */
256 	if (mlen == 32)
257 		mlen = 0;
258 
259 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
260 	MLX5_SET(set_action_in, modact, field, mfield);
261 	MLX5_SET(set_action_in, modact, offset, moffset);
262 	MLX5_SET(set_action_in, modact, length, mlen);
263 	MLX5_SET(set_action_in, modact, data, data);
264 	err = mod_hdr_acts->num_actions;
265 	mod_hdr_acts->num_actions++;
266 
267 	return err;
268 }
269 
270 struct mlx5e_tc_int_port_priv *
271 mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
272 {
273 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
274 	struct mlx5_rep_uplink_priv *uplink_priv;
275 	struct mlx5e_rep_priv *uplink_rpriv;
276 
277 	if (is_mdev_switchdev_mode(priv->mdev)) {
278 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
279 		uplink_priv = &uplink_rpriv->uplink_priv;
280 
281 		return uplink_priv->int_port_priv;
282 	}
283 
284 	return NULL;
285 }
286 
287 struct mlx5e_flow_meters *
288 mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
289 {
290 	struct mlx5_eswitch *esw = dev->priv.eswitch;
291 	struct mlx5_rep_uplink_priv *uplink_priv;
292 	struct mlx5e_rep_priv *uplink_rpriv;
293 	struct mlx5e_priv *priv;
294 
295 	if (is_mdev_switchdev_mode(dev)) {
296 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
297 		uplink_priv = &uplink_rpriv->uplink_priv;
298 		priv = netdev_priv(uplink_rpriv->netdev);
299 		if (!uplink_priv->flow_meters)
300 			uplink_priv->flow_meters =
301 				mlx5e_flow_meters_init(priv,
302 						       MLX5_FLOW_NAMESPACE_FDB,
303 						       uplink_priv->post_act);
304 		if (!IS_ERR(uplink_priv->flow_meters))
305 			return uplink_priv->flow_meters;
306 	}
307 
308 	return NULL;
309 }
310 
311 static struct mlx5_tc_ct_priv *
312 get_ct_priv(struct mlx5e_priv *priv)
313 {
314 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
315 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
316 	struct mlx5_rep_uplink_priv *uplink_priv;
317 	struct mlx5e_rep_priv *uplink_rpriv;
318 
319 	if (is_mdev_switchdev_mode(priv->mdev)) {
320 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
321 		uplink_priv = &uplink_rpriv->uplink_priv;
322 
323 		return uplink_priv->ct_priv;
324 	}
325 
326 	return tc->ct;
327 }
328 
329 static struct mlx5e_tc_psample *
330 get_sample_priv(struct mlx5e_priv *priv)
331 {
332 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
333 	struct mlx5_rep_uplink_priv *uplink_priv;
334 	struct mlx5e_rep_priv *uplink_rpriv;
335 
336 	if (is_mdev_switchdev_mode(priv->mdev)) {
337 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
338 		uplink_priv = &uplink_rpriv->uplink_priv;
339 
340 		return uplink_priv->tc_psample;
341 	}
342 
343 	return NULL;
344 }
345 
346 static struct mlx5e_post_act *
347 get_post_action(struct mlx5e_priv *priv)
348 {
349 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
350 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
351 	struct mlx5_rep_uplink_priv *uplink_priv;
352 	struct mlx5e_rep_priv *uplink_rpriv;
353 
354 	if (is_mdev_switchdev_mode(priv->mdev)) {
355 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
356 		uplink_priv = &uplink_rpriv->uplink_priv;
357 
358 		return uplink_priv->post_act;
359 	}
360 
361 	return tc->post_act;
362 }
363 
364 struct mlx5_flow_handle *
365 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
366 		    struct mlx5_flow_spec *spec,
367 		    struct mlx5_flow_attr *attr)
368 {
369 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
370 
371 	if (is_mdev_switchdev_mode(priv->mdev))
372 		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
373 
374 	return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
375 }
376 
377 void
378 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
379 		    struct mlx5_flow_handle *rule,
380 		    struct mlx5_flow_attr *attr)
381 {
382 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
383 
384 	if (is_mdev_switchdev_mode(priv->mdev)) {
385 		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
386 		return;
387 	}
388 
389 	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
390 }
391 
392 static bool
393 is_flow_meter_action(struct mlx5_flow_attr *attr)
394 {
395 	return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
396 		(attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
397 }
398 
399 static int
400 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
401 			struct mlx5_flow_attr *attr)
402 {
403 	struct mlx5e_post_act *post_act = get_post_action(priv);
404 	struct mlx5e_post_meter_priv *post_meter;
405 	enum mlx5_flow_namespace_type ns_type;
406 	struct mlx5e_flow_meter_handle *meter;
407 
408 	meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
409 	if (IS_ERR(meter)) {
410 		mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
411 		return PTR_ERR(meter);
412 	}
413 
414 	ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
415 	post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter,
416 					   meter->red_counter);
417 	if (IS_ERR(post_meter)) {
418 		mlx5_core_err(priv->mdev, "Failed to init post meter\n");
419 		goto err_meter_init;
420 	}
421 
422 	attr->meter_attr.meter = meter;
423 	attr->meter_attr.post_meter = post_meter;
424 	attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
425 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
426 
427 	return 0;
428 
429 err_meter_init:
430 	mlx5e_tc_meter_put(meter);
431 	return PTR_ERR(post_meter);
432 }
433 
434 static void
435 mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr)
436 {
437 	mlx5e_post_meter_cleanup(attr->meter_attr.post_meter);
438 	mlx5e_tc_meter_put(attr->meter_attr.meter);
439 }
440 
441 struct mlx5_flow_handle *
442 mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
443 		      struct mlx5_flow_spec *spec,
444 		      struct mlx5_flow_attr *attr)
445 {
446 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
447 	int err;
448 
449 	if (attr->flags & MLX5_ATTR_FLAG_CT) {
450 		struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
451 			&attr->parse_attr->mod_hdr_acts;
452 
453 		return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
454 					       spec, attr,
455 					       mod_hdr_acts);
456 	}
457 
458 	if (!is_mdev_switchdev_mode(priv->mdev))
459 		return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
460 
461 	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
462 		return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
463 
464 	if (is_flow_meter_action(attr)) {
465 		err = mlx5e_tc_add_flow_meter(priv, attr);
466 		if (err)
467 			return ERR_PTR(err);
468 	}
469 
470 	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
471 }
472 
473 void
474 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
475 			struct mlx5_flow_handle *rule,
476 			struct mlx5_flow_attr *attr)
477 {
478 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
479 
480 	if (attr->flags & MLX5_ATTR_FLAG_CT) {
481 		mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
482 		return;
483 	}
484 
485 	if (!is_mdev_switchdev_mode(priv->mdev)) {
486 		mlx5e_del_offloaded_nic_rule(priv, rule, attr);
487 		return;
488 	}
489 
490 	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
491 		mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
492 		return;
493 	}
494 
495 	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
496 
497 	if (attr->meter_attr.meter)
498 		mlx5e_tc_del_flow_meter(attr);
499 }
500 
501 int
502 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
503 			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
504 			  enum mlx5_flow_namespace_type ns,
505 			  enum mlx5e_tc_attr_to_reg type,
506 			  u32 data)
507 {
508 	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
509 
510 	return ret < 0 ? ret : 0;
511 }
512 
513 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
514 					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
515 					  enum mlx5e_tc_attr_to_reg type,
516 					  int act_id, u32 data)
517 {
518 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
519 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
520 	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
521 	char *modact;
522 
523 	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
524 
525 	/* Firmware has a 5-bit length field; 0 means 32 bits */
526 	if (mlen == 32)
527 		mlen = 0;
528 
529 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
530 	MLX5_SET(set_action_in, modact, field, mfield);
531 	MLX5_SET(set_action_in, modact, offset, moffset);
532 	MLX5_SET(set_action_in, modact, length, mlen);
533 	MLX5_SET(set_action_in, modact, data, data);
534 }
535 
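/* State for one hairpin instance: the RQ/SQ pair created between the local
 * function and the peer function, plus the transport domain and TIR used to
 * steer into it. When num_channels > 1, the indirect RQT/TIRs and the ttc
 * table provide RSS spreading across the hairpin queues.
 */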
536 struct mlx5e_hairpin {
537 	struct mlx5_hairpin *pair;
538 
539 	struct mlx5_core_dev *func_mdev;
540 	struct mlx5e_priv *func_priv;
541 	u32 tdn;
542 	struct mlx5e_tir direct_tir;
543 
544 	int num_channels;
545 	struct mlx5e_rqt indir_rqt;
546 	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
547 	struct mlx5_ttc_table *ttc;
548 };
549 
550 struct mlx5e_hairpin_entry {
551 	/* a node of a hash table which keeps all the hairpin entries */
552 	struct hlist_node hairpin_hlist;
553 
554 	/* protects flows list */
555 	spinlock_t flows_lock;
556 	/* flows sharing the same hairpin */
557 	struct list_head flows;
558 	/* hpe's that were not fully initialized when the dead peer update
559 	 * event function traversed them.
560 	 */
561 	struct list_head dead_peer_wait_list;
562 
563 	u16 peer_vhca_id;
564 	u8 prio;
565 	struct mlx5e_hairpin *hp;
566 	refcount_t refcnt;
567 	struct completion res_ready;
568 };
569 
570 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
571 			      struct mlx5e_tc_flow *flow);
572 
573 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
574 {
575 	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
576 		return ERR_PTR(-EINVAL);
577 	return flow;
578 }
579 
580 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
581 {
582 	if (refcount_dec_and_test(&flow->refcnt)) {
583 		mlx5e_tc_del_flow(priv, flow);
584 		kfree_rcu(flow, rcu_head);
585 	}
586 }
587 
588 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
589 {
590 	return flow_flag_test(flow, ESWITCH);
591 }
592 
593 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
594 {
595 	return flow_flag_test(flow, FT);
596 }
597 
598 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
599 {
600 	return flow_flag_test(flow, OFFLOADED);
601 }
602 
603 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
604 {
605 	return mlx5e_is_eswitch_flow(flow) ?
606 		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
607 }
608 
609 static struct mod_hdr_tbl *
610 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
611 {
612 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
613 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
614 
615 	return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
616 		&esw->offloads.mod_hdr :
617 		&tc->mod_hdr;
618 }
619 
620 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
621 				struct mlx5e_tc_flow *flow,
622 				struct mlx5e_tc_flow_parse_attr *parse_attr)
623 {
624 	struct mlx5_modify_hdr *modify_hdr;
625 	struct mlx5e_mod_hdr_handle *mh;
626 
627 	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
628 				  mlx5e_get_flow_namespace(flow),
629 				  &parse_attr->mod_hdr_acts);
630 	if (IS_ERR(mh))
631 		return PTR_ERR(mh);
632 
633 	modify_hdr = mlx5e_mod_hdr_get(mh);
634 	flow->attr->modify_hdr = modify_hdr;
635 	flow->mh = mh;
636 
637 	return 0;
638 }
639 
640 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
641 				 struct mlx5e_tc_flow *flow)
642 {
643 	/* flow wasn't fully initialized */
644 	if (!flow->mh)
645 		return;
646 
647 	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
648 			     flow->mh);
649 	flow->mh = NULL;
650 }
651 
652 static
653 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
654 {
655 	struct mlx5_core_dev *mdev;
656 	struct net_device *netdev;
657 	struct mlx5e_priv *priv;
658 
659 	netdev = dev_get_by_index(net, ifindex);
660 	if (!netdev)
661 		return ERR_PTR(-ENODEV);
662 
663 	priv = netdev_priv(netdev);
664 	mdev = priv->mdev;
665 	dev_put(netdev);
666 
667 	/* Mirred tc action holds a refcount on the ifindex net_device (see
668 	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
669 	 * after dev_put(netdev), while we're in the context of adding a tc flow.
670 	 *
671 	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
672 	 * stored in a hairpin object, which exists until all flows, that refer to it, get
673 	 * removed.
674 	 *
675 	 * On the other hand, after a hairpin object has been created, the peer net_device may
676 	 * be removed/unbound while there are still some hairpin flows that are using it. This
677 	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
678 	 * NETDEV_UNREGISTER event of the peer net_device.
679 	 */
680 	return mdev;
681 }
682 
683 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
684 {
685 	struct mlx5e_tir_builder *builder;
686 	int err;
687 
688 	builder = mlx5e_tir_builder_alloc(false);
689 	if (!builder)
690 		return -ENOMEM;
691 
692 	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
693 	if (err)
694 		goto out;
695 
696 	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
697 	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
698 	if (err)
699 		goto create_tir_err;
700 
701 out:
702 	mlx5e_tir_builder_free(builder);
703 	return err;
704 
705 create_tir_err:
706 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
707 
708 	goto out;
709 }
710 
711 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
712 {
713 	mlx5e_tir_destroy(&hp->direct_tir);
714 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
715 }
716 
717 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
718 {
719 	struct mlx5e_priv *priv = hp->func_priv;
720 	struct mlx5_core_dev *mdev = priv->mdev;
721 	struct mlx5e_rss_params_indir *indir;
722 	int err;
723 
724 	indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
725 	if (!indir)
726 		return -ENOMEM;
727 
728 	mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
729 	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
730 				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
731 				   indir);
732 
733 	kvfree(indir);
734 	return err;
735 }
736 
737 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
738 {
739 	struct mlx5e_priv *priv = hp->func_priv;
740 	struct mlx5e_rss_params_hash rss_hash;
741 	enum mlx5_traffic_types tt, max_tt;
742 	struct mlx5e_tir_builder *builder;
743 	int err = 0;
744 
745 	builder = mlx5e_tir_builder_alloc(false);
746 	if (!builder)
747 		return -ENOMEM;
748 
749 	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
750 
751 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
752 		struct mlx5e_rss_params_traffic_type rss_tt;
753 
754 		rss_tt = mlx5e_rss_get_default_tt_config(tt);
755 
756 		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
757 					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
758 					    false);
759 		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
760 
761 		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
762 		if (err) {
763 			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
764 			goto err_destroy_tirs;
765 		}
766 
767 		mlx5e_tir_builder_clear(builder);
768 	}
769 
770 out:
771 	mlx5e_tir_builder_free(builder);
772 	return err;
773 
774 err_destroy_tirs:
775 	max_tt = tt;
776 	for (tt = 0; tt < max_tt; tt++)
777 		mlx5e_tir_destroy(&hp->indir_tir[tt]);
778 
779 	goto out;
780 }
781 
782 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
783 {
784 	int tt;
785 
786 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
787 		mlx5e_tir_destroy(&hp->indir_tir[tt]);
788 }
789 
790 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
791 					 struct ttc_params *ttc_params)
792 {
793 	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
794 	int tt;
795 
796 	memset(ttc_params, 0, sizeof(*ttc_params));
797 
798 	ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
799 						 MLX5_FLOW_NAMESPACE_KERNEL);
800 	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
801 		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
802 		ttc_params->dests[tt].tir_num =
803 			tt == MLX5_TT_ANY ?
804 				mlx5e_tir_get_tirn(&hp->direct_tir) :
805 				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
806 	}
807 
808 	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
809 	ft_attr->prio = MLX5E_TC_PRIO;
810 }
811 
812 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
813 {
814 	struct mlx5e_priv *priv = hp->func_priv;
815 	struct ttc_params ttc_params;
816 	struct mlx5_ttc_table *ttc;
817 	int err;
818 
819 	err = mlx5e_hairpin_create_indirect_rqt(hp);
820 	if (err)
821 		return err;
822 
823 	err = mlx5e_hairpin_create_indirect_tirs(hp);
824 	if (err)
825 		goto err_create_indirect_tirs;
826 
827 	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
828 	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
829 	if (IS_ERR(hp->ttc)) {
830 		err = PTR_ERR(hp->ttc);
831 		goto err_create_ttc_table;
832 	}
833 
834 	ttc = mlx5e_fs_get_ttc(priv->fs, false);
835 	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
836 		   hp->num_channels,
837 		   mlx5_get_ttc_flow_table(ttc)->id);
838 
839 	return 0;
840 
841 err_create_ttc_table:
842 	mlx5e_hairpin_destroy_indirect_tirs(hp);
843 err_create_indirect_tirs:
844 	mlx5e_rqt_destroy(&hp->indir_rqt);
845 
846 	return err;
847 }
848 
849 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
850 {
851 	mlx5_destroy_ttc_table(hp->ttc);
852 	mlx5e_hairpin_destroy_indirect_tirs(hp);
853 	mlx5e_rqt_destroy(&hp->indir_rqt);
854 }
855 
856 static struct mlx5e_hairpin *
857 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
858 		     int peer_ifindex)
859 {
860 	struct mlx5_core_dev *func_mdev, *peer_mdev;
861 	struct mlx5e_hairpin *hp;
862 	struct mlx5_hairpin *pair;
863 	int err;
864 
865 	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
866 	if (!hp)
867 		return ERR_PTR(-ENOMEM);
868 
869 	func_mdev = priv->mdev;
870 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
871 	if (IS_ERR(peer_mdev)) {
872 		err = PTR_ERR(peer_mdev);
873 		goto create_pair_err;
874 	}
875 
876 	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
877 	if (IS_ERR(pair)) {
878 		err = PTR_ERR(pair);
879 		goto create_pair_err;
880 	}
881 	hp->pair = pair;
882 	hp->func_mdev = func_mdev;
883 	hp->func_priv = priv;
884 	hp->num_channels = params->num_channels;
885 
886 	err = mlx5e_hairpin_create_transport(hp);
887 	if (err)
888 		goto create_transport_err;
889 
890 	if (hp->num_channels > 1) {
891 		err = mlx5e_hairpin_rss_init(hp);
892 		if (err)
893 			goto rss_init_err;
894 	}
895 
896 	return hp;
897 
898 rss_init_err:
899 	mlx5e_hairpin_destroy_transport(hp);
900 create_transport_err:
901 	mlx5_core_hairpin_destroy(hp->pair);
902 create_pair_err:
903 	kfree(hp);
904 	return ERR_PTR(err);
905 }
906 
907 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
908 {
909 	if (hp->num_channels > 1)
910 		mlx5e_hairpin_rss_cleanup(hp);
911 	mlx5e_hairpin_destroy_transport(hp);
912 	mlx5_core_hairpin_destroy(hp->pair);
913 	kvfree(hp);
914 }
915 
916 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
917 {
918 	return (peer_vhca_id << 16 | prio);
919 }
920 
921 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
922 						     u16 peer_vhca_id, u8 prio)
923 {
924 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
925 	struct mlx5e_hairpin_entry *hpe;
926 	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
927 
928 	hash_for_each_possible(tc->hairpin_tbl, hpe,
929 			       hairpin_hlist, hash_key) {
930 		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
931 			refcount_inc(&hpe->refcnt);
932 			return hpe;
933 		}
934 	}
935 
936 	return NULL;
937 }
938 
939 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
940 			      struct mlx5e_hairpin_entry *hpe)
941 {
942 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
943 	/* no more hairpin flows for us, release the hairpin pair */
944 	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
945 		return;
946 	hash_del(&hpe->hairpin_hlist);
947 	mutex_unlock(&tc->hairpin_tbl_lock);
948 
949 	if (!IS_ERR_OR_NULL(hpe->hp)) {
950 		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
951 			   dev_name(hpe->hp->pair->peer_mdev->device));
952 
953 		mlx5e_hairpin_destroy(hpe->hp);
954 	}
955 
956 	WARN_ON(!list_empty(&hpe->flows));
957 	kfree(hpe);
958 }
959 
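/* PCP priorities occupy 3 bits (0..7), so 8 is an out-of-band value meaning
 * "no/unspecified priority" for hairpin classification.
 */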
960 #define UNKNOWN_MATCH_PRIO 8
961 
962 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
963 				  struct mlx5_flow_spec *spec, u8 *match_prio,
964 				  struct netlink_ext_ack *extack)
965 {
966 	void *headers_c, *headers_v;
967 	u8 prio_val, prio_mask = 0;
968 	bool vlan_present;
969 
970 #ifdef CONFIG_MLX5_CORE_EN_DCB
971 	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
972 		NL_SET_ERR_MSG_MOD(extack,
973 				   "only PCP trust state supported for hairpin");
974 		return -EOPNOTSUPP;
975 	}
976 #endif
977 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
978 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
979 
980 	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
981 	if (vlan_present) {
982 		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
983 		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
984 	}
985 
986 	if (!vlan_present || !prio_mask) {
987 		prio_val = UNKNOWN_MATCH_PRIO;
988 	} else if (prio_mask != 0x7) {
989 		NL_SET_ERR_MSG_MOD(extack,
990 				   "masked priority match not supported for hairpin");
991 		return -EOPNOTSUPP;
992 	}
993 
994 	*match_prio = prio_val;
995 	return 0;
996 }
997 
998 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
999 				  struct mlx5e_tc_flow *flow,
1000 				  struct mlx5e_tc_flow_parse_attr *parse_attr,
1001 				  struct netlink_ext_ack *extack)
1002 {
1003 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1004 	int peer_ifindex = parse_attr->mirred_ifindex[0];
1005 	struct mlx5_hairpin_params params;
1006 	struct mlx5_core_dev *peer_mdev;
1007 	struct mlx5e_hairpin_entry *hpe;
1008 	struct mlx5e_hairpin *hp;
1009 	u64 link_speed64;
1010 	u32 link_speed;
1011 	u8 match_prio;
1012 	u16 peer_id;
1013 	int err;
1014 
1015 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
1016 	if (IS_ERR(peer_mdev)) {
1017 		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
1018 		return PTR_ERR(peer_mdev);
1019 	}
1020 
1021 	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
1022 		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
1023 		return -EOPNOTSUPP;
1024 	}
1025 
1026 	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
1027 	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
1028 				     extack);
1029 	if (err)
1030 		return err;
1031 
1032 	mutex_lock(&tc->hairpin_tbl_lock);
1033 	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
1034 	if (hpe) {
1035 		mutex_unlock(&tc->hairpin_tbl_lock);
1036 		wait_for_completion(&hpe->res_ready);
1037 
1038 		if (IS_ERR(hpe->hp)) {
1039 			err = -EREMOTEIO;
1040 			goto out_err;
1041 		}
1042 		goto attach_flow;
1043 	}
1044 
1045 	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
1046 	if (!hpe) {
1047 		mutex_unlock(&tc->hairpin_tbl_lock);
1048 		return -ENOMEM;
1049 	}
1050 
1051 	spin_lock_init(&hpe->flows_lock);
1052 	INIT_LIST_HEAD(&hpe->flows);
1053 	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
1054 	hpe->peer_vhca_id = peer_id;
1055 	hpe->prio = match_prio;
1056 	refcount_set(&hpe->refcnt, 1);
1057 	init_completion(&hpe->res_ready);
1058 
1059 	hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
1060 		 hash_hairpin_info(peer_id, match_prio));
1061 	mutex_unlock(&tc->hairpin_tbl_lock);
1062 
1063 	params.log_data_size = 16;
1064 	params.log_data_size = min_t(u8, params.log_data_size,
1065 				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
1066 	params.log_data_size = max_t(u8, params.log_data_size,
1067 				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
1068 
1069 	params.log_num_packets = params.log_data_size -
1070 				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
1071 	params.log_num_packets = min_t(u8, params.log_num_packets,
1072 				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
1073 
1074 	params.q_counter = priv->q_counter;
1075 	/* set one hairpin pair per 50Gbs share of the link */
1076 	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
1077 	link_speed = max_t(u32, link_speed, 50000);
1078 	link_speed64 = link_speed;
1079 	do_div(link_speed64, 50000);
1080 	params.num_channels = link_speed64;
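	/* e.g. a 100Gbs port ends up with two hairpin channels; anything at
	 * or below 50Gbs is clamped to a single channel.
	 */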
1081 
1082 	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
1083 	hpe->hp = hp;
1084 	complete_all(&hpe->res_ready);
1085 	if (IS_ERR(hp)) {
1086 		err = PTR_ERR(hp);
1087 		goto out_err;
1088 	}
1089 
1090 	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
1091 		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
1092 		   dev_name(hp->pair->peer_mdev->device),
1093 		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
1094 
1095 attach_flow:
1096 	if (hpe->hp->num_channels > 1) {
1097 		flow_flag_set(flow, HAIRPIN_RSS);
1098 		flow->attr->nic_attr->hairpin_ft =
1099 			mlx5_get_ttc_flow_table(hpe->hp->ttc);
1100 	} else {
1101 		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
1102 	}
1103 
1104 	flow->hpe = hpe;
1105 	spin_lock(&hpe->flows_lock);
1106 	list_add(&flow->hairpin, &hpe->flows);
1107 	spin_unlock(&hpe->flows_lock);
1108 
1109 	return 0;
1110 
1111 out_err:
1112 	mlx5e_hairpin_put(priv, hpe);
1113 	return err;
1114 }
1115 
1116 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
1117 				   struct mlx5e_tc_flow *flow)
1118 {
1119 	/* flow wasn't fully initialized */
1120 	if (!flow->hpe)
1121 		return;
1122 
1123 	spin_lock(&flow->hpe->flows_lock);
1124 	list_del(&flow->hairpin);
1125 	spin_unlock(&flow->hpe->flows_lock);
1126 
1127 	mlx5e_hairpin_put(priv, flow->hpe);
1128 	flow->hpe = NULL;
1129 }
1130 
1131 struct mlx5_flow_handle *
1132 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
1133 			     struct mlx5_flow_spec *spec,
1134 			     struct mlx5_flow_attr *attr)
1135 {
1136 	struct mlx5_flow_context *flow_context = &spec->flow_context;
1137 	struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
1138 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1139 	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
1140 	struct mlx5_flow_destination dest[2] = {};
1141 	struct mlx5_fs_chains *nic_chains;
1142 	struct mlx5_flow_act flow_act = {
1143 		.action = attr->action,
1144 		.flags    = FLOW_ACT_NO_APPEND,
1145 	};
1146 	struct mlx5_flow_handle *rule;
1147 	struct mlx5_flow_table *ft;
1148 	int dest_ix = 0;
1149 
1150 	nic_chains = mlx5e_nic_chains(tc);
1151 	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1152 	flow_context->flow_tag = nic_attr->flow_tag;
1153 
1154 	if (attr->dest_ft) {
1155 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1156 		dest[dest_ix].ft = attr->dest_ft;
1157 		dest_ix++;
1158 	} else if (nic_attr->hairpin_ft) {
1159 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1160 		dest[dest_ix].ft = nic_attr->hairpin_ft;
1161 		dest_ix++;
1162 	} else if (nic_attr->hairpin_tirn) {
1163 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1164 		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
1165 		dest_ix++;
1166 	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1167 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1168 		if (attr->dest_chain) {
1169 			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1170 								 attr->dest_chain, 1,
1171 								 MLX5E_TC_FT_LEVEL);
1172 			if (IS_ERR(dest[dest_ix].ft))
1173 				return ERR_CAST(dest[dest_ix].ft);
1174 		} else {
1175 			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
1176 		}
1177 		dest_ix++;
1178 	}
1179 
1180 	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1181 	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1182 		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1183 
1184 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1185 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1186 		dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1187 		dest_ix++;
1188 	}
1189 
1190 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1191 		flow_act.modify_hdr = attr->modify_hdr;
1192 
1193 	mutex_lock(&tc->t_lock);
1194 	if (IS_ERR_OR_NULL(tc->t)) {
1195 		/* Create the root table here if it doesn't exist yet */
1196 		tc->t =
1197 			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1198 
1199 		if (IS_ERR(tc->t)) {
1200 			mutex_unlock(&tc->t_lock);
1201 			netdev_err(priv->netdev,
1202 				   "Failed to create tc offload table\n");
1203 			rule = ERR_CAST(tc->t);
1204 			goto err_ft_get;
1205 		}
1206 	}
1207 	mutex_unlock(&tc->t_lock);
1208 
1209 	if (attr->chain || attr->prio)
1210 		ft = mlx5_chains_get_table(nic_chains,
1211 					   attr->chain, attr->prio,
1212 					   MLX5E_TC_FT_LEVEL);
1213 	else
1214 		ft = attr->ft;
1215 
1216 	if (IS_ERR(ft)) {
1217 		rule = ERR_CAST(ft);
1218 		goto err_ft_get;
1219 	}
1220 
1221 	if (attr->outer_match_level != MLX5_MATCH_NONE)
1222 		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1223 
1224 	rule = mlx5_add_flow_rules(ft, spec,
1225 				   &flow_act, dest, dest_ix);
1226 	if (IS_ERR(rule))
1227 		goto err_rule;
1228 
1229 	return rule;
1230 
1231 err_rule:
1232 	if (attr->chain || attr->prio)
1233 		mlx5_chains_put_table(nic_chains,
1234 				      attr->chain, attr->prio,
1235 				      MLX5E_TC_FT_LEVEL);
1236 err_ft_get:
1237 	if (attr->dest_chain)
1238 		mlx5_chains_put_table(nic_chains,
1239 				      attr->dest_chain, 1,
1240 				      MLX5E_TC_FT_LEVEL);
1241 
1242 	return ERR_CAST(rule);
1243 }
1244 
1245 static int
1246 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
1247 			struct mlx5_flow_attr *attr)
1248 
1249 {
1250 	struct mlx5_fc *counter;
1251 
1252 	counter = mlx5_fc_create(counter_dev, true);
1253 	if (IS_ERR(counter))
1254 		return PTR_ERR(counter);
1255 
1256 	attr->counter = counter;
1257 	return 0;
1258 }
1259 
1260 static int
1261 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1262 		      struct mlx5e_tc_flow *flow,
1263 		      struct netlink_ext_ack *extack)
1264 {
1265 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1266 	struct mlx5_flow_attr *attr = flow->attr;
1267 	struct mlx5_core_dev *dev = priv->mdev;
1268 	int err;
1269 
1270 	parse_attr = attr->parse_attr;
1271 
1272 	if (flow_flag_test(flow, HAIRPIN)) {
1273 		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1274 		if (err)
1275 			return err;
1276 	}
1277 
1278 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1279 		err = alloc_flow_attr_counter(dev, attr);
1280 		if (err)
1281 			return err;
1282 	}
1283 
1284 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1285 		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1286 		mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
1287 		if (err)
1288 			return err;
1289 	}
1290 
1291 	if (attr->flags & MLX5_ATTR_FLAG_CT)
1292 		flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
1293 							attr, &parse_attr->mod_hdr_acts);
1294 	else
1295 		flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1296 							     attr);
1297 
1298 	return PTR_ERR_OR_ZERO(flow->rule[0]);
1299 }
1300 
1301 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1302 				  struct mlx5_flow_handle *rule,
1303 				  struct mlx5_flow_attr *attr)
1304 {
1305 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1306 	struct mlx5_fs_chains *nic_chains;
1307 
1308 	nic_chains = mlx5e_nic_chains(tc);
1309 	mlx5_del_flow_rules(rule);
1310 
1311 	if (attr->chain || attr->prio)
1312 		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1313 				      MLX5E_TC_FT_LEVEL);
1314 
1315 	if (attr->dest_chain)
1316 		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1317 				      MLX5E_TC_FT_LEVEL);
1318 }
1319 
1320 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1321 				  struct mlx5e_tc_flow *flow)
1322 {
1323 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1324 	struct mlx5_flow_attr *attr = flow->attr;
1325 
1326 	flow_flag_clear(flow, OFFLOADED);
1327 
1328 	if (attr->flags & MLX5_ATTR_FLAG_CT)
1329 		mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
1330 	else if (!IS_ERR_OR_NULL(flow->rule[0]))
1331 		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1332 
1333 	/* Remove root table if no rules are left to avoid
1334 	 * extra steering hops.
1335 	 */
1336 	mutex_lock(&tc->t_lock);
1337 	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1338 	    !IS_ERR_OR_NULL(tc->t)) {
1339 		mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
1340 		tc->t = NULL;
1341 	}
1342 	mutex_unlock(&tc->t_lock);
1343 
1344 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1345 		mlx5e_detach_mod_hdr(priv, flow);
1346 
1347 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1348 		mlx5_fc_destroy(priv->mdev, attr->counter);
1349 
1350 	if (flow_flag_test(flow, HAIRPIN))
1351 		mlx5e_hairpin_flow_del(priv, flow);
1352 
1353 	free_flow_post_acts(flow);
1354 
1355 	kvfree(attr->parse_attr);
1356 	kfree(flow->attr);
1357 }
1358 
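/* Install an eswitch (FDB) rule for the flow. Slow-path rules are added
 * directly; other rules go through mlx5e_tc_rule_offload(), and when the
 * actions are split for mirroring (esw_attr->split_count) an extra forwarding
 * rule is stored in flow->rule[1].
 */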
1359 struct mlx5_flow_handle *
1360 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1361 			   struct mlx5e_tc_flow *flow,
1362 			   struct mlx5_flow_spec *spec,
1363 			   struct mlx5_flow_attr *attr)
1364 {
1365 	struct mlx5_flow_handle *rule;
1366 
1367 	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1368 		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1369 
1370 	rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
1371 
1372 	if (IS_ERR(rule))
1373 		return rule;
1374 
1375 	if (attr->esw_attr->split_count) {
1376 		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1377 		if (IS_ERR(flow->rule[1]))
1378 			goto err_rule1;
1379 	}
1380 
1381 	return rule;
1382 
1383 err_rule1:
1384 	mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
1385 	return flow->rule[1];
1386 }
1387 
1388 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1389 				  struct mlx5e_tc_flow *flow,
1390 				  struct mlx5_flow_attr *attr)
1391 {
1392 	flow_flag_clear(flow, OFFLOADED);
1393 
1394 	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1395 		return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1396 
1397 	if (attr->esw_attr->split_count)
1398 		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1399 
1400 	mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
1401 }
1402 
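/* Offload the flow to the slow path: forward matching packets to software
 * with only FWD_DEST set and, when the device can forward after a header
 * rewrite, also restore the original chain id via a CHAIN_TO_REG mod_hdr
 * action so processing can resume from the right chain on the miss path.
 */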
1403 struct mlx5_flow_handle *
1404 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1405 			      struct mlx5e_tc_flow *flow,
1406 			      struct mlx5_flow_spec *spec)
1407 {
1408 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
1409 	struct mlx5e_mod_hdr_handle *mh = NULL;
1410 	struct mlx5_flow_attr *slow_attr;
1411 	struct mlx5_flow_handle *rule;
1412 	bool fwd_and_modify_cap;
1413 	u32 chain_mapping = 0;
1414 	int err;
1415 
1416 	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1417 	if (!slow_attr)
1418 		return ERR_PTR(-ENOMEM);
1419 
1420 	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1421 	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1422 	slow_attr->esw_attr->split_count = 0;
1423 	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1424 
1425 	fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
1426 	if (!fwd_and_modify_cap)
1427 		goto skip_restore;
1428 
1429 	err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
1430 	if (err)
1431 		goto err_get_chain;
1432 
1433 	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
1434 					CHAIN_TO_REG, chain_mapping);
1435 	if (err)
1436 		goto err_reg_set;
1437 
1438 	mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
1439 				  MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
1440 	if (IS_ERR(mh)) {
1441 		err = PTR_ERR(mh);
1442 		goto err_attach;
1443 	}
1444 
1445 	slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1446 	slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
1447 
1448 skip_restore:
1449 	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1450 	if (IS_ERR(rule)) {
1451 		err = PTR_ERR(rule);
1452 		goto err_offload;
1453 	}
1454 
1455 	flow->slow_mh = mh;
1456 	flow->chain_mapping = chain_mapping;
1457 	flow_flag_set(flow, SLOW);
1458 
1459 	mlx5e_mod_hdr_dealloc(&mod_acts);
1460 	kfree(slow_attr);
1461 
1462 	return rule;
1463 
1464 err_offload:
1465 	if (fwd_and_modify_cap)
1466 		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
1467 err_attach:
1468 err_reg_set:
1469 	if (fwd_and_modify_cap)
1470 		mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
1471 err_get_chain:
1472 	mlx5e_mod_hdr_dealloc(&mod_acts);
1473 	kfree(slow_attr);
1474 	return ERR_PTR(err);
1475 }
1476 
1477 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1478 				       struct mlx5e_tc_flow *flow)
1479 {
1480 	struct mlx5_flow_attr *slow_attr;
1481 
1482 	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1483 	if (!slow_attr) {
1484 		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1485 		return;
1486 	}
1487 
1488 	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1489 	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1490 	slow_attr->esw_attr->split_count = 0;
1491 	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1492 	if (flow->slow_mh) {
1493 		slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1494 		slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh);
1495 	}
1496 	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1497 	if (flow->slow_mh) {
1498 		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh);
1499 		mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
1500 		flow->chain_mapping = 0;
1501 		flow->slow_mh = NULL;
1502 	}
1503 	flow_flag_clear(flow, SLOW);
1504 	kfree(slow_attr);
1505 }
1506 
1507 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1508  * function.
1509  */
1510 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1511 			     struct list_head *unready_flows)
1512 {
1513 	flow_flag_set(flow, NOT_READY);
1514 	list_add_tail(&flow->unready, unready_flows);
1515 }
1516 
1517 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1518  * function.
1519  */
1520 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1521 {
1522 	list_del(&flow->unready);
1523 	flow_flag_clear(flow, NOT_READY);
1524 }
1525 
1526 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1527 {
1528 	struct mlx5_rep_uplink_priv *uplink_priv;
1529 	struct mlx5e_rep_priv *rpriv;
1530 	struct mlx5_eswitch *esw;
1531 
1532 	esw = flow->priv->mdev->priv.eswitch;
1533 	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1534 	uplink_priv = &rpriv->uplink_priv;
1535 
1536 	mutex_lock(&uplink_priv->unready_flows_lock);
1537 	unready_flow_add(flow, &uplink_priv->unready_flows);
1538 	mutex_unlock(&uplink_priv->unready_flows_lock);
1539 }
1540 
1541 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1542 {
1543 	struct mlx5_rep_uplink_priv *uplink_priv;
1544 	struct mlx5e_rep_priv *rpriv;
1545 	struct mlx5_eswitch *esw;
1546 
1547 	esw = flow->priv->mdev->priv.eswitch;
1548 	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1549 	uplink_priv = &rpriv->uplink_priv;
1550 
1551 	mutex_lock(&uplink_priv->unready_flows_lock);
1552 	unready_flow_del(flow);
1553 	mutex_unlock(&uplink_priv->unready_flows_lock);
1554 }
1555 
1556 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1557 {
1558 	struct mlx5_core_dev *out_mdev, *route_mdev;
1559 	struct mlx5e_priv *out_priv, *route_priv;
1560 
1561 	out_priv = netdev_priv(out_dev);
1562 	out_mdev = out_priv->mdev;
1563 	route_priv = netdev_priv(route_dev);
1564 	route_mdev = route_priv->mdev;
1565 
1566 	if (out_mdev->coredev_type != MLX5_COREDEV_PF)
1567 		return false;
1568 
1569 	if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
1570 	    route_mdev->coredev_type != MLX5_COREDEV_SF)
1571 		return false;
1572 
1573 	return mlx5e_same_hw_devs(out_priv, route_priv);
1574 }
1575 
1576 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1577 {
1578 	struct mlx5e_priv *out_priv, *route_priv;
1579 	struct mlx5_devcom *devcom = NULL;
1580 	struct mlx5_core_dev *route_mdev;
1581 	struct mlx5_eswitch *esw;
1582 	u16 vhca_id;
1583 	int err;
1584 
1585 	out_priv = netdev_priv(out_dev);
1586 	esw = out_priv->mdev->priv.eswitch;
1587 	route_priv = netdev_priv(route_dev);
1588 	route_mdev = route_priv->mdev;
1589 
1590 	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1591 	if (mlx5_lag_is_active(out_priv->mdev)) {
1592 		/* In the lag case we may get devices from different eswitch instances.
1593 		 * If we failed to get the vport num, it most likely means we are on
1594 		 * the wrong eswitch.
1595 		 */
1596 		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1597 		if (err != -ENOENT)
1598 			return err;
1599 
1600 		devcom = out_priv->mdev->priv.devcom;
1601 		esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1602 		if (!esw)
1603 			return -ENODEV;
1604 	}
1605 
1606 	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1607 	if (devcom)
1608 		mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1609 	return err;
1610 }
1611 
1612 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
1613 			      struct mlx5e_tc_flow *flow,
1614 			      struct mlx5_flow_attr *attr)
1615 {
1616 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1617 	struct mlx5_modify_hdr *mod_hdr;
1618 
1619 	mod_hdr = mlx5_modify_header_alloc(priv->mdev,
1620 					   mlx5e_get_flow_namespace(flow),
1621 					   mod_hdr_acts->num_actions,
1622 					   mod_hdr_acts->actions);
1623 	if (IS_ERR(mod_hdr))
1624 		return PTR_ERR(mod_hdr);
1625 
1626 	WARN_ON(attr->modify_hdr);
1627 	attr->modify_hdr = mod_hdr;
1628 
1629 	return 0;
1630 }
1631 
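/* Resolve the encap destination for every mirred device marked with
 * MLX5_ESW_DEST_ENCAP, filling in the destination rep/mdev, and report via
 * vf_tun whether the tunnel is routed through a VF/SF, which is not supported
 * together with mirroring.
 */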
1632 static int
1633 set_encap_dests(struct mlx5e_priv *priv,
1634 		struct mlx5e_tc_flow *flow,
1635 		struct mlx5_flow_attr *attr,
1636 		struct netlink_ext_ack *extack,
1637 		bool *vf_tun)
1638 {
1639 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1640 	struct mlx5_esw_flow_attr *esw_attr;
1641 	struct net_device *encap_dev = NULL;
1642 	struct mlx5e_rep_priv *rpriv;
1643 	struct mlx5e_priv *out_priv;
1644 	int out_index;
1645 	int err = 0;
1646 
1647 	if (!mlx5e_is_eswitch_flow(flow))
1648 		return 0;
1649 
1650 	parse_attr = attr->parse_attr;
1651 	esw_attr = attr->esw_attr;
1652 	*vf_tun = false;
1653 
1654 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1655 		struct net_device *out_dev;
1656 		int mirred_ifindex;
1657 
1658 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1659 			continue;
1660 
1661 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1662 		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1663 		if (!out_dev) {
1664 			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1665 			err = -ENODEV;
1666 			goto out;
1667 		}
1668 		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1669 					 extack, &encap_dev);
1670 		dev_put(out_dev);
1671 		if (err)
1672 			goto out;
1673 
1674 		if (esw_attr->dests[out_index].flags &
1675 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1676 		    !esw_attr->dest_int_port)
1677 			*vf_tun = true;
1678 
1679 		out_priv = netdev_priv(encap_dev);
1680 		rpriv = out_priv->ppriv;
1681 		esw_attr->dests[out_index].rep = rpriv->rep;
1682 		esw_attr->dests[out_index].mdev = out_priv->mdev;
1683 	}
1684 
1685 	if (*vf_tun && esw_attr->out_count > 1) {
1686 		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1687 		err = -EOPNOTSUPP;
1688 		goto out;
1689 	}
1690 
1691 out:
1692 	return err;
1693 }
1694 
1695 static void
1696 clean_encap_dests(struct mlx5e_priv *priv,
1697 		  struct mlx5e_tc_flow *flow,
1698 		  struct mlx5_flow_attr *attr,
1699 		  bool *vf_tun)
1700 {
1701 	struct mlx5_esw_flow_attr *esw_attr;
1702 	int out_index;
1703 
1704 	if (!mlx5e_is_eswitch_flow(flow))
1705 		return;
1706 
1707 	esw_attr = attr->esw_attr;
1708 	*vf_tun = false;
1709 
1710 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1711 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1712 			continue;
1713 
1714 		if (esw_attr->dests[out_index].flags &
1715 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1716 		    !esw_attr->dest_int_port)
1717 			*vf_tun = true;
1718 
1719 		mlx5e_detach_encap(priv, flow, attr, out_index);
1720 		kfree(attr->parse_attr->tun_info[out_index]);
1721 	}
1722 }
1723 
1724 static int
1725 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1726 		      struct mlx5e_tc_flow *flow,
1727 		      struct netlink_ext_ack *extack)
1728 {
1729 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1730 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1731 	struct mlx5_flow_attr *attr = flow->attr;
1732 	struct mlx5_esw_flow_attr *esw_attr;
1733 	u32 max_prio, max_chain;
1734 	bool vf_tun;
1735 	int err = 0;
1736 
1737 	parse_attr = attr->parse_attr;
1738 	esw_attr = attr->esw_attr;
1739 
1740 	/* We check chain range only for tc flows.
1741 	 * For ft flows, we checked attr->chain was originally 0 and set it to
1742 	 * FDB_FT_CHAIN which is outside tc range.
1743 	 * See mlx5e_rep_setup_ft_cb().
1744 	 */
1745 	max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1746 	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1747 		NL_SET_ERR_MSG_MOD(extack,
1748 				   "Requested chain is out of supported range");
1749 		err = -EOPNOTSUPP;
1750 		goto err_out;
1751 	}
1752 
1753 	max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1754 	if (attr->prio > max_prio) {
1755 		NL_SET_ERR_MSG_MOD(extack,
1756 				   "Requested priority is out of supported range");
1757 		err = -EOPNOTSUPP;
1758 		goto err_out;
1759 	}
1760 
1761 	if (flow_flag_test(flow, TUN_RX)) {
1762 		err = mlx5e_attach_decap_route(priv, flow);
1763 		if (err)
1764 			goto err_out;
1765 
1766 		if (!attr->chain && esw_attr->int_port &&
1767 		    attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1768 			/* If decap route device is internal port, change the
1769 			 * source vport value in reg_c0 back to uplink just in
1770 			 * case the rule performs goto chain > 0. If we have a miss
1771 			 * on chain > 0 we want the metadata regs to hold the
1772 			 * chain id so SW will resume handling of this packet
1773 			 * from the proper chain.
1774 			 */
1775 			u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1776 									esw_attr->in_rep->vport);
1777 
1778 			err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1779 							MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1780 							metadata);
1781 			if (err)
1782 				goto err_out;
1783 
1784 			attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1785 		}
1786 	}
1787 
1788 	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1789 		err = mlx5e_attach_decap(priv, flow, extack);
1790 		if (err)
1791 			goto err_out;
1792 	}
1793 
1794 	if (netif_is_ovs_master(parse_attr->filter_dev)) {
1795 		struct mlx5e_tc_int_port *int_port;
1796 
1797 		if (attr->chain) {
1798 			NL_SET_ERR_MSG_MOD(extack,
1799 					   "Internal port rule is only supported on chain 0");
1800 			err = -EOPNOTSUPP;
1801 			goto err_out;
1802 		}
1803 
1804 		if (attr->dest_chain) {
1805 			NL_SET_ERR_MSG_MOD(extack,
1806 					   "Internal port rule offload doesn't support goto action");
1807 			err = -EOPNOTSUPP;
1808 			goto err_out;
1809 		}
1810 
1811 		int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1812 						 parse_attr->filter_dev->ifindex,
1813 						 flow_flag_test(flow, EGRESS) ?
1814 						 MLX5E_TC_INT_PORT_EGRESS :
1815 						 MLX5E_TC_INT_PORT_INGRESS);
1816 		if (IS_ERR(int_port)) {
1817 			err = PTR_ERR(int_port);
1818 			goto err_out;
1819 		}
1820 
1821 		esw_attr->int_port = int_port;
1822 	}
1823 
1824 	err = set_encap_dests(priv, flow, attr, extack, &vf_tun);
1825 	if (err)
1826 		goto err_out;
1827 
1828 	err = mlx5_eswitch_add_vlan_action(esw, attr);
1829 	if (err)
1830 		goto err_out;
1831 
1832 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1833 		if (vf_tun) {
1834 			err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1835 			if (err)
1836 				goto err_out;
1837 		} else {
1838 			err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1839 			if (err)
1840 				goto err_out;
1841 		}
1842 	}
1843 
1844 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1845 		err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
1846 		if (err)
1847 			goto err_out;
1848 	}
1849 
1850 	/* we get here if one of the following takes place:
1851 	 * (1) there's no error
1852 	 * (2) there's an encap action and we don't have a valid neigh entry
1853 	 */
1854 	if (flow_flag_test(flow, SLOW))
1855 		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1856 	else
1857 		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1858 
1859 	if (IS_ERR(flow->rule[0])) {
1860 		err = PTR_ERR(flow->rule[0]);
1861 		goto err_out;
1862 	}
1863 	flow_flag_set(flow, OFFLOADED);
1864 
1865 	return 0;
1866 
1867 err_out:
1868 	flow_flag_set(flow, FAILED);
1869 	return err;
1870 }
1871 
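/* Check whether the flow matches on geneve TLV option data; if so, the TLV
 * option object must be released when the flow is deleted.
 */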
1872 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1873 {
1874 	struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1875 	void *headers_v = MLX5_ADDR_OF(fte_match_param,
1876 				       spec->match_value,
1877 				       misc_parameters_3);
1878 	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1879 					     headers_v,
1880 					     geneve_tlv_option_0_data);
1881 
1882 	return !!geneve_tlv_opt_0_data;
1883 }
1884 
1885 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1886 				  struct mlx5e_tc_flow *flow)
1887 {
1888 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1889 	struct mlx5_flow_attr *attr = flow->attr;
1890 	struct mlx5_esw_flow_attr *esw_attr;
1891 	bool vf_tun;
1892 
1893 	esw_attr = attr->esw_attr;
1894 	mlx5e_put_flow_tunnel_id(flow);
1895 
1896 	if (flow_flag_test(flow, NOT_READY))
1897 		remove_unready_flow(flow);
1898 
1899 	if (mlx5e_is_offloaded_flow(flow)) {
1900 		if (flow_flag_test(flow, SLOW))
1901 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1902 		else
1903 			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1904 	}
1905 	complete_all(&flow->del_hw_done);
1906 
1907 	if (mlx5_flow_has_geneve_opt(flow))
1908 		mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1909 
1910 	mlx5_eswitch_del_vlan_action(esw, attr);
1911 
1912 	if (flow->decap_route)
1913 		mlx5e_detach_decap_route(priv, flow);
1914 
1915 	clean_encap_dests(priv, flow, attr, &vf_tun);
1916 
1917 	mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1918 
1919 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1920 		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1921 		if (vf_tun && attr->modify_hdr)
1922 			mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1923 		else
1924 			mlx5e_detach_mod_hdr(priv, flow);
1925 	}
1926 
1927 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1928 		mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
1929 
1930 	if (esw_attr->int_port)
1931 		mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
1932 
1933 	if (esw_attr->dest_int_port)
1934 		mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
1935 
1936 	if (flow_flag_test(flow, L3_TO_L2_DECAP))
1937 		mlx5e_detach_decap(priv, flow);
1938 
1939 	free_flow_post_acts(flow);
1940 
1941 	if (flow->attr->lag.count)
1942 		mlx5_lag_del_mpesw_rule(esw->dev);
1943 
1944 	kvfree(attr->esw_attr->rx_tun_attr);
1945 	kvfree(attr->parse_attr);
1946 	kfree(flow->attr);
1947 }
1948 
1949 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1950 {
1951 	struct mlx5_flow_attr *attr;
1952 
1953 	attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
1954 	return attr->counter;
1955 }
1956 
1957 /* Iterate over tmp_list of flows attached to flow_list head. */
1958 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1959 {
1960 	struct mlx5e_tc_flow *flow, *tmp;
1961 
1962 	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1963 		mlx5e_flow_put(priv, flow);
1964 }
1965 
1966 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1967 {
1968 	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1969 
1970 	if (!flow_flag_test(flow, ESWITCH) ||
1971 	    !flow_flag_test(flow, DUP))
1972 		return;
1973 
1974 	mutex_lock(&esw->offloads.peer_mutex);
1975 	list_del(&flow->peer);
1976 	mutex_unlock(&esw->offloads.peer_mutex);
1977 
1978 	flow_flag_clear(flow, DUP);
1979 
1980 	if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1981 		mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1982 		kfree(flow->peer_flow);
1983 	}
1984 
1985 	flow->peer_flow = NULL;
1986 }
1987 
1988 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1989 {
1990 	struct mlx5_core_dev *dev = flow->priv->mdev;
1991 	struct mlx5_devcom *devcom = dev->priv.devcom;
1992 	struct mlx5_eswitch *peer_esw;
1993 
1994 	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1995 	if (!peer_esw)
1996 		return;
1997 
1998 	__mlx5e_tc_del_fdb_peer_flow(flow);
1999 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2000 }
2001 
2002 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
2003 			      struct mlx5e_tc_flow *flow)
2004 {
2005 	if (mlx5e_is_eswitch_flow(flow)) {
2006 		mlx5e_tc_del_fdb_peer_flow(flow);
2007 		mlx5e_tc_del_fdb_flow(priv, flow);
2008 	} else {
2009 		mlx5e_tc_del_nic_flow(priv, flow);
2010 	}
2011 }
2012 
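/* Return true when a chain-0 rule contains a goto or sample action; such rules
 * need the tunnel match encoded through the tunnel mapping register so it can
 * be restored after the jump.
 */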
2013 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
2014 {
2015 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2016 	struct flow_action *flow_action = &rule->action;
2017 	const struct flow_action_entry *act;
2018 	int i;
2019 
2020 	if (chain)
2021 		return false;
2022 
2023 	flow_action_for_each(i, act, flow_action) {
2024 		switch (act->id) {
2025 		case FLOW_ACTION_GOTO:
2026 			return true;
2027 		case FLOW_ACTION_SAMPLE:
2028 			return true;
2029 		default:
2030 			continue;
2031 		}
2032 	}
2033 
2034 	return false;
2035 }
2036 
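/* Walk the geneve options mask: an all-zero mask means "don't care"; otherwise
 * the option class and type must be fully masked, since partial matches of
 * tunnel options in chain > 0 are not supported.
 */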
2037 static int
2038 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
2039 				    struct flow_dissector_key_enc_opts *opts,
2040 				    struct netlink_ext_ack *extack,
2041 				    bool *dont_care)
2042 {
2043 	struct geneve_opt *opt;
2044 	int off = 0;
2045 
2046 	*dont_care = true;
2047 
2048 	while (opts->len > off) {
2049 		opt = (struct geneve_opt *)&opts->data[off];
2050 
2051 		if (!(*dont_care) || opt->opt_class || opt->type ||
2052 		    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
2053 			*dont_care = false;
2054 
2055 			if (opt->opt_class != htons(U16_MAX) ||
2056 			    opt->type != U8_MAX) {
2057 				NL_SET_ERR_MSG_MOD(extack,
2058 						   "Partial match of tunnel options in chain > 0 isn't supported");
2059 				netdev_warn(priv->netdev,
2060 					    "Partial match of tunnel options in chain > 0 isn't supported");
2061 				return -EOPNOTSUPP;
2062 			}
2063 		}
2064 
2065 		off += sizeof(struct geneve_opt) + opt->length * 4;
2066 	}
2067 
2068 	return 0;
2069 }
2070 
2071 #define COPY_DISSECTOR(rule, diss_key, dst)\
2072 ({ \
2073 	struct flow_rule *__rule = (rule);\
2074 	typeof(dst) __dst = dst;\
2075 \
2076 	memcpy(__dst,\
2077 	       skb_flow_dissector_target(__rule->match.dissector,\
2078 					 diss_key,\
2079 					 __rule->match.key),\
2080 	       sizeof(*__dst));\
2081 })
2082 
2083 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
2084 				    struct mlx5e_tc_flow *flow,
2085 				    struct flow_cls_offload *f,
2086 				    struct net_device *filter_dev)
2087 {
2088 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2089 	struct netlink_ext_ack *extack = f->common.extack;
2090 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2091 	struct flow_match_enc_opts enc_opts_match;
2092 	struct tunnel_match_enc_opts tun_enc_opts;
2093 	struct mlx5_rep_uplink_priv *uplink_priv;
2094 	struct mlx5_flow_attr *attr = flow->attr;
2095 	struct mlx5e_rep_priv *uplink_rpriv;
2096 	struct tunnel_match_key tunnel_key;
2097 	bool enc_opts_is_dont_care = true;
2098 	u32 tun_id, enc_opts_id = 0;
2099 	struct mlx5_eswitch *esw;
2100 	u32 value, mask;
2101 	int err;
2102 
2103 	esw = priv->mdev->priv.eswitch;
2104 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2105 	uplink_priv = &uplink_rpriv->uplink_priv;
2106 
2107 	memset(&tunnel_key, 0, sizeof(tunnel_key));
2108 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2109 		       &tunnel_key.enc_control);
2110 	if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2111 		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2112 			       &tunnel_key.enc_ipv4);
2113 	else
2114 		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2115 			       &tunnel_key.enc_ipv6);
2116 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2117 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2118 		       &tunnel_key.enc_tp);
2119 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2120 		       &tunnel_key.enc_key_id);
2121 	tunnel_key.filter_ifindex = filter_dev->ifindex;
2122 
2123 	err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2124 	if (err)
2125 		return err;
2126 
2127 	flow_rule_match_enc_opts(rule, &enc_opts_match);
2128 	err = enc_opts_is_dont_care_or_full_match(priv,
2129 						  enc_opts_match.mask,
2130 						  extack,
2131 						  &enc_opts_is_dont_care);
2132 	if (err)
2133 		goto err_enc_opts;
2134 
2135 	if (!enc_opts_is_dont_care) {
2136 		memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2137 		memcpy(&tun_enc_opts.key, enc_opts_match.key,
2138 		       sizeof(*enc_opts_match.key));
2139 		memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2140 		       sizeof(*enc_opts_match.mask));
2141 
2142 		err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2143 				  &tun_enc_opts, &enc_opts_id);
2144 		if (err)
2145 			goto err_enc_opts;
2146 	}
2147 
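	/* Pack both mapping ids into a single register value: the tunnel mapping
	 * id in the high bits and the enc opts mapping id in the low ENC_OPTS_BITS
	 * bits. When no enc opts id was allocated, mask out the low bits so only
	 * the tunnel id part is matched.
	 */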
2148 	value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2149 	mask = enc_opts_id ? TUNNEL_ID_MASK :
2150 			     (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2151 
2152 	if (attr->chain) {
2153 		mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2154 					    TUNNEL_TO_REG, value, mask);
2155 	} else {
2156 		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2157 		err = mlx5e_tc_match_to_reg_set(priv->mdev,
2158 						mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2159 						TUNNEL_TO_REG, value);
2160 		if (err)
2161 			goto err_set;
2162 
2163 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2164 	}
2165 
2166 	flow->attr->tunnel_id = value;
2167 	return 0;
2168 
2169 err_set:
2170 	if (enc_opts_id)
2171 		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2172 			       enc_opts_id);
2173 err_enc_opts:
2174 	mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2175 	return err;
2176 }
2177 
2178 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2179 {
2180 	u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2181 	u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2182 	struct mlx5_rep_uplink_priv *uplink_priv;
2183 	struct mlx5e_rep_priv *uplink_rpriv;
2184 	struct mlx5_eswitch *esw;
2185 
2186 	esw = flow->priv->mdev->priv.eswitch;
2187 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2188 	uplink_priv = &uplink_rpriv->uplink_priv;
2189 
2190 	if (tun_id)
2191 		mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2192 	if (enc_opts_id)
2193 		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2194 			       enc_opts_id);
2195 }
2196 
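/* Prefer matching on the ip_version field over the ethertype when the device
 * supports it and the rule has an exact match on ETH_P_IP or ETH_P_IPV6;
 * otherwise fall back to a plain ethertype match.
 */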
2197 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2198 			    struct flow_match_basic *match, bool outer,
2199 			    void *headers_c, void *headers_v)
2200 {
2201 	bool ip_version_cap;
2202 
2203 	ip_version_cap = outer ?
2204 		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2205 					  ft_field_support.outer_ip_version) :
2206 		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2207 					  ft_field_support.inner_ip_version);
2208 
2209 	if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2210 	    (match->key->n_proto == htons(ETH_P_IP) ||
2211 	     match->key->n_proto == htons(ETH_P_IPV6))) {
2212 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2213 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2214 			 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2215 	} else {
2216 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2217 			 ntohs(match->mask->n_proto));
2218 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2219 			 ntohs(match->key->n_proto));
2220 	}
2221 }
2222 
2223 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2224 {
2225 	void *headers_v;
2226 	u16 ethertype;
2227 	u8 ip_version;
2228 
2229 	if (outer)
2230 		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2231 	else
2232 		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2233 
2234 	ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2235 	/* If ip_version wasn't matched on directly, derive it from the ethertype */
2236 	if (!ip_version) {
2237 		ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2238 		if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2239 			ip_version = 4;
2240 		else if (ethertype == ETH_P_IPV6)
2241 			ip_version = 6;
2242 	}
2243 	return ip_version;
2244 }
2245 
2246 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
2247  * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
2248  *      +---------+----------------------------------------+
2249  *      |Arriving |         Arriving Outer Header          |
2250  *      |   Inner +---------+---------+---------+----------+
2251  *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
2252  *      +---------+---------+---------+---------+----------+
2253  *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
2254  *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
2255  *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
2256  *      |    CE   |   CE    |  CE     | CE      |   CE     |
2257  *      +---------+---------+---------+---------+----------+
2258  *
2259  * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
2260  * the inner ip_ecn value before hardware decap action.
2261  *
2262  * The cells marked with * are changed from the original inner packet ip_ecn value
2263  * during decap, so matching those values on inner ip_ecn before decap will fail.
2264  *
2265  * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
2266  * except for outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
2267  * and as such we can drop the inner ip_ecn=CE match.
2268  */
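/* For example: matching enc_tos ecn=ECT(0) together with tos ecn=ECT(0) is
 * offloaded as-is; enc_tos ecn=CE with tos ecn=CE is offloaded with the inner
 * ecn match dropped, since decap always sets the inner ecn to CE; a match on
 * enc_tos ecn=ECT(1) is rejected, as decap may rewrite the inner value.
 */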
2269 
2270 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2271 				      struct flow_cls_offload *f,
2272 				      bool *match_inner_ecn)
2273 {
2274 	u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2275 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2276 	struct netlink_ext_ack *extack = f->common.extack;
2277 	struct flow_match_ip match;
2278 
2279 	*match_inner_ecn = true;
2280 
2281 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2282 		flow_rule_match_enc_ip(rule, &match);
2283 		outer_ecn_key = match.key->tos & INET_ECN_MASK;
2284 		outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2285 	}
2286 
2287 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2288 		flow_rule_match_ip(rule, &match);
2289 		inner_ecn_key = match.key->tos & INET_ECN_MASK;
2290 		inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2291 	}
2292 
2293 	if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2294 		NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2295 		netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2296 		return -EOPNOTSUPP;
2297 	}
2298 
2299 	if (!outer_ecn_mask) {
2300 		if (!inner_ecn_mask)
2301 			return 0;
2302 
2303 		NL_SET_ERR_MSG_MOD(extack,
2304 				   "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2305 		netdev_warn(priv->netdev,
2306 			    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2307 		return -EOPNOTSUPP;
2308 	}
2309 
2310 	if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2311 		NL_SET_ERR_MSG_MOD(extack,
2312 				   "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2313 		netdev_warn(priv->netdev,
2314 			    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2315 		return -EOPNOTSUPP;
2316 	}
2317 
2318 	if (!inner_ecn_mask)
2319 		return 0;
2320 
2321 	/* Both inner and outer have full mask on ecn */
2322 
2323 	if (outer_ecn_key == INET_ECN_ECT_1) {
2324 		/* inner ecn might change by DECAP action */
2325 
2326 		NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2327 		netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2328 		return -EOPNOTSUPP;
2329 	}
2330 
2331 	if (outer_ecn_key != INET_ECN_CE)
2332 		return 0;
2333 
2334 	if (inner_ecn_key != INET_ECN_CE) {
2335 		/* Can't happen in software, as packet ecn will be changed to CE after decap */
2336 		NL_SET_ERR_MSG_MOD(extack,
2337 				   "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2338 		netdev_warn(priv->netdev,
2339 			    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2340 		return -EOPNOTSUPP;
2341 	}
2342 
2343 	/* outer ecn = CE, inner ecn = CE; as decap will change inner ecn to CE in any case,
2344 	 * drop the match on inner ecn
2345 	 */
2346 	*match_inner_ecn = false;
2347 
2348 	return 0;
2349 }
2350 
2351 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2352 			     struct mlx5e_tc_flow *flow,
2353 			     struct mlx5_flow_spec *spec,
2354 			     struct flow_cls_offload *f,
2355 			     struct net_device *filter_dev,
2356 			     u8 *match_level,
2357 			     bool *match_inner)
2358 {
2359 	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2360 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2361 	struct netlink_ext_ack *extack = f->common.extack;
2362 	bool needs_mapping, sets_mapping;
2363 	int err;
2364 
2365 	if (!mlx5e_is_eswitch_flow(flow)) {
2366 		NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2367 		return -EOPNOTSUPP;
2368 	}
2369 
2370 	needs_mapping = !!flow->attr->chain;
2371 	sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2372 	*match_inner = !needs_mapping;
2373 
2374 	if ((needs_mapping || sets_mapping) &&
2375 	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2376 		NL_SET_ERR_MSG_MOD(extack,
2377 				   "Chains on tunnel devices aren't supported without register loopback support");
2378 		netdev_warn(priv->netdev,
2379 			    "Chains on tunnel devices aren't supported without register loopback support");
2380 		return -EOPNOTSUPP;
2381 	}
2382 
2383 	if (!flow->attr->chain) {
2384 		err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2385 					 match_level);
2386 		if (err) {
2387 			NL_SET_ERR_MSG_MOD(extack,
2388 					   "Failed to parse tunnel attributes");
2389 			netdev_warn(priv->netdev,
2390 				    "Failed to parse tunnel attributes");
2391 			return err;
2392 		}
2393 
2394 		/* With mpls over udp we decapsulate using packet reformat
2395 		 * object
2396 		 */
2397 		if (!netif_is_bareudp(filter_dev))
2398 			flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2399 		err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2400 		if (err)
2401 			return err;
2402 	} else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2403 		struct mlx5_flow_spec *tmp_spec;
2404 
2405 		tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2406 		if (!tmp_spec) {
2407 			NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2408 			netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2409 			return -ENOMEM;
2410 		}
2411 		memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2412 
2413 		err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2414 		if (err) {
2415 			kvfree(tmp_spec);
2416 			NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2417 			netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2418 			return err;
2419 		}
2420 		err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2421 		kvfree(tmp_spec);
2422 		if (err)
2423 			return err;
2424 	}
2425 
2426 	if (!needs_mapping && !sets_mapping)
2427 		return 0;
2428 
2429 	return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2430 }
2431 
2432 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2433 {
2434 	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2435 			    inner_headers);
2436 }
2437 
2438 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2439 {
2440 	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2441 			    inner_headers);
2442 }
2443 
2444 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2445 {
2446 	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2447 			    outer_headers);
2448 }
2449 
2450 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2451 {
2452 	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2453 			    outer_headers);
2454 }
2455 
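/* Select the inner headers when the action flags include decap, otherwise the
 * outer headers.
 */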
2456 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2457 {
2458 	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2459 		get_match_inner_headers_value(spec) :
2460 		get_match_outer_headers_value(spec);
2461 }
2462 
2463 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2464 {
2465 	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2466 		get_match_inner_headers_criteria(spec) :
2467 		get_match_outer_headers_criteria(spec);
2468 }
2469 
2470 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2471 				   struct flow_cls_offload *f)
2472 {
2473 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2474 	struct netlink_ext_ack *extack = f->common.extack;
2475 	struct net_device *ingress_dev;
2476 	struct flow_match_meta match;
2477 
2478 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2479 		return 0;
2480 
2481 	flow_rule_match_meta(rule, &match);
2482 	if (!match.mask->ingress_ifindex)
2483 		return 0;
2484 
2485 	if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2486 		NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2487 		return -EOPNOTSUPP;
2488 	}
2489 
2490 	ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2491 					 match.key->ingress_ifindex);
2492 	if (!ingress_dev) {
2493 		NL_SET_ERR_MSG_MOD(extack,
2494 				   "Can't find the ingress port to match on");
2495 		return -ENOENT;
2496 	}
2497 
2498 	if (ingress_dev != filter_dev) {
2499 		NL_SET_ERR_MSG_MOD(extack,
2500 				   "Can't match on the ingress filter port");
2501 		return -EOPNOTSUPP;
2502 	}
2503 
2504 	return 0;
2505 }
2506 
2507 static bool skip_key_basic(struct net_device *filter_dev,
2508 			   struct flow_cls_offload *f)
2509 {
2510 	/* When doing mpls over udp decap, the user needs to provide
2511 	 * MPLS_UC as the protocol in order to be able to match on mpls
2512 	 * label fields.  However, the actual ethertype is IP so we want to
2513 	 * avoid matching on this, otherwise we'll fail the match.
2514 	 */
2515 	if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2516 		return true;
2517 
2518 	return false;
2519 }
2520 
2521 static int __parse_cls_flower(struct mlx5e_priv *priv,
2522 			      struct mlx5e_tc_flow *flow,
2523 			      struct mlx5_flow_spec *spec,
2524 			      struct flow_cls_offload *f,
2525 			      struct net_device *filter_dev,
2526 			      u8 *inner_match_level, u8 *outer_match_level)
2527 {
2528 	struct netlink_ext_ack *extack = f->common.extack;
2529 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2530 				       outer_headers);
2531 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2532 				       outer_headers);
2533 	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2534 				    misc_parameters);
2535 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2536 				    misc_parameters);
2537 	void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2538 				    misc_parameters_3);
2539 	void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2540 				    misc_parameters_3);
2541 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2542 	struct flow_dissector *dissector = rule->match.dissector;
2543 	enum fs_flow_table_type fs_type;
2544 	bool match_inner_ecn = true;
2545 	u16 addr_type = 0;
2546 	u8 ip_proto = 0;
2547 	u8 *match_level;
2548 	int err;
2549 
2550 	fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2551 	match_level = outer_match_level;
2552 
2553 	if (dissector->used_keys &
2554 	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
2555 	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2556 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
2557 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2558 	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
2559 	      BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2560 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2561 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2562 	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
2563 	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2564 	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2565 	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2566 	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)	|
2567 	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2568 	      BIT(FLOW_DISSECTOR_KEY_TCP) |
2569 	      BIT(FLOW_DISSECTOR_KEY_IP)  |
2570 	      BIT(FLOW_DISSECTOR_KEY_CT) |
2571 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2572 	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2573 	      BIT(FLOW_DISSECTOR_KEY_ICMP) |
2574 	      BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2575 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2576 		netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2577 			   dissector->used_keys);
2578 		return -EOPNOTSUPP;
2579 	}
2580 
2581 	if (mlx5e_get_tc_tun(filter_dev)) {
2582 		bool match_inner = false;
2583 
2584 		err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2585 					outer_match_level, &match_inner);
2586 		if (err)
2587 			return err;
2588 
2589 		if (match_inner) {
2590 			/* header pointers should point to the inner headers
2591 			 * if the packet was decapsulated already.
2592 			 * outer headers are set by parse_tunnel_attr.
2593 			 */
2594 			match_level = inner_match_level;
2595 			headers_c = get_match_inner_headers_criteria(spec);
2596 			headers_v = get_match_inner_headers_value(spec);
2597 		}
2598 
2599 		err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2600 		if (err)
2601 			return err;
2602 	}
2603 
2604 	err = mlx5e_flower_parse_meta(filter_dev, f);
2605 	if (err)
2606 		return err;
2607 
2608 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2609 	    !skip_key_basic(filter_dev, f)) {
2610 		struct flow_match_basic match;
2611 
2612 		flow_rule_match_basic(rule, &match);
2613 		mlx5e_tc_set_ethertype(priv->mdev, &match,
2614 				       match_level == outer_match_level,
2615 				       headers_c, headers_v);
2616 
2617 		if (match.mask->n_proto)
2618 			*match_level = MLX5_MATCH_L2;
2619 	}
2620 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2621 	    is_vlan_dev(filter_dev)) {
2622 		struct flow_dissector_key_vlan filter_dev_mask;
2623 		struct flow_dissector_key_vlan filter_dev_key;
2624 		struct flow_match_vlan match;
2625 
2626 		if (is_vlan_dev(filter_dev)) {
2627 			match.key = &filter_dev_key;
2628 			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2629 			match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2630 			match.key->vlan_priority = 0;
2631 			match.mask = &filter_dev_mask;
2632 			memset(match.mask, 0xff, sizeof(*match.mask));
2633 			match.mask->vlan_priority = 0;
2634 		} else {
2635 			flow_rule_match_vlan(rule, &match);
2636 		}
2637 		if (match.mask->vlan_id ||
2638 		    match.mask->vlan_priority ||
2639 		    match.mask->vlan_tpid) {
2640 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2641 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2642 					 svlan_tag, 1);
2643 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2644 					 svlan_tag, 1);
2645 			} else {
2646 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2647 					 cvlan_tag, 1);
2648 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2649 					 cvlan_tag, 1);
2650 			}
2651 
2652 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2653 				 match.mask->vlan_id);
2654 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2655 				 match.key->vlan_id);
2656 
2657 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2658 				 match.mask->vlan_priority);
2659 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2660 				 match.key->vlan_priority);
2661 
2662 			*match_level = MLX5_MATCH_L2;
2663 
2664 			if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2665 			    match.mask->vlan_eth_type &&
2666 			    MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2667 						    ft_field_support.outer_second_vid,
2668 						    fs_type)) {
2669 				MLX5_SET(fte_match_set_misc, misc_c,
2670 					 outer_second_cvlan_tag, 1);
2671 				spec->match_criteria_enable |=
2672 					MLX5_MATCH_MISC_PARAMETERS;
2673 			}
2674 		}
2675 	} else if (*match_level != MLX5_MATCH_NONE) {
2676 		/* cvlan_tag enabled in match criteria and
2677 		 * disabled in match value means both S & C tags
2678 		 * don't exist (untagged on both)
2679 		 */
2680 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2681 		*match_level = MLX5_MATCH_L2;
2682 	}
2683 
2684 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2685 		struct flow_match_vlan match;
2686 
2687 		flow_rule_match_cvlan(rule, &match);
2688 		if (match.mask->vlan_id ||
2689 		    match.mask->vlan_priority ||
2690 		    match.mask->vlan_tpid) {
2691 			if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2692 						     fs_type)) {
2693 				NL_SET_ERR_MSG_MOD(extack,
2694 						   "Matching on CVLAN is not supported");
2695 				return -EOPNOTSUPP;
2696 			}
2697 
2698 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2699 				MLX5_SET(fte_match_set_misc, misc_c,
2700 					 outer_second_svlan_tag, 1);
2701 				MLX5_SET(fte_match_set_misc, misc_v,
2702 					 outer_second_svlan_tag, 1);
2703 			} else {
2704 				MLX5_SET(fte_match_set_misc, misc_c,
2705 					 outer_second_cvlan_tag, 1);
2706 				MLX5_SET(fte_match_set_misc, misc_v,
2707 					 outer_second_cvlan_tag, 1);
2708 			}
2709 
2710 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2711 				 match.mask->vlan_id);
2712 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2713 				 match.key->vlan_id);
2714 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2715 				 match.mask->vlan_priority);
2716 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2717 				 match.key->vlan_priority);
2718 
2719 			*match_level = MLX5_MATCH_L2;
2720 			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2721 		}
2722 	}
2723 
2724 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2725 		struct flow_match_eth_addrs match;
2726 
2727 		flow_rule_match_eth_addrs(rule, &match);
2728 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2729 					     dmac_47_16),
2730 				match.mask->dst);
2731 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2732 					     dmac_47_16),
2733 				match.key->dst);
2734 
2735 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2736 					     smac_47_16),
2737 				match.mask->src);
2738 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2739 					     smac_47_16),
2740 				match.key->src);
2741 
2742 		if (!is_zero_ether_addr(match.mask->src) ||
2743 		    !is_zero_ether_addr(match.mask->dst))
2744 			*match_level = MLX5_MATCH_L2;
2745 	}
2746 
2747 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2748 		struct flow_match_control match;
2749 
2750 		flow_rule_match_control(rule, &match);
2751 		addr_type = match.key->addr_type;
2752 
2753 		/* the HW doesn't support frag first/later */
2754 		if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2755 			NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2756 			return -EOPNOTSUPP;
2757 		}
2758 
2759 		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2760 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2761 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2762 				 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2763 
2764 			/* the HW doesn't need L3 inline to match on frag=no */
2765 			if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2766 				*match_level = MLX5_MATCH_L2;
2767 	/* ***  L2 attributes parsing up to here *** */
2768 			else
2769 				*match_level = MLX5_MATCH_L3;
2770 		}
2771 	}
2772 
2773 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2774 		struct flow_match_basic match;
2775 
2776 		flow_rule_match_basic(rule, &match);
2777 		ip_proto = match.key->ip_proto;
2778 
2779 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2780 			 match.mask->ip_proto);
2781 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2782 			 match.key->ip_proto);
2783 
2784 		if (match.mask->ip_proto)
2785 			*match_level = MLX5_MATCH_L3;
2786 	}
2787 
2788 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2789 		struct flow_match_ipv4_addrs match;
2790 
2791 		flow_rule_match_ipv4_addrs(rule, &match);
2792 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2793 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2794 		       &match.mask->src, sizeof(match.mask->src));
2795 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2796 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2797 		       &match.key->src, sizeof(match.key->src));
2798 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2799 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2800 		       &match.mask->dst, sizeof(match.mask->dst));
2801 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2802 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2803 		       &match.key->dst, sizeof(match.key->dst));
2804 
2805 		if (match.mask->src || match.mask->dst)
2806 			*match_level = MLX5_MATCH_L3;
2807 	}
2808 
2809 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2810 		struct flow_match_ipv6_addrs match;
2811 
2812 		flow_rule_match_ipv6_addrs(rule, &match);
2813 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2814 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2815 		       &match.mask->src, sizeof(match.mask->src));
2816 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2817 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2818 		       &match.key->src, sizeof(match.key->src));
2819 
2820 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2821 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2822 		       &match.mask->dst, sizeof(match.mask->dst));
2823 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2824 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2825 		       &match.key->dst, sizeof(match.key->dst));
2826 
2827 		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2828 		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2829 			*match_level = MLX5_MATCH_L3;
2830 	}
2831 
2832 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2833 		struct flow_match_ip match;
2834 
2835 		flow_rule_match_ip(rule, &match);
2836 		if (match_inner_ecn) {
2837 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2838 				 match.mask->tos & 0x3);
2839 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2840 				 match.key->tos & 0x3);
2841 		}
2842 
2843 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2844 			 match.mask->tos >> 2);
2845 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2846 			 match.key->tos  >> 2);
2847 
2848 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2849 			 match.mask->ttl);
2850 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2851 			 match.key->ttl);
2852 
2853 		if (match.mask->ttl &&
2854 		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2855 						ft_field_support.outer_ipv4_ttl)) {
2856 			NL_SET_ERR_MSG_MOD(extack,
2857 					   "Matching on TTL is not supported");
2858 			return -EOPNOTSUPP;
2859 		}
2860 
2861 		if (match.mask->tos || match.mask->ttl)
2862 			*match_level = MLX5_MATCH_L3;
2863 	}
2864 
2865 	/* ***  L3 attributes parsing up to here *** */
2866 
2867 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2868 		struct flow_match_ports match;
2869 
2870 		flow_rule_match_ports(rule, &match);
2871 		switch (ip_proto) {
2872 		case IPPROTO_TCP:
2873 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2874 				 tcp_sport, ntohs(match.mask->src));
2875 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2876 				 tcp_sport, ntohs(match.key->src));
2877 
2878 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2879 				 tcp_dport, ntohs(match.mask->dst));
2880 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2881 				 tcp_dport, ntohs(match.key->dst));
2882 			break;
2883 
2884 		case IPPROTO_UDP:
2885 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2886 				 udp_sport, ntohs(match.mask->src));
2887 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2888 				 udp_sport, ntohs(match.key->src));
2889 
2890 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2891 				 udp_dport, ntohs(match.mask->dst));
2892 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2893 				 udp_dport, ntohs(match.key->dst));
2894 			break;
2895 		default:
2896 			NL_SET_ERR_MSG_MOD(extack,
2897 					   "Only UDP and TCP transports are supported for L4 matching");
2898 			netdev_err(priv->netdev,
2899 				   "Only UDP and TCP transports are supported\n");
2900 			return -EINVAL;
2901 		}
2902 
2903 		if (match.mask->src || match.mask->dst)
2904 			*match_level = MLX5_MATCH_L4;
2905 	}
2906 
2907 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2908 		struct flow_match_tcp match;
2909 
2910 		flow_rule_match_tcp(rule, &match);
2911 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2912 			 ntohs(match.mask->flags));
2913 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2914 			 ntohs(match.key->flags));
2915 
2916 		if (match.mask->flags)
2917 			*match_level = MLX5_MATCH_L4;
2918 	}
2919 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2920 		struct flow_match_icmp match;
2921 
2922 		flow_rule_match_icmp(rule, &match);
2923 		switch (ip_proto) {
2924 		case IPPROTO_ICMP:
2925 			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2926 			      MLX5_FLEX_PROTO_ICMP)) {
2927 				NL_SET_ERR_MSG_MOD(extack,
2928 						   "Match on Flex protocols for ICMP is not supported");
2929 				return -EOPNOTSUPP;
2930 			}
2931 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2932 				 match.mask->type);
2933 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2934 				 match.key->type);
2935 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2936 				 match.mask->code);
2937 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2938 				 match.key->code);
2939 			break;
2940 		case IPPROTO_ICMPV6:
2941 			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2942 			      MLX5_FLEX_PROTO_ICMPV6)) {
2943 				NL_SET_ERR_MSG_MOD(extack,
2944 						   "Match on Flex protocols for ICMPV6 is not supported");
2945 				return -EOPNOTSUPP;
2946 			}
2947 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2948 				 match.mask->type);
2949 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
2950 				 match.key->type);
2951 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
2952 				 match.mask->code);
2953 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
2954 				 match.key->code);
2955 			break;
2956 		default:
2957 			NL_SET_ERR_MSG_MOD(extack,
2958 					   "Code and type matching only with ICMP and ICMPv6");
2959 			netdev_err(priv->netdev,
2960 				   "Code and type matching only with ICMP and ICMPv6\n");
2961 			return -EINVAL;
2962 		}
2963 		if (match.mask->code || match.mask->type) {
2964 			*match_level = MLX5_MATCH_L4;
2965 			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
2966 		}
2967 	}
2968 	/* Currently supported only for MPLS over UDP */
2969 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2970 	    !netif_is_bareudp(filter_dev)) {
2971 		NL_SET_ERR_MSG_MOD(extack,
2972 				   "Matching on MPLS is supported only for MPLS over UDP");
2973 		netdev_err(priv->netdev,
2974 			   "Matching on MPLS is supported only for MPLS over UDP\n");
2975 		return -EOPNOTSUPP;
2976 	}
2977 
2978 	return 0;
2979 }
2980 
2981 static int parse_cls_flower(struct mlx5e_priv *priv,
2982 			    struct mlx5e_tc_flow *flow,
2983 			    struct mlx5_flow_spec *spec,
2984 			    struct flow_cls_offload *f,
2985 			    struct net_device *filter_dev)
2986 {
2987 	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2988 	struct netlink_ext_ack *extack = f->common.extack;
2989 	struct mlx5_core_dev *dev = priv->mdev;
2990 	struct mlx5_eswitch *esw = dev->priv.eswitch;
2991 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
2992 	struct mlx5_eswitch_rep *rep;
2993 	bool is_eswitch_flow;
2994 	int err;
2995 
2996 	inner_match_level = MLX5_MATCH_NONE;
2997 	outer_match_level = MLX5_MATCH_NONE;
2998 
2999 	err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
3000 				 &inner_match_level, &outer_match_level);
3001 	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
3002 				 outer_match_level : inner_match_level;
3003 
3004 	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
3005 	if (!err && is_eswitch_flow) {
3006 		rep = rpriv->rep;
3007 		if (rep->vport != MLX5_VPORT_UPLINK &&
3008 		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
3009 		    esw->offloads.inline_mode < non_tunnel_match_level)) {
3010 			NL_SET_ERR_MSG_MOD(extack,
3011 					   "Flow is not offloaded due to min inline setting");
3012 			netdev_warn(priv->netdev,
3013 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
3014 				    non_tunnel_match_level, esw->offloads.inline_mode);
3015 			return -EOPNOTSUPP;
3016 		}
3017 	}
3018 
3019 	flow->attr->inner_match_level = inner_match_level;
3020 	flow->attr->outer_match_level = outer_match_level;
3021 
3022 
3023 	return err;
3024 }
3025 
3026 struct mlx5_fields {
3027 	u8  field;
3028 	u8  field_bsize;
3029 	u32 field_mask;
3030 	u32 offset;
3031 	u32 match_offset;
3032 };
3033 
3034 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
3035 		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
3036 		 offsetof(struct pedit_headers, field) + (off), \
3037 		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
3038 
3039 /* True when the masked values are equal and the rewrite mask does not cover
3040  * any bits that are not also covered by the match mask.
3041  */
3042 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
3043 	type matchmaskx = *(type *)(matchmaskp); \
3044 	type matchvalx = *(type *)(matchvalp); \
3045 	type maskx = *(type *)(maskp); \
3046 	type valx = *(type *)(valp); \
3047 	\
3048 	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
3049 								 matchmaskx)); \
3050 })
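/* E.g. a pedit that sets a field to the very value the rule already matches
 * on, with a rewrite mask fully covered by the match mask, is redundant and
 * can be skipped.
 */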
3051 
3052 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
3053 			 void *matchmaskp, u8 bsize)
3054 {
3055 	bool same = false;
3056 
3057 	switch (bsize) {
3058 	case 8:
3059 		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
3060 		break;
3061 	case 16:
3062 		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
3063 		break;
3064 	case 32:
3065 		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
3066 		break;
3067 	}
3068 
3069 	return same;
3070 }
3071 
3072 static struct mlx5_fields fields[] = {
3073 	OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
3074 	OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
3075 	OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
3076 	OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
3077 	OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
3078 	OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
3079 
3080 	OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
3081 	OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
3082 	OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
3083 	OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
3084 
3085 	OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
3086 		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
3087 	OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
3088 		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
3089 	OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
3090 		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
3091 	OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
3092 		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
3093 	OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
3094 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
3095 	OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
3096 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
3097 	OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3098 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3099 	OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3100 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3101 	OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3102 	OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
3103 
3104 	OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
3105 	OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
3106 	/* in the linux tcphdr, tcp_flags is 8 bits long */
3107 	OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
3108 
3109 	OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3110 	OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
3111 };
3112 
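/* The pedit masks are laid out in network (big-endian) byte order; convert
 * them to little-endian so the generic bitops below (find_first_bit() /
 * find_last_bit()) yield the correct offset and length for the hardware set
 * action.
 */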
3113 static unsigned long mask_to_le(unsigned long mask, int size)
3114 {
3115 	__be32 mask_be32;
3116 	__be16 mask_be16;
3117 
3118 	if (size == 32) {
3119 		mask_be32 = (__force __be32)(mask);
3120 		mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
3121 	} else if (size == 16) {
3122 		mask_be32 = (__force __be32)(mask);
3123 		mask_be16 = *(__be16 *)&mask_be32;
3124 		mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
3125 	}
3126 
3127 	return mask;
3128 }
3129 
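/* Translate the accumulated pedit set/add masks into hardware modify-header
 * actions, skipping fields whose value is already guaranteed by the flow
 * match.
 */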
3130 static int offload_pedit_fields(struct mlx5e_priv *priv,
3131 				int namespace,
3132 				struct mlx5e_tc_flow_parse_attr *parse_attr,
3133 				u32 *action_flags,
3134 				struct netlink_ext_ack *extack)
3135 {
3136 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3137 	struct pedit_headers_action *hdrs = parse_attr->hdrs;
3138 	void *headers_c, *headers_v, *action, *vals_p;
3139 	u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
3140 	struct mlx5e_tc_mod_hdr_acts *mod_acts;
3141 	unsigned long mask, field_mask;
3142 	int i, first, last, next_z;
3143 	struct mlx5_fields *f;
3144 	u8 cmd;
3145 
3146 	mod_acts = &parse_attr->mod_hdr_acts;
3147 	headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3148 	headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3149 
3150 	set_masks = &hdrs[0].masks;
3151 	add_masks = &hdrs[1].masks;
3152 	set_vals = &hdrs[0].vals;
3153 	add_vals = &hdrs[1].vals;
3154 
3155 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
3156 		bool skip;
3157 
3158 		f = &fields[i];
3159 		/* avoid seeing bits set from previous iterations */
3160 		s_mask = 0;
3161 		a_mask = 0;
3162 
3163 		s_masks_p = (void *)set_masks + f->offset;
3164 		a_masks_p = (void *)add_masks + f->offset;
3165 
3166 		s_mask = *s_masks_p & f->field_mask;
3167 		a_mask = *a_masks_p & f->field_mask;
3168 
3169 		if (!s_mask && !a_mask) /* nothing to offload here */
3170 			continue;
3171 
3172 		if (s_mask && a_mask) {
3173 			NL_SET_ERR_MSG_MOD(extack,
3174 					   "can't set and add to the same HW field");
3175 			netdev_warn(priv->netdev,
3176 				    "mlx5: can't set and add to the same HW field (%x)\n",
3177 				    f->field);
3178 			return -EOPNOTSUPP;
3179 		}
3180 
3181 		skip = false;
3182 		if (s_mask) {
3183 			void *match_mask = headers_c + f->match_offset;
3184 			void *match_val = headers_v + f->match_offset;
3185 
3186 			cmd  = MLX5_ACTION_TYPE_SET;
3187 			mask = s_mask;
3188 			vals_p = (void *)set_vals + f->offset;
3189 			/* don't rewrite if we have a match on the same value */
3190 			if (cmp_val_mask(vals_p, s_masks_p, match_val,
3191 					 match_mask, f->field_bsize))
3192 				skip = true;
3193 			/* clear to denote we consumed this field */
3194 			*s_masks_p &= ~f->field_mask;
3195 		} else {
3196 			cmd  = MLX5_ACTION_TYPE_ADD;
3197 			mask = a_mask;
3198 			vals_p = (void *)add_vals + f->offset;
3199 			/* add 0 is no change */
3200 			if ((*(u32 *)vals_p & f->field_mask) == 0)
3201 				skip = true;
3202 			/* clear to denote we consumed this field */
3203 			*a_masks_p &= ~f->field_mask;
3204 		}
3205 		if (skip)
3206 			continue;
3207 
3208 		mask = mask_to_le(mask, f->field_bsize);
3209 
3210 		first = find_first_bit(&mask, f->field_bsize);
3211 		next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3212 		last  = find_last_bit(&mask, f->field_bsize);
3213 		if (first < next_z && next_z < last) {
3214 			NL_SET_ERR_MSG_MOD(extack,
3215 					   "rewrite of few sub-fields isn't supported");
3216 			netdev_warn(priv->netdev,
3217 				    "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
3218 				    mask);
3219 			return -EOPNOTSUPP;
3220 		}
3221 
3222 		action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3223 		if (IS_ERR(action)) {
3224 			NL_SET_ERR_MSG_MOD(extack,
3225 					   "too many pedit actions, can't offload");
3226 			mlx5_core_warn(priv->mdev,
3227 				       "mlx5: parsed %d pedit actions, can't do more\n",
3228 				       mod_acts->num_actions);
3229 			return PTR_ERR(action);
3230 		}
3231 
3232 		MLX5_SET(set_action_in, action, action_type, cmd);
3233 		MLX5_SET(set_action_in, action, field, f->field);
3234 
3235 		if (cmd == MLX5_ACTION_TYPE_SET) {
3236 			int start;
3237 
3238 			field_mask = mask_to_le(f->field_mask, f->field_bsize);
3239 
3240 			/* if field is bit sized it can start not from first bit */
3241 			start = find_first_bit(&field_mask, f->field_bsize);
3242 
3243 			MLX5_SET(set_action_in, action, offset, first - start);
3244 			/* length is num of bits to be written, zero means length of 32 */
3245 			MLX5_SET(set_action_in, action, length, (last - first + 1));
3246 		}
3247 
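		/* Shift the value so the bits being written start at bit 0 of
		 * the data field, matching the offset programmed above.
		 */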
3248 		if (f->field_bsize == 32)
3249 			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3250 		else if (f->field_bsize == 16)
3251 			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3252 		else if (f->field_bsize == 8)
3253 			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3254 
3255 		++mod_acts->num_actions;
3256 	}
3257 
3258 	return 0;
3259 }
3260 
3261 static const struct pedit_headers zero_masks = {};
3262 
3263 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3264 				       struct mlx5e_tc_flow_parse_attr *parse_attr,
3265 				       struct netlink_ext_ack *extack)
3266 {
3267 	struct pedit_headers *cmd_masks;
3268 	u8 cmd;
3269 
3270 	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3271 		cmd_masks = &parse_attr->hdrs[cmd].masks;
3272 		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3273 			NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3274 			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3275 			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3276 				       16, 1, cmd_masks, sizeof(zero_masks), true);
3277 			return -EOPNOTSUPP;
3278 		}
3279 	}
3280 
3281 	return 0;
3282 }
3283 
3284 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3285 				 struct mlx5e_tc_flow_parse_attr *parse_attr,
3286 				 u32 *action_flags,
3287 				 struct netlink_ext_ack *extack)
3288 {
3289 	int err;
3290 
3291 	err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3292 	if (err)
3293 		goto out_dealloc_parsed_actions;
3294 
3295 	err = verify_offload_pedit_fields(priv, parse_attr, extack);
3296 	if (err)
3297 		goto out_dealloc_parsed_actions;
3298 
3299 	return 0;
3300 
3301 out_dealloc_parsed_actions:
3302 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3303 	return err;
3304 }
3305 
3306 struct ip_ttl_word {
3307 	__u8	ttl;
3308 	__u8	protocol;
3309 	__sum16	check;
3310 };
3311 
3312 struct ipv6_hoplimit_word {
3313 	__be16	payload_len;
3314 	__u8	nexthdr;
3315 	__u8	hop_limit;
3316 };
3317 
3318 static bool
3319 is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
3320 			 bool *modify_ip_header, bool *modify_tuple,
3321 			 struct netlink_ext_ack *extack)
3322 {
3323 	u32 mask, offset;
3324 	u8 htype;
3325 
3326 	htype = act->mangle.htype;
3327 	offset = act->mangle.offset;
3328 	mask = ~act->mangle.mask;
3329 	/* For the IPv4 and IPv6 headers, check the whole 4-byte word to
3330 	 * determine whether fields other than ttl/hop_limit are being
3331 	 * modified.
3332 	 */
3333 	if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3334 		struct ip_ttl_word *ttl_word =
3335 			(struct ip_ttl_word *)&mask;
3336 
3337 		if (offset != offsetof(struct iphdr, ttl) ||
3338 		    ttl_word->protocol ||
3339 		    ttl_word->check) {
3340 			*modify_ip_header = true;
3341 		}
3342 
3343 		if (offset >= offsetof(struct iphdr, saddr))
3344 			*modify_tuple = true;
3345 
3346 		if (ct_flow && *modify_tuple) {
3347 			NL_SET_ERR_MSG_MOD(extack,
3348 					   "can't offload re-write of ipv4 address with action ct");
3349 			return false;
3350 		}
3351 	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3352 		struct ipv6_hoplimit_word *hoplimit_word =
3353 			(struct ipv6_hoplimit_word *)&mask;
3354 
3355 		if (offset != offsetof(struct ipv6hdr, payload_len) ||
3356 		    hoplimit_word->payload_len ||
3357 		    hoplimit_word->nexthdr) {
3358 			*modify_ip_header = true;
3359 		}
3360 
3361 		if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3362 			*modify_tuple = true;
3363 
3364 		if (ct_flow && *modify_tuple) {
3365 			NL_SET_ERR_MSG_MOD(extack,
3366 					   "can't offload re-write of ipv6 address with action ct");
3367 			return false;
3368 		}
3369 	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3370 		   htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3371 		*modify_tuple = true;
3372 		if (ct_flow) {
3373 			NL_SET_ERR_MSG_MOD(extack,
3374 					   "can't offload re-write of transport header ports with action ct");
3375 			return false;
3376 		}
3377 	}
3378 
3379 	return true;
3380 }
3381 
3382 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3383 				   bool ct_flow, struct netlink_ext_ack *extack,
3384 				   struct mlx5e_priv *priv,
3385 				   struct mlx5_flow_spec *spec)
3386 {
3387 	if (!modify_tuple || ct_clear)
3388 		return true;
3389 
3390 	if (ct_flow) {
3391 		NL_SET_ERR_MSG_MOD(extack,
3392 				   "can't offload tuple modification with non-clear ct()");
3393 		netdev_info(priv->netdev,
3394 			    "can't offload tuple modification with non-clear ct()");
3395 		return false;
3396 	}
3397 
3398 	/* Add ct_state=-trk match so it will be offloaded for non ct flows
3399 	 * (or after clear action), as otherwise, since the tuple is changed,
3400 	 * we can't restore ct state
3401 	 */
3402 	if (mlx5_tc_ct_add_no_trk_match(spec)) {
3403 		NL_SET_ERR_MSG_MOD(extack,
3404 				   "can't offload tuple modification with ct matches and no ct(clear) action");
3405 		netdev_info(priv->netdev,
3406 			    "can't offload tuple modification with ct matches and no ct(clear) action");
3407 		return false;
3408 	}
3409 
3410 	return true;
3411 }
3412 
3413 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3414 					  struct mlx5_flow_spec *spec,
3415 					  struct flow_action *flow_action,
3416 					  u32 actions, bool ct_flow,
3417 					  bool ct_clear,
3418 					  struct netlink_ext_ack *extack)
3419 {
3420 	const struct flow_action_entry *act;
3421 	bool modify_ip_header, modify_tuple;
3422 	void *headers_c;
3423 	void *headers_v;
3424 	u16 ethertype;
3425 	u8 ip_proto;
3426 	int i;
3427 
3428 	headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3429 	headers_v = mlx5e_get_match_headers_value(actions, spec);
3430 	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3431 
3432 	/* for non-IP we only re-write MACs, so we're okay */
3433 	if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3434 	    ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3435 		goto out_ok;
3436 
3437 	modify_ip_header = false;
3438 	modify_tuple = false;
3439 	flow_action_for_each(i, act, flow_action) {
3440 		if (act->id != FLOW_ACTION_MANGLE &&
3441 		    act->id != FLOW_ACTION_ADD)
3442 			continue;
3443 
3444 		if (!is_action_keys_supported(act, ct_flow,
3445 					      &modify_ip_header,
3446 					      &modify_tuple, extack))
3447 			return false;
3448 	}
3449 
3450 	if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3451 				    priv, spec))
3452 		return false;
3453 
3454 	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3455 	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3456 	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3457 		NL_SET_ERR_MSG_MOD(extack,
3458 				   "can't offload re-write of non TCP/UDP");
3459 		netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3460 			    ip_proto);
3461 		return false;
3462 	}
3463 
3464 out_ok:
3465 	return true;
3466 }
3467 
3468 static bool
3469 actions_match_supported_fdb(struct mlx5e_priv *priv,
3470 			    struct mlx5e_tc_flow_parse_attr *parse_attr,
3471 			    struct mlx5e_tc_flow *flow,
3472 			    struct netlink_ext_ack *extack)
3473 {
3474 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3475 	bool ct_flow, ct_clear;
3476 
3477 	ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3478 	ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3479 
3480 	if (esw_attr->split_count && ct_flow &&
3481 	    !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
3482 		/* All registers used by ct are cleared when using
3483 		 * split rules.
3484 		 */
3485 		NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3486 		return false;
3487 	}
3488 
3489 	if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3490 		NL_SET_ERR_MSG_MOD(extack,
3491 				   "current firmware doesn't support split rule for port mirroring");
3492 		netdev_warn_once(priv->netdev,
3493 				 "current firmware doesn't support split rule for port mirroring\n");
3494 		return false;
3495 	}
3496 
3497 	return true;
3498 }
3499 
3500 static bool
3501 actions_match_supported(struct mlx5e_priv *priv,
3502 			struct flow_action *flow_action,
3503 			u32 actions,
3504 			struct mlx5e_tc_flow_parse_attr *parse_attr,
3505 			struct mlx5e_tc_flow *flow,
3506 			struct netlink_ext_ack *extack)
3507 {
3508 	bool ct_flow, ct_clear;
3509 
3510 	ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3511 	ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3512 
3513 	if (!(actions &
3514 	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3515 		NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
3516 		return false;
3517 	}
3518 
3519 	if (!(~actions &
3520 	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3521 		NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3522 		return false;
3523 	}
3524 
3525 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3526 	    actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3527 		NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3528 		return false;
3529 	}
3530 
3543 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3544 	    !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3545 					   actions, ct_flow, ct_clear, extack))
3546 		return false;
3547 
3548 	if (mlx5e_is_eswitch_flow(flow) &&
3549 	    !actions_match_supported_fdb(priv, parse_attr, flow, extack))
3550 		return false;
3551 
3552 	return true;
3553 }
3554 
3555 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3556 {
3557 	return priv->mdev == peer_priv->mdev;
3558 }
3559 
3560 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3561 {
3562 	struct mlx5_core_dev *fmdev, *pmdev;
3563 	u64 fsystem_guid, psystem_guid;
3564 
3565 	fmdev = priv->mdev;
3566 	pmdev = peer_priv->mdev;
3567 
3568 	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3569 	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3570 
3571 	return (fsystem_guid == psystem_guid);
3572 }
3573 
3574 static int
3575 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3576 				struct mlx5e_tc_flow *flow,
3577 				struct mlx5_flow_attr *attr,
3578 				struct netlink_ext_ack *extack)
3579 {
3580 	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3581 	struct pedit_headers_action *hdrs = parse_attr->hdrs;
3582 	enum mlx5_flow_namespace_type ns_type;
3583 	int err;
3584 
3585 	if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3586 	    !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3587 		return 0;
3588 
3589 	ns_type = mlx5e_get_flow_namespace(flow);
3590 
3591 	err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3592 	if (err)
3593 		return err;
3594 
3595 	if (parse_attr->mod_hdr_acts.num_actions > 0)
3596 		return 0;
3597 
3598 	/* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3599 	attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3600 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3601 
3602 	if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3603 		return 0;
3604 
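	/* All pedit actions were skipped, so the rule no longer needs to be
	 * split around the header rewrite unless a VLAN push/pop still
	 * requires it.
	 */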
3605 	if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3606 	      (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3607 		attr->esw_attr->split_count = 0;
3608 
3609 	return 0;
3610 }
3611 
3612 static struct mlx5_flow_attr*
3613 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3614 				   enum mlx5_flow_namespace_type ns_type)
3615 {
3616 	struct mlx5e_tc_flow_parse_attr *parse_attr;
3617 	u32 attr_sz = ns_to_attr_sz(ns_type);
3618 	struct mlx5_flow_attr *attr2;
3619 
3620 	attr2 = mlx5_alloc_flow_attr(ns_type);
3621 	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3622 	if (!attr2 || !parse_attr) {
3623 		kvfree(parse_attr);
3624 		kfree(attr2);
3625 		return NULL;
3626 	}
3627 
3628 	memcpy(attr2, attr, attr_sz);
3629 	INIT_LIST_HEAD(&attr2->list);
3630 	parse_attr->filter_dev = attr->parse_attr->filter_dev;
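	/* The clone starts clean: actions and flags accumulated so far stay
	 * with the original attr, the clone only carries what is parsed after
	 * the split point.
	 */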
3631 	attr2->action = 0;
3632 	attr2->flags = 0;
3633 	attr2->parse_attr = parse_attr;
3634 	attr2->dest_chain = 0;
3635 	attr2->dest_ft = NULL;
3636 
3637 	if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
3638 		attr2->esw_attr->out_count = 0;
3639 		attr2->esw_attr->split_count = 0;
3640 	}
3641 
3642 	return attr2;
3643 }
3644 
3645 static struct mlx5_core_dev *
3646 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
3647 {
3648 	return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
3649 }
3650 
3651 struct mlx5_flow_attr *
3652 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3653 {
3654 	struct mlx5_esw_flow_attr *esw_attr;
3655 	struct mlx5_flow_attr *attr;
3656 	int i;
3657 
3658 	list_for_each_entry(attr, &flow->attrs, list) {
3659 		esw_attr = attr->esw_attr;
3660 		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3661 			if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3662 				return attr;
3663 		}
3664 	}
3665 
3666 	return NULL;
3667 }
3668 
3669 void
3670 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3671 {
3672 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3673 	struct mlx5_flow_attr *attr;
3674 
3675 	list_for_each_entry(attr, &flow->attrs, list) {
3676 		if (list_is_last(&attr->list, &flow->attrs))
3677 			break;
3678 
3679 		mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3680 	}
3681 }
3682 
3683 static void
3684 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3685 {
3686 	struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
3687 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3688 	struct mlx5_flow_attr *attr, *tmp;
3689 	bool vf_tun;
3690 
3691 	list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3692 		if (list_is_last(&attr->list, &flow->attrs))
3693 			break;
3694 
3695 		if (attr->post_act_handle)
3696 			mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
3697 
3698 		clean_encap_dests(flow->priv, flow, attr, &vf_tun);
3699 
3700 		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
3701 			mlx5_fc_destroy(counter_dev, attr->counter);
3702 
3703 		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3704 			mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
3705 			if (attr->modify_hdr)
3706 				mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
3707 		}
3708 
3709 		list_del(&attr->list);
3710 		kvfree(attr->parse_attr);
3711 		kfree(attr);
3712 	}
3713 }
3714 
3715 int
3716 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3717 {
3718 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3719 	struct mlx5_flow_attr *attr;
3720 	int err = 0;
3721 
3722 	list_for_each_entry(attr, &flow->attrs, list) {
3723 		if (list_is_last(&attr->list, &flow->attrs))
3724 			break;
3725 
3726 		err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3727 		if (err)
3728 			break;
3729 	}
3730 
3731 	return err;
3732 }
3733 
3734 /* TC filter rule HW translation:
3735  *
3736  * +---------------------+
3737  * + ft prio (tc chain)  +
3738  * + original match      +
3739  * +---------------------+
3740  *           |
3741  *           | if multi table action
3742  *           |
3743  *           v
3744  * +---------------------+
3745  * + post act ft         |<----.
3746  * + match fte id        |     | split on multi table action
3747  * + do actions          |-----'
3748  * +---------------------+
3749  *           |
3750  *           |
3751  *           v
3752  * Do rest of the actions after last multi table action.
3753  */
3754 static int
3755 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3756 {
3757 	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3758 	struct mlx5_flow_attr *attr, *next_attr = NULL;
3759 	struct mlx5e_post_act_handle *handle;
3760 	bool vf_tun;
3761 	int err;
3762 
3763 	/* This is going in reverse order as needed.
3764 	 * The first entry is the last attribute.
3765 	 */
3766 	list_for_each_entry(attr, &flow->attrs, list) {
3767 		if (!next_attr) {
3768 			/* Set counter action on last post act rule. */
3769 			attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3770 		} else {
3771 			err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3772 			if (err)
3773 				goto out_free;
3774 		}
3775 
3776 		/* Don't add post_act rule for first attr (last in the list).
3777 		 * It's being handled by the caller.
3778 		 */
3779 		if (list_is_last(&attr->list, &flow->attrs))
3780 			break;
3781 
3782 		err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
3783 		if (err)
3784 			goto out_free;
3785 
3786 		err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3787 		if (err)
3788 			goto out_free;
3789 
3790 		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3791 			err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
3792 			if (err)
3793 				goto out_free;
3794 		}
3795 
3796 		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
3797 			err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
3798 			if (err)
3799 				goto out_free;
3800 		}
3801 
3802 		handle = mlx5e_tc_post_act_add(post_act, attr);
3803 		if (IS_ERR(handle)) {
3804 			err = PTR_ERR(handle);
3805 			goto out_free;
3806 		}
3807 
3808 		attr->post_act_handle = handle;
3809 		next_attr = attr;
3810 	}
3811 
3812 	if (flow_flag_test(flow, SLOW))
3813 		goto out;
3814 
3815 	err = mlx5e_tc_offload_flow_post_acts(flow);
3816 	if (err)
3817 		goto out_free;
3818 
3819 out:
3820 	return 0;
3821 
3822 out_free:
3823 	free_flow_post_acts(flow);
3824 	return err;
3825 }
3826 
3827 static int
3828 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
3829 		 struct flow_action *flow_action)
3830 {
3831 	struct netlink_ext_ack *extack = parse_state->extack;
3832 	struct mlx5e_tc_flow_action flow_action_reorder;
3833 	struct mlx5e_tc_flow *flow = parse_state->flow;
3834 	struct mlx5_flow_attr *attr = flow->attr;
3835 	enum mlx5_flow_namespace_type ns_type;
3836 	struct mlx5e_priv *priv = flow->priv;
3837 	struct flow_action_entry *act, **_act;
3838 	struct mlx5e_tc_act *tc_act;
3839 	int err, i;
3840 
3841 	flow_action_reorder.num_entries = flow_action->num_entries;
3842 	flow_action_reorder.entries = kcalloc(flow_action->num_entries,
3843 					      sizeof(flow_action), GFP_KERNEL);
3844 	if (!flow_action_reorder.entries)
3845 		return -ENOMEM;
3846 
3847 	mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
3848 
3849 	ns_type = mlx5e_get_flow_namespace(flow);
3850 	list_add(&attr->list, &flow->attrs);
3851 
3852 	flow_action_for_each(i, _act, &flow_action_reorder) {
3853 		act = *_act;
3854 		tc_act = mlx5e_tc_act_get(act->id, ns_type);
3855 		if (!tc_act) {
3856 			NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
3857 			err = -EOPNOTSUPP;
3858 			goto out_free;
3859 		}
3860 
3861 		if (!tc_act->can_offload(parse_state, act, i, attr)) {
3862 			err = -EOPNOTSUPP;
3863 			goto out_free;
3864 		}
3865 
3866 		err = tc_act->parse_action(parse_state, act, priv, attr);
3867 		if (err)
3868 			goto out_free;
3869 
3870 		parse_state->actions |= attr->action;
3871 
3872 		/* Split attr for multi table act if not the last act. */
3873 		if (tc_act->is_multi_table_act &&
3874 		    tc_act->is_multi_table_act(priv, act, attr) &&
3875 		    i < flow_action_reorder.num_entries - 1) {
3876 			err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3877 			if (err)
3878 				goto out_free;
3879 
3880 			attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
3881 			if (!attr) {
3882 				err = -ENOMEM;
3883 				goto out_free;
3884 			}
3885 
3886 			list_add(&attr->list, &flow->attrs);
3887 		}
3888 	}
3889 
3890 	kfree(flow_action_reorder.entries);
3891 
3892 	err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3893 	if (err)
3894 		goto out_free_post_acts;
3895 
3896 	err = alloc_flow_post_acts(flow, extack);
3897 	if (err)
3898 		goto out_free_post_acts;
3899 
3900 	return 0;
3901 
3902 out_free:
3903 	kfree(flow_action_reorder.entries);
3904 out_free_post_acts:
3905 	free_flow_post_acts(flow);
3906 
3907 	return err;
3908 }
3909 
3910 static int
3911 flow_action_supported(struct flow_action *flow_action,
3912 		      struct netlink_ext_ack *extack)
3913 {
3914 	if (!flow_action_has_entries(flow_action)) {
3915 		NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
3916 		return -EINVAL;
3917 	}
3918 
3919 	if (!flow_action_hw_stats_check(flow_action, extack,
3920 					FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
3921 		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
3922 		return -EOPNOTSUPP;
3923 	}
3924 
3925 	return 0;
3926 }
3927 
3928 static int
3929 parse_tc_nic_actions(struct mlx5e_priv *priv,
3930 		     struct flow_action *flow_action,
3931 		     struct mlx5e_tc_flow *flow,
3932 		     struct netlink_ext_ack *extack)
3933 {
3934 	struct mlx5e_tc_act_parse_state *parse_state;
3935 	struct mlx5e_tc_flow_parse_attr *parse_attr;
3936 	struct mlx5_flow_attr *attr = flow->attr;
3937 	int err;
3938 
3939 	err = flow_action_supported(flow_action, extack);
3940 	if (err)
3941 		return err;
3942 
3943 	attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3944 	parse_attr = attr->parse_attr;
3945 	parse_state = &parse_attr->parse_state;
3946 	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
3947 	parse_state->ct_priv = get_ct_priv(priv);
3948 
3949 	err = parse_tc_actions(parse_state, flow_action);
3950 	if (err)
3951 		return err;
3952 
3953 	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
3954 	if (err)
3955 		return err;
3956 
3957 	if (!actions_match_supported(priv, flow_action, parse_state->actions,
3958 				     parse_attr, flow, extack))
3959 		return -EOPNOTSUPP;
3960 
3961 	return 0;
3962 }
3963 
3964 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3965 				  struct net_device *peer_netdev)
3966 {
3967 	struct mlx5e_priv *peer_priv;
3968 
3969 	peer_priv = netdev_priv(peer_netdev);
3970 
3971 	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3972 		mlx5e_eswitch_vf_rep(priv->netdev) &&
3973 		mlx5e_eswitch_vf_rep(peer_netdev) &&
3974 		mlx5e_same_hw_devs(priv, peer_priv));
3975 }
3976 
3977 static bool same_hw_reps(struct mlx5e_priv *priv,
3978 			 struct net_device *peer_netdev)
3979 {
3980 	struct mlx5e_priv *peer_priv;
3981 
3982 	peer_priv = netdev_priv(peer_netdev);
3983 
3984 	return mlx5e_eswitch_rep(priv->netdev) &&
3985 	       mlx5e_eswitch_rep(peer_netdev) &&
3986 	       mlx5e_same_hw_devs(priv, peer_priv);
3987 }
3988 
3989 static bool is_lag_dev(struct mlx5e_priv *priv,
3990 		       struct net_device *peer_netdev)
3991 {
3992 	return ((mlx5_lag_is_sriov(priv->mdev) ||
3993 		 mlx5_lag_is_multipath(priv->mdev)) &&
3994 		 same_hw_reps(priv, peer_netdev));
3995 }
3996 
3997 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
3998 {
3999 	if (same_hw_reps(priv, out_dev) &&
4000 	    MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
4001 	    MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
4002 		return true;
4003 
4004 	return false;
4005 }
4006 
4007 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4008 				    struct net_device *out_dev)
4009 {
4010 	if (is_merged_eswitch_vfs(priv, out_dev))
4011 		return true;
4012 
4013 	if (is_multiport_eligible(priv, out_dev))
4014 		return true;
4015 
4016 	if (is_lag_dev(priv, out_dev))
4017 		return true;
4018 
4019 	return mlx5e_eswitch_rep(out_dev) &&
4020 	       same_port_devs(priv, netdev_priv(out_dev));
4021 }
4022 
4023 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
4024 				      struct mlx5_flow_attr *attr,
4025 				      int ifindex,
4026 				      enum mlx5e_tc_int_port_type type,
4027 				      u32 *action,
4028 				      int out_index)
4029 {
4030 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4031 	struct mlx5e_tc_int_port_priv *int_port_priv;
4032 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4033 	struct mlx5e_tc_int_port *dest_int_port;
4034 	int err;
4035 
4036 	parse_attr = attr->parse_attr;
4037 	int_port_priv = mlx5e_get_int_port_priv(priv);
4038 
4039 	dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
4040 	if (IS_ERR(dest_int_port))
4041 		return PTR_ERR(dest_int_port);
4042 
4043 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
4044 					MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
4045 					mlx5e_tc_int_port_get_metadata(dest_int_port));
4046 	if (err) {
4047 		mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
4048 		return err;
4049 	}
4050 
4051 	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4052 
4053 	esw_attr->dest_int_port = dest_int_port;
4054 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
4055 
4056 	/* Forward to root fdb for matching against the new source vport */
4057 	attr->dest_chain = 0;
4058 
4059 	return 0;
4060 }
4061 
4062 static int
4063 parse_tc_fdb_actions(struct mlx5e_priv *priv,
4064 		     struct flow_action *flow_action,
4065 		     struct mlx5e_tc_flow *flow,
4066 		     struct netlink_ext_ack *extack)
4067 {
4068 	struct mlx5e_tc_act_parse_state *parse_state;
4069 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4070 	struct mlx5_flow_attr *attr = flow->attr;
4071 	struct mlx5_esw_flow_attr *esw_attr;
4072 	struct net_device *filter_dev;
4073 	int err;
4074 
4075 	err = flow_action_supported(flow_action, extack);
4076 	if (err)
4077 		return err;
4078 
4079 	esw_attr = attr->esw_attr;
4080 	parse_attr = attr->parse_attr;
4081 	filter_dev = parse_attr->filter_dev;
4082 	parse_state = &parse_attr->parse_state;
4083 	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4084 	parse_state->ct_priv = get_ct_priv(priv);
4085 
4086 	err = parse_tc_actions(parse_state, flow_action);
4087 	if (err)
4088 		return err;
4089 
4090 	/* Forward to/from internal port can only have 1 dest */
4091 	if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
4092 	    esw_attr->out_count > 1) {
4093 		NL_SET_ERR_MSG_MOD(extack,
4094 				   "Rules with internal port can have only one destination");
4095 		return -EOPNOTSUPP;
4096 	}
4097 
4098 	/* Forward from tunnel/internal port to internal port is not supported */
4099 	if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
4100 	    esw_attr->dest_int_port) {
4101 		NL_SET_ERR_MSG_MOD(extack,
4102 				   "Forwarding from tunnel/internal port to internal port is not supported");
4103 		return -EOPNOTSUPP;
4104 	}
4105 
4106 	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4107 	if (err)
4108 		return err;
4109 
4110 	if (!actions_match_supported(priv, flow_action, parse_state->actions,
4111 				     parse_attr, flow, extack))
4112 		return -EOPNOTSUPP;
4113 
4114 	return 0;
4115 }
4116 
4117 static void get_flags(int flags, unsigned long *flow_flags)
4118 {
4119 	unsigned long __flow_flags = 0;
4120 
4121 	if (flags & MLX5_TC_FLAG(INGRESS))
4122 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4123 	if (flags & MLX5_TC_FLAG(EGRESS))
4124 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4125 
4126 	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4127 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4128 	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4129 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4130 	if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4131 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4132 
4133 	*flow_flags = __flow_flags;
4134 }
4135 
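/* Offloaded TC flows are hashed by their flow_cls_offload cookie. */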
4136 static const struct rhashtable_params tc_ht_params = {
4137 	.head_offset = offsetof(struct mlx5e_tc_flow, node),
4138 	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4139 	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4140 	.automatic_shrinking = true,
4141 };
4142 
4143 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4144 				    unsigned long flags)
4145 {
4146 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
4147 	struct mlx5e_rep_priv *rpriv;
4148 
4149 	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4150 		rpriv = priv->ppriv;
4151 		return &rpriv->tc_ht;
4152 	} else /* NIC offload */
4153 		return &tc->ht;
4154 }
4155 
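/* Under an eswitch LAG (SR-IOV bond or multipath), a rule may also need to be
 * installed on the paired eswitch so that traffic arriving on the peer port is
 * handled the same way.
 */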
4156 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4157 {
4158 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4159 	struct mlx5_flow_attr *attr = flow->attr;
4160 	bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4161 		flow_flag_test(flow, INGRESS);
4162 	bool act_is_encap = !!(attr->action &
4163 			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4164 	bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4165 						MLX5_DEVCOM_ESW_OFFLOADS);
4166 
4167 	if (!esw_paired)
4168 		return false;
4169 
4170 	if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4171 	     mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4172 	    (is_rep_ingress || act_is_encap))
4173 		return true;
4174 
4175 	return false;
4176 }
4177 
4178 struct mlx5_flow_attr *
4179 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4180 {
4181 	u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4182 				sizeof(struct mlx5_esw_flow_attr) :
4183 				sizeof(struct mlx5_nic_flow_attr);
4184 	struct mlx5_flow_attr *attr;
4185 
4186 	attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4187 	if (!attr)
4188 		return attr;
4189 
4190 	INIT_LIST_HEAD(&attr->list);
4191 	return attr;
4192 }
4193 
4194 static int
4195 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4196 		 struct flow_cls_offload *f, unsigned long flow_flags,
4197 		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
4198 		 struct mlx5e_tc_flow **__flow)
4199 {
4200 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4201 	struct mlx5_flow_attr *attr;
4202 	struct mlx5e_tc_flow *flow;
4203 	int err = -ENOMEM;
4204 	int out_index;
4205 
4206 	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4207 	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4208 	if (!parse_attr || !flow)
4209 		goto err_free;
4210 
4211 	flow->flags = flow_flags;
4212 	flow->cookie = f->cookie;
4213 	flow->priv = priv;
4214 
4215 	attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4216 	if (!attr)
4217 		goto err_free;
4218 
4219 	flow->attr = attr;
4220 
4221 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4222 		INIT_LIST_HEAD(&flow->encaps[out_index].list);
4223 	INIT_LIST_HEAD(&flow->hairpin);
4224 	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4225 	INIT_LIST_HEAD(&flow->attrs);
4226 	refcount_set(&flow->refcnt, 1);
4227 	init_completion(&flow->init_done);
4228 	init_completion(&flow->del_hw_done);
4229 
4230 	*__flow = flow;
4231 	*__parse_attr = parse_attr;
4232 
4233 	return 0;
4234 
4235 err_free:
4236 	kfree(flow);
4237 	kvfree(parse_attr);
4238 	return err;
4239 }
4240 
4241 static void
4242 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4243 		     struct mlx5e_tc_flow_parse_attr *parse_attr,
4244 		     struct flow_cls_offload *f)
4245 {
4246 	attr->parse_attr = parse_attr;
4247 	attr->chain = f->common.chain_index;
4248 	attr->prio = f->common.prio;
4249 }
4250 
4251 static void
4252 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4253 			 struct mlx5e_priv *priv,
4254 			 struct mlx5e_tc_flow_parse_attr *parse_attr,
4255 			 struct flow_cls_offload *f,
4256 			 struct mlx5_eswitch_rep *in_rep,
4257 			 struct mlx5_core_dev *in_mdev)
4258 {
4259 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4260 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4261 
4262 	mlx5e_flow_attr_init(attr, parse_attr, f);
4263 
4264 	esw_attr->in_rep = in_rep;
4265 	esw_attr->in_mdev = in_mdev;
4266 
4267 	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4268 	    MLX5_COUNTER_SOURCE_ESWITCH)
4269 		esw_attr->counter_dev = in_mdev;
4270 	else
4271 		esw_attr->counter_dev = priv->mdev;
4272 }
4273 
4274 static struct mlx5e_tc_flow *
4275 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4276 		     struct flow_cls_offload *f,
4277 		     unsigned long flow_flags,
4278 		     struct net_device *filter_dev,
4279 		     struct mlx5_eswitch_rep *in_rep,
4280 		     struct mlx5_core_dev *in_mdev)
4281 {
4282 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4283 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4284 	struct netlink_ext_ack *extack = f->common.extack;
4285 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4286 	struct mlx5e_tc_flow *flow;
4287 	int attr_size, err;
4288 
4289 	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4290 	attr_size  = sizeof(struct mlx5_esw_flow_attr);
4291 	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4292 			       &parse_attr, &flow);
4293 	if (err)
4294 		goto out;
4295 
4296 	parse_attr->filter_dev = filter_dev;
4297 	mlx5e_flow_esw_attr_init(flow->attr,
4298 				 priv, parse_attr,
4299 				 f, in_rep, in_mdev);
4300 
4301 	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4302 			       f, filter_dev);
4303 	if (err)
4304 		goto err_free;
4305 
4306 	/* actions validation depends on parsing the ct matches first */
4307 	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4308 				   &flow->attr->ct_attr, extack);
4309 	if (err)
4310 		goto err_free;
4311 
4312 	/* always set IP version for indirect table handling */
4313 	flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4314 
4315 	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4316 	if (err)
4317 		goto err_free;
4318 
4319 	if (flow->attr->lag.count) {
4320 		err = mlx5_lag_add_mpesw_rule(esw->dev);
4321 		if (err)
4322 			goto err_free;
4323 	}
4324 
4325 	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4326 	complete_all(&flow->init_done);
4327 	if (err) {
4328 		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4329 			goto err_lag;
4330 
4331 		add_unready_flow(flow);
4332 	}
4333 
4334 	return flow;
4335 
4336 err_lag:
4337 	if (flow->attr->lag.count)
4338 		mlx5_lag_del_mpesw_rule(esw->dev);
4339 err_free:
4340 	mlx5e_flow_put(priv, flow);
4341 out:
4342 	return ERR_PTR(err);
4343 }
4344 
4345 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4346 				      struct mlx5e_tc_flow *flow,
4347 				      unsigned long flow_flags)
4348 {
4349 	struct mlx5e_priv *priv = flow->priv, *peer_priv;
4350 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4351 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4352 	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4353 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4354 	struct mlx5e_rep_priv *peer_urpriv;
4355 	struct mlx5e_tc_flow *peer_flow;
4356 	struct mlx5_core_dev *in_mdev;
4357 	int err = 0;
4358 
4359 	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4360 	if (!peer_esw)
4361 		return -ENODEV;
4362 
4363 	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4364 	peer_priv = netdev_priv(peer_urpriv->netdev);
4365 
4366 	/* in_mdev is the mdev the packet originated from.
4367 	 * Packets redirected to the uplink therefore use the mdev of the
4368 	 * original flow, while packets redirected from the uplink use the
4369 	 * peer mdev.
4370 	 */
4371 	if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4372 		in_mdev = peer_priv->mdev;
4373 	else
4374 		in_mdev = priv->mdev;
4375 
4376 	parse_attr = flow->attr->parse_attr;
4377 	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4378 					 parse_attr->filter_dev,
4379 					 attr->in_rep, in_mdev);
4380 	if (IS_ERR(peer_flow)) {
4381 		err = PTR_ERR(peer_flow);
4382 		goto out;
4383 	}
4384 
4385 	flow->peer_flow = peer_flow;
4386 	flow_flag_set(flow, DUP);
4387 	mutex_lock(&esw->offloads.peer_mutex);
4388 	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4389 	mutex_unlock(&esw->offloads.peer_mutex);
4390 
4391 out:
4392 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4393 	return err;
4394 }
4395 
4396 static int
4397 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4398 		   struct flow_cls_offload *f,
4399 		   unsigned long flow_flags,
4400 		   struct net_device *filter_dev,
4401 		   struct mlx5e_tc_flow **__flow)
4402 {
4403 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4404 	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4405 	struct mlx5_core_dev *in_mdev = priv->mdev;
4406 	struct mlx5e_tc_flow *flow;
4407 	int err;
4408 
4409 	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4410 				    in_mdev);
4411 	if (IS_ERR(flow))
4412 		return PTR_ERR(flow);
4413 
4414 	if (is_peer_flow_needed(flow)) {
4415 		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4416 		if (err) {
4417 			mlx5e_tc_del_fdb_flow(priv, flow);
4418 			goto out;
4419 		}
4420 	}
4421 
4422 	*__flow = flow;
4423 
4424 	return 0;
4425 
4426 out:
4427 	return err;
4428 }
4429 
4430 static int
4431 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4432 		   struct flow_cls_offload *f,
4433 		   unsigned long flow_flags,
4434 		   struct net_device *filter_dev,
4435 		   struct mlx5e_tc_flow **__flow)
4436 {
4437 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4438 	struct netlink_ext_ack *extack = f->common.extack;
4439 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4440 	struct mlx5e_tc_flow *flow;
4441 	int attr_size, err;
4442 
4443 	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4444 		if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4445 			return -EOPNOTSUPP;
4446 	} else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4447 		return -EOPNOTSUPP;
4448 	}
4449 
4450 	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4451 	attr_size  = sizeof(struct mlx5_nic_flow_attr);
4452 	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4453 			       &parse_attr, &flow);
4454 	if (err)
4455 		goto out;
4456 
4457 	parse_attr->filter_dev = filter_dev;
4458 	mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4459 
4460 	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4461 			       f, filter_dev);
4462 	if (err)
4463 		goto err_free;
4464 
4465 	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4466 				   &flow->attr->ct_attr, extack);
4467 	if (err)
4468 		goto err_free;
4469 
4470 	err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4471 	if (err)
4472 		goto err_free;
4473 
4474 	err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4475 	if (err)
4476 		goto err_free;
4477 
4478 	flow_flag_set(flow, OFFLOADED);
4479 	*__flow = flow;
4480 
4481 	return 0;
4482 
4483 err_free:
4484 	flow_flag_set(flow, FAILED);
4485 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4486 	mlx5e_flow_put(priv, flow);
4487 out:
4488 	return err;
4489 }
4490 
4491 static int
4492 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4493 		  struct flow_cls_offload *f,
4494 		  unsigned long flags,
4495 		  struct net_device *filter_dev,
4496 		  struct mlx5e_tc_flow **flow)
4497 {
4498 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4499 	unsigned long flow_flags;
4500 	int err;
4501 
4502 	get_flags(flags, &flow_flags);
4503 
4504 	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4505 		return -EOPNOTSUPP;
4506 
4507 	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4508 		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4509 					 filter_dev, flow);
4510 	else
4511 		err = mlx5e_add_nic_flow(priv, f, flow_flags,
4512 					 filter_dev, flow);
4513 
4514 	return err;
4515 }
4516 
4517 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4518 					   struct mlx5e_rep_priv *rpriv)
4519 {
4520 	/* Offloaded flow rule is allowed to duplicate on non-uplink representor
4521 	 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
4522 	 * function is called from NIC mode.
4523 	 */
4524 	return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4525 }
4526 
4527 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4528 			   struct flow_cls_offload *f, unsigned long flags)
4529 {
4530 	struct netlink_ext_ack *extack = f->common.extack;
4531 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4532 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4533 	struct mlx5e_tc_flow *flow;
4534 	int err = 0;
4535 
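	/* Block eswitch mode changes while the rule is added, and keep a
	 * reference for as long as it stays offloaded.
	 */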
4536 	if (!mlx5_esw_hold(priv->mdev))
4537 		return -EBUSY;
4538 
4539 	mlx5_esw_get(priv->mdev);
4540 
4541 	rcu_read_lock();
4542 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4543 	if (flow) {
4544 		/* Same flow rule offloaded to non-uplink representor sharing tc block,
4545 		 * just return 0.
4546 		 */
4547 		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4548 			goto rcu_unlock;
4549 
4550 		NL_SET_ERR_MSG_MOD(extack,
4551 				   "flow cookie already exists, ignoring");
4552 		netdev_warn_once(priv->netdev,
4553 				 "flow cookie %lx already exists, ignoring\n",
4554 				 f->cookie);
4555 		err = -EEXIST;
4556 		goto rcu_unlock;
4557 	}
4558 rcu_unlock:
4559 	rcu_read_unlock();
4560 	if (flow)
4561 		goto out;
4562 
4563 	trace_mlx5e_configure_flower(f);
4564 	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4565 	if (err)
4566 		goto out;
4567 
4568 	/* Flow rule offloaded to non-uplink representor sharing tc block,
4569 	 * set the flow's owner dev.
4570 	 */
4571 	if (is_flow_rule_duplicate_allowed(dev, rpriv))
4572 		flow->orig_dev = dev;
4573 
4574 	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4575 	if (err)
4576 		goto err_free;
4577 
4578 	mlx5_esw_release(priv->mdev);
4579 	return 0;
4580 
4581 err_free:
4582 	mlx5e_flow_put(priv, flow);
4583 out:
4584 	mlx5_esw_put(priv->mdev);
4585 	mlx5_esw_release(priv->mdev);
4586 	return err;
4587 }
4588 
4589 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4590 {
4591 	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4592 	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4593 
4594 	return flow_flag_test(flow, INGRESS) == dir_ingress &&
4595 		flow_flag_test(flow, EGRESS) == dir_egress;
4596 }
4597 
4598 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4599 			struct flow_cls_offload *f, unsigned long flags)
4600 {
4601 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4602 	struct mlx5e_tc_flow *flow;
4603 	int err;
4604 
4605 	rcu_read_lock();
4606 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4607 	if (!flow || !same_flow_direction(flow, flags)) {
4608 		err = -EINVAL;
4609 		goto errout;
4610 	}
4611 
4612 	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4613 	 * set.
4614 	 */
4615 	if (flow_flag_test_and_set(flow, DELETED)) {
4616 		err = -EINVAL;
4617 		goto errout;
4618 	}
4619 	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4620 	rcu_read_unlock();
4621 
4622 	trace_mlx5e_delete_flower(f);
4623 	mlx5e_flow_put(priv, flow);
4624 
4625 	mlx5_esw_put(priv->mdev);
4626 	return 0;
4627 
4628 errout:
4629 	rcu_read_unlock();
4630 	return err;
4631 }
4632 
4633 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4634 		       struct flow_cls_offload *f, unsigned long flags)
4635 {
4636 	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4637 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4638 	struct mlx5_eswitch *peer_esw;
4639 	struct mlx5e_tc_flow *flow;
4640 	struct mlx5_fc *counter;
4641 	u64 lastuse = 0;
4642 	u64 packets = 0;
4643 	u64 bytes = 0;
4644 	int err = 0;
4645 
4646 	rcu_read_lock();
4647 	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4648 						tc_ht_params));
4649 	rcu_read_unlock();
4650 	if (IS_ERR(flow))
4651 		return PTR_ERR(flow);
4652 
4653 	if (!same_flow_direction(flow, flags)) {
4654 		err = -EINVAL;
4655 		goto errout;
4656 	}
4657 
4658 	if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4659 		counter = mlx5e_tc_get_counter(flow);
4660 		if (!counter)
4661 			goto errout;
4662 
4663 		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4664 	}
4665 
4666 	/* Under multipath it's possible for one rule to be currently
4667 	 * un-offloaded while the other rule is offloaded.
4668 	 */
4669 	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4670 	if (!peer_esw)
4671 		goto out;
4672 
4673 	if (flow_flag_test(flow, DUP) &&
4674 	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
4675 		u64 bytes2;
4676 		u64 packets2;
4677 		u64 lastuse2;
4678 
4679 		counter = mlx5e_tc_get_counter(flow->peer_flow);
4680 		if (!counter)
4681 			goto no_peer_counter;
4682 		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4683 
4684 		bytes += bytes2;
4685 		packets += packets2;
4686 		lastuse = max_t(u64, lastuse, lastuse2);
4687 	}
4688 
4689 no_peer_counter:
4690 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4691 out:
4692 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4693 			  FLOW_ACTION_HW_STATS_DELAYED);
4694 	trace_mlx5e_stats_flower(f);
4695 errout:
4696 	mlx5e_flow_put(priv, flow);
4697 	return err;
4698 }
4699 
4700 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4701 			       struct netlink_ext_ack *extack)
4702 {
4703 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4704 	struct mlx5_eswitch *esw;
4705 	u32 rate_mbps = 0;
4706 	u16 vport_num;
4707 	int err;
4708 
4709 	vport_num = rpriv->rep->vport;
4710 	if (vport_num >= MLX5_VPORT_ECPF) {
4711 		NL_SET_ERR_MSG_MOD(extack,
4712 				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4713 		return -EOPNOTSUPP;
4714 	}
4715 
4716 	esw = priv->mdev->priv.eswitch;
4717 	/* rate is given in bytes/sec.
4718 	 * First convert to bits/sec and then round to the nearest mbit/sec.
4719 	 * mbit means million bits.
4720 	 * Moreover, if rate is non zero we choose to configure to a minimum of
4721 	 * 1 mbit/sec.
4722 	 */
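	/* e.g. rate = 1,250,000 bytes/sec -> 10,000,000 bits/sec -> 10 mbit/sec */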
4723 	if (rate) {
4724 		rate = (rate * BITS_PER_BYTE) + 500000;
4725 		do_div(rate, 1000000);
4726 		rate_mbps = max_t(u32, rate, 1);
4727 	}
4728 
4729 	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
4730 	if (err)
4731 		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4732 
4733 	return err;
4734 }
4735 
4736 int mlx5e_policer_validate(const struct flow_action *action,
4737 			   const struct flow_action_entry *act,
4738 			   struct netlink_ext_ack *extack)
4739 {
4740 	if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
4741 		NL_SET_ERR_MSG_MOD(extack,
4742 				   "Offload not supported when exceed action is not drop");
4743 		return -EOPNOTSUPP;
4744 	}
4745 
4746 	if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
4747 	    !flow_action_is_last_entry(action, act)) {
4748 		NL_SET_ERR_MSG_MOD(extack,
4749 				   "Offload not supported when conform action is ok, but action is not last");
4750 		return -EOPNOTSUPP;
4751 	}
4752 
4753 	if (act->police.peakrate_bytes_ps ||
4754 	    act->police.avrate || act->police.overhead) {
4755 		NL_SET_ERR_MSG_MOD(extack,
4756 				   "Offload not supported when peakrate/avrate/overhead is configured");
4757 		return -EOPNOTSUPP;
4758 	}
4759 
4760 	return 0;
4761 }
4762 
4763 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4764 					struct flow_action *flow_action,
4765 					struct netlink_ext_ack *extack)
4766 {
4767 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4768 	const struct flow_action_entry *act;
4769 	int err;
4770 	int i;
4771 
4772 	if (!flow_action_has_entries(flow_action)) {
4773 		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4774 		return -EINVAL;
4775 	}
4776 
4777 	if (!flow_offload_has_one_action(flow_action)) {
4778 		NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
4779 		return -EOPNOTSUPP;
4780 	}
4781 
4782 	if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
4783 		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4784 		return -EOPNOTSUPP;
4785 	}
4786 
4787 	flow_action_for_each(i, act, flow_action) {
4788 		switch (act->id) {
4789 		case FLOW_ACTION_POLICE:
4790 			if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
4791 				NL_SET_ERR_MSG_MOD(extack,
4792 						   "Offload not supported when conform action is not continue");
4793 				return -EOPNOTSUPP;
4794 			}
4795 
4796 			err = mlx5e_policer_validate(flow_action, act, extack);
4797 			if (err)
4798 				return err;
4799 
4800 			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4801 			if (err)
4802 				return err;
4803 
4804 			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4805 			break;
4806 		default:
4807 			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4808 			return -EOPNOTSUPP;
4809 		}
4810 	}
4811 
4812 	return 0;
4813 }
4814 
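/* Offload a matchall classifier: only priority 1 is accepted, after which
 * the actions are validated and applied by scan_tc_matchall_fdb_actions().
 */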
int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}

void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
			  FLOW_ACTION_HW_STATS_DELAYED);
}

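/* Called when a netdev sharing the same HW is being unregistered: walk the
 * hairpin table and notify every pair whose peer vhca_id matches via
 * mlx5_core_hairpin_clear_dead_peer(). Entries are taken with a reference
 * and released after waiting for their resources to be ready.
 */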
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!mlx5e_same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&tc->hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);

		mlx5e_hairpin_put(priv, hpe);
	}
}

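/* Netdevice notifier: on NETDEV_UNREGISTER of another mlx5e netdev, clear
 * hairpin state that references the departing peer device.
 */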
static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	priv = tc->priv;
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}

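/* Size the NIC TC flow table: a group is capped by both the number of HW
 * flow counters and MLX5E_TC_TABLE_MAX_GROUP_SIZE, and the table by
 * MLX5E_TC_TABLE_NUM_GROUPS such groups and the device's log_max_ft_size.
 */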
static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
{
	int tc_grp_size, tc_tbl_size;
	u32 max_flow_counter;

	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

	tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

	tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
			    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

	return tc_tbl_size;
}

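/* Create a single-entry miss table at MLX5E_TC_MISS_LEVEL in the kernel
 * namespace; it serves as the default destination of the TC chains set up
 * in mlx5e_tc_nic_init().
 */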
static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_flow_table **ft = &tc->miss_t;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err = 0;

	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;
	ft_attr.level = MLX5E_TC_MISS_LEVEL;
	ft_attr.prio = 0;
	ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);

	*ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(*ft)) {
		err = PTR_ERR(*ft);
		netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
	}

	return err;
}

static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	mlx5_destroy_flow_table(tc->miss_t);
}

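/* Initialize NIC-mode TC offload: the flow rhashtable, mod-hdr and hairpin
 * tables, the chain-tag mapping context, the miss table, the chains
 * infrastructure, post-action and CT support, and a per-net netdevice
 * notifier used to detect hairpin peer removal. On failure, resources are
 * released in reverse order of creation.
 */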
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_core_dev *dev = priv->mdev;
	struct mapping_ctx *chains_mapping;
	struct mlx5_chains_attr attr = {};
	u64 mapping_id;
	int err;

	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
	mutex_init(&tc->t_lock);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);
	tc->priv = priv;

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
					       sizeof(struct mlx5_mapped_obj),
					       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);

	if (IS_ERR(chains_mapping)) {
		err = PTR_ERR(chains_mapping);
		goto err_mapping;
	}
	tc->mapping = chains_mapping;

	err = mlx5e_tc_nic_create_miss_table(priv);
	if (err)
		goto err_chains;

	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
	attr.default_ft = tc->miss_t;
	attr.mapping = chains_mapping;

	tc->chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(tc->chains)) {
		err = PTR_ERR(tc->chains);
		goto err_miss;
	}

	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	err = register_netdevice_notifier_dev_net(priv->netdev,
						  &tc->netdevice_nb,
						  &tc->netdevice_nn);
	if (err) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
		goto err_reg;
	}

	return 0;

err_reg:
	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mlx5_chains_destroy(tc->chains);
err_miss:
	mlx5e_tc_nic_destroy_miss_table(priv);
err_chains:
	mapping_destroy(chains_mapping);
err_mapping:
	rhashtable_destroy(&tc->ht);
	return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier_dev_net(priv->netdev,
						      &tc->netdevice_nb,
						      &tc->netdevice_nn);

	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);

	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mapping_destroy(tc->mapping);
	mlx5_chains_destroy(tc->chains);
	mlx5e_tc_nic_destroy_miss_table(priv);
}

int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
{
	int err;

	err = rhashtable_init(tc_ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);

	return 0;
}

void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

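/* Initialize switchdev-mode (FDB) TC offload state hanging off the uplink
 * representor: post-action and CT contexts, internal-port and sampling
 * support, the tunnel and tunnel-encap-options mapping contexts, and the
 * tunnel encap/decap infrastructure.
 */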
int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
	struct mlx5e_rep_priv *rpriv;
	struct mapping_ctx *mapping;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	u64 mapping_id;
	int err = 0;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
						       MLX5_FLOW_NAMESPACE_FDB);
	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
					       esw_chains(esw),
					       &esw->offloads.mod_hdr,
					       MLX5_FLOW_NAMESPACE_FDB,
					       uplink_priv->post_act);

	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));

	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);

	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);

	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
					sizeof(struct tunnel_match_key),
					TUNNEL_INFO_BITS_MASK, true);

	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_tun_mapping;
	}
	uplink_priv->tunnel_mapping = mapping;

	/* The last two values are reserved for the stack devices' slow path
	 * table mark and the bridge ingress push mark.
	 */
	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
					sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_enc_opts_mapping;
	}
	uplink_priv->tunnel_enc_opts_mapping = mapping;

	uplink_priv->encap = mlx5e_tc_tun_init(priv);
	if (IS_ERR(uplink_priv->encap)) {
		err = PTR_ERR(uplink_priv->encap);
		goto err_register_fib_notifier;
	}

	return 0;

err_register_fib_notifier:
	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
	mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	netdev_warn(priv->netdev,
		    "Failed to initialize tc (eswitch), err: %d", err);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	return err;
}

void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
{
	mlx5e_tc_tun_cleanup(uplink_priv->encap);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);

	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

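/* Work item that retries offloading of flows on the "unready" list, i.e.
 * flows that could not be offloaded when they were added; flows that now
 * offload successfully are removed from the list.
 */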
void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}

static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
				     struct flow_cls_offload *cls_flower,
				     unsigned long flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	if (mlx5e_is_uplink_rep(priv))
		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
	else
		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
	default:
		return -EOPNOTSUPP;
	}
}

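/* Restore TC metadata on a received packet: the chain tag is read from the
 * CQE flow-table metadata and resolved through the chain mapping, the chain
 * is stored in the skb's TC extension, and the CT zone is restored. Returns
 * false if any step fails (only when CONFIG_NET_TC_SKB_EXT is enabled).
 */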
bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
			 struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 chain = 0, chain_tag, reg_b, zone_restore_id;
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct mlx5_mapped_obj mapped_obj;
	struct tc_skb_ext *tc_skb_ext;
	struct mlx5e_tc_table *tc;
	int err;

	reg_b = be32_to_cpu(cqe->ft_metadata);
	tc = mlx5e_fs_get_tc(priv->fs);
	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;

	err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find chain for chain tag: %d, err: %d\n",
			   chain_tag, err);
		return false;
	}

	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
		chain = mapped_obj.chain;
		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (WARN_ON(!tc_skb_ext))
			return false;

		tc_skb_ext->chain = chain;

		zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
			ESW_ZONE_ID_MASK;

		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
					      zone_restore_id))
			return false;
	} else {
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return true;
}