1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <linux/etherdevice.h>
5 #include <linux/idr.h>
6 #include <linux/mlx5/driver.h>
7 #include <linux/mlx5/mlx5_ifc.h>
8 #include <linux/mlx5/vport.h>
9 #include <linux/mlx5/fs.h>
10 #include "mlx5_core.h"
11 #include "eswitch.h"
12 #include "en.h"
13 #include "en_tc.h"
14 #include "fs_core.h"
15 #include "esw/indir_table.h"
16 #include "lib/fs_chains.h"
17 #include "en/mod_hdr.h"
18
19 #define MLX5_ESW_INDIR_TABLE_SIZE 128
20 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
21 #define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
22
/* One recirculation steering rule inside an indirect table entry, keyed by
 * the tunnel (VNI, outer destination IP) pair. Rules are refcounted so that
 * multiple offloaded flows sharing the same tunnel reuse one rule.
 */
struct mlx5_esw_indir_table_rule {
	struct list_head list;		/* linked on mlx5_esw_indir_table_entry::recirc_rules */
	struct mlx5_flow_handle *handle; /* the installed recirc flow rule */
	union {
		__be32 v4;		/* IPv4 tunnel destination address */
		struct in6_addr v6;	/* IPv6 tunnel destination address */
	} dst_ip;			/* part of the lookup key (with vni) */
	u32 vni;			/* VXLAN VNI, part of the lookup key */
	struct mlx5_modify_hdr *mh;	/* modify-header rewriting source vport metadata */
	refcount_t refcnt;		/* number of flows using this rule */
};
34
/* Per-(vport, ip_version) indirect flow table. Holds a group of
 * recirculation rules (one per tunnel) plus a single lowest-priority
 * forward rule that sends unmatched packets straight to the vport.
 * Entries live in mlx5_esw_indir_table::table, hashed by
 * (vport << 16 | ip_version).
 */
struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;	/* hashtable linkage */
	struct mlx5_flow_table *ft;	/* the indirect FDB flow table itself */
	struct mlx5_flow_group *recirc_grp; /* group for recirc rules, indices [0, RECIRC_IDX_MAX] */
	struct mlx5_flow_group *fwd_grp;    /* single-entry group at FWD_IDX */
	struct mlx5_flow_handle *fwd_rule;  /* catch-all forward-to-vport rule */
	struct list_head recirc_rules;	/* list of mlx5_esw_indir_table_rule */
	int recirc_cnt;			/* number of recirc rules installed */
	int fwd_ref;			/* refs taken by non-decap (fwd-only) users */

	u16 vport;			/* destination vport number (hash key part) */
	u8 ip_version;			/* 4 or 6 (hash key part) */
};
48
/* Top-level container for all indirect table entries of an eswitch. */
struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8); /* entries keyed by (vport << 16 | ip_version) */
};
53
54 struct mlx5_esw_indir_table *
mlx5_esw_indir_table_init(void)55 mlx5_esw_indir_table_init(void)
56 {
57 struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
58
59 if (!indir)
60 return ERR_PTR(-ENOMEM);
61
62 mutex_init(&indir->lock);
63 hash_init(indir->table);
64 return indir;
65 }
66
67 void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table * indir)68 mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
69 {
70 mutex_destroy(&indir->lock);
71 kvfree(indir);
72 }
73
74 bool
mlx5_esw_indir_table_needed(struct mlx5_eswitch * esw,struct mlx5_flow_attr * attr,u16 vport_num,struct mlx5_core_dev * dest_mdev)75 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
76 struct mlx5_flow_attr *attr,
77 u16 vport_num,
78 struct mlx5_core_dev *dest_mdev)
79 {
80 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
81 bool vf_sf_vport;
82
83 vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
84 mlx5_esw_is_sf_vport(esw, vport_num);
85
86 /* Use indirect table for all IP traffic from UL to VF with vport
87 * destination when source rewrite flag is set.
88 */
89 return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
90 vf_sf_vport &&
91 esw->dev == dest_mdev &&
92 attr->ip_version &&
93 attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
94 }
95
96 u16
mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr * attr)97 mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
98 {
99 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
100
101 return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
102 }
103
104 static struct mlx5_esw_indir_table_rule *
mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry * e,struct mlx5_esw_flow_attr * attr)105 mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
106 struct mlx5_esw_flow_attr *attr)
107 {
108 struct mlx5_esw_indir_table_rule *rule;
109
110 list_for_each_entry(rule, &e->recirc_rules, list)
111 if (rule->vni == attr->rx_tun_attr->vni &&
112 !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
113 sizeof(attr->rx_tun_attr->dst_ip)))
114 goto found;
115 return NULL;
116
117 found:
118 refcount_inc(&rule->refcnt);
119 return rule;
120 }
121
/* Get (or create) a recirculation rule for the flow's tunnel in entry @e.
 *
 * If a rule with the same (VNI, outer dst IP) already exists, a reference
 * is taken on it and 0 is returned. Otherwise a new rule is installed
 * that:
 *   - matches outer IP version (or ethertype when the ip_version field is
 *     not supported), the tunnel outer destination IP, the VXLAN VNI, and
 *     uplink source-port metadata in reg_c_0;
 *   - applies a modify header that rewrites the source-port metadata to
 *     the decap vport and sets the tunnel register to
 *     ESW_TUN_SLOW_TABLE_GOTO_VPORT, so the packet recirculates as if it
 *     had arrived from the destination vport;
 *   - forwards to the root offload table (chain 0, prio 1).
 *
 * Caller must hold the indirect table lock.
 * Returns 0 on success or a negative errno.
 */
static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	/* Reuse an existing rule for the same tunnel, if any. */
	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	/* Recirc group full; the last table index is reserved for the
	 * catch-all forward rule.
	 */
	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	/* Prefer matching on the outer ip_version field; fall back to
	 * matching the ethertype on devices that lack the capability.
	 */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		/* Non-IP traffic cannot be matched here. */
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	/* Match the tunnel outer destination address. */
	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	/* Match the VXLAN VNI copied from the original flow spec. */
	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	/* Match packets whose source metadata marks the uplink vport. */
	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	/* Forward to the root table; the destination sits at a lower level,
	 * so flow-level checks must be ignored.
	 */
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	/* Success: the modify-header actions buffer is no longer needed. */
	mlx5e_mod_hdr_dealloc(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}
259
/* Drop one reference on the recirc rule matching the flow's tunnel
 * (VNI, outer dst IP) in entry @e; when the last reference goes away,
 * delete the rule and release its table reference and modify header.
 *
 * Caller must hold the indirect table lock. Silently returns if no
 * matching rule exists.
 */
static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == esw_attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
			goto found;

	return;

found:
	if (!refcount_dec_and_test(&rule->refcnt))
		return;

	/* Last user gone: tear down in reverse order of creation. */
	mlx5_del_flow_rules(rule->handle);
	mlx5_chains_put_table(chains, 0, 1, 0);
	mlx5_modify_header_dealloc(esw->dev, rule->mh);
	list_del(&rule->list);
	kfree(rule);
	e->recirc_cnt--;
}
287
/* Create the recirculation flow group of entry @e, spanning table indices
 * [0, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX]. Its match criteria mirror the
 * fields the recirc rules match on: outer IP version (or ethertype),
 * tunnel destination IP, VXLAN VNI and source-port metadata (reg_c_0).
 * Also initializes the entry's recirc rule list/counter.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	/* Match on ip_version when supported, ethertype otherwise —
	 * must agree with mlx5_esw_indir_table_rule_get().
	 */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}
339
/* Create the single-entry forward group at the last table index
 * (MLX5_ESW_INDIR_TABLE_FWD_IDX) and install a match-all rule in it that
 * forwards packets to the entry's destination vport. This is the
 * catch-all for traffic not hitting any recirc rule.
 *
 * On rule-creation failure the group is destroyed before returning, so
 * the caller only has to unwind earlier resources.
 * Returns 0 on success or a negative errno.
 */
static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	/* Empty spec: the fwd rule matches every packet. */
	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* Hold one entry */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out:
	kvfree(spec);
	kvfree(in);
	return err;
}
384
/* Create a new indirect table entry for (@vport, attr->ip_version):
 * allocate the unmanaged FDB flow table, its recirc and fwd groups, and —
 * when @decap is set — the initial recirc rule for this flow's tunnel.
 * The entry is inserted into the eswitch's indir hashtable under key
 * (vport << 16 | ip_version).
 *
 * Caller must hold the indirect table lock.
 * Returns the new entry or an ERR_PTR on failure.
 */
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	/* Unmanaged table at level 1 of the TC offload priority; sized for
	 * MLX5_ESW_INDIR_TABLE_SIZE rules (recirc rules + one fwd rule).
	 */
	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	/* Non-decap creators hold a fwd reference from the start. */
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}
448
449 static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch * esw,u16 vport,u8 ip_version)450 mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
451 {
452 struct mlx5_esw_indir_table_entry *e;
453 u32 key = vport << 16 | ip_version;
454
455 hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
456 if (e->vport == vport && e->ip_version == ip_version)
457 return e;
458
459 return NULL;
460 }
461
mlx5_esw_indir_table_get(struct mlx5_eswitch * esw,struct mlx5_flow_attr * attr,struct mlx5_flow_spec * spec,u16 vport,bool decap)462 struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
463 struct mlx5_flow_attr *attr,
464 struct mlx5_flow_spec *spec,
465 u16 vport, bool decap)
466 {
467 struct mlx5_esw_indir_table_entry *e;
468 int err;
469
470 mutex_lock(&esw->fdb_table.offloads.indir->lock);
471 e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
472 if (e) {
473 if (!decap) {
474 e->fwd_ref++;
475 } else {
476 err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
477 if (err)
478 goto out_err;
479 }
480 } else {
481 e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
482 if (IS_ERR(e)) {
483 err = PTR_ERR(e);
484 esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
485 goto out_err;
486 }
487 }
488 mutex_unlock(&esw->fdb_table.offloads.indir->lock);
489 return e->ft;
490
491 out_err:
492 mutex_unlock(&esw->fdb_table.offloads.indir->lock);
493 return ERR_PTR(err);
494 }
495
mlx5_esw_indir_table_put(struct mlx5_eswitch * esw,struct mlx5_flow_attr * attr,u16 vport,bool decap)496 void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
497 struct mlx5_flow_attr *attr,
498 u16 vport, bool decap)
499 {
500 struct mlx5_esw_indir_table_entry *e;
501
502 mutex_lock(&esw->fdb_table.offloads.indir->lock);
503 e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
504 if (!e)
505 goto out;
506
507 if (!decap)
508 e->fwd_ref--;
509 else
510 mlx5_esw_indir_table_rule_put(esw, attr, e);
511
512 if (e->fwd_ref || e->recirc_cnt)
513 goto out;
514
515 hash_del(&e->hlist);
516 mlx5_destroy_flow_group(e->recirc_grp);
517 mlx5_del_flow_rules(e->fwd_rule);
518 mlx5_destroy_flow_group(e->fwd_grp);
519 mlx5_destroy_flow_table(e->ft);
520 kfree(e);
521 out:
522 mutex_unlock(&esw->fdb_table.offloads.indir->lock);
523 }
524