1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include "eswitch.h"
5 #include "esw/qos.h"
6 #include "en/port.h"
7 #define CREATE_TRACE_POINTS
8 #include "diag/qos_tracepoint.h"
9
/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Turn a rate into a bw_share value: @rate is divided by @divider (rounding
 * up), raised to at least MLX5_MIN_BW_SHARE and finally capped at @limit.
 * Both comparisons are performed on u32 values.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit)			\
	min_t(u32,							\
	      max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), \
	      limit)
15
16 struct mlx5_esw_rate_group {
17 u32 tsar_ix;
18 u32 max_rate;
19 u32 min_rate;
20 u32 bw_share;
21 struct list_head list;
22 };
23
/* Program max_average_bw and bw_share of an existing E-Switch scheduling
 * element.
 *
 * @sched_ctx is a caller-provided scheduling_context buffer; the parent id,
 * max rate and bw_share fields are filled in here. Only max_average_bw and
 * bw_share are flagged in the modify bitmask.
 *
 * Returns -EOPNOTSUPP when the device lacks the qos or esw_scheduling
 * capability, otherwise the result of the modify command.
 */
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 parent_ix, u32 tsar_ix,
			       u32 max_rate, u32 bw_share)
{
	const u32 modify_mask =
		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW |
		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	if (!(MLX5_CAP_GEN(dev, qos) && MLX5_CAP_QOS(dev, esw_scheduling)))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx, tsar_ix, modify_mask);
}
45
/* Apply @max_rate and @bw_share to the scheduling element of @group, whose
 * parent is the E-Switch root TSAR.
 *
 * On failure an extack message is set. The tracepoint is emitted regardless
 * of the outcome. Returns the result of esw_qos_tsar_config().
 */
static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *mdev = esw->dev;
	int ret;

	ret = esw_qos_tsar_config(mdev, ctx, esw->qos.root_tsar_ix,
				  group->tsar_ix, max_rate, bw_share);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	trace_mlx5_esw_group_qos_config(mdev, group, group->tsar_ix, bw_share, max_rate);

	return ret;
}
63
/* Apply @max_rate and @bw_share to the scheduling element of @vport.
 *
 * The element's parent is the vport's rate group when it has one, otherwise
 * the E-Switch root TSAR.
 *
 * Returns -EIO when QoS is not enabled on the vport, the error from
 * esw_qos_tsar_config() on failure (after logging and setting extack),
 * or 0 on success.
 */
static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *parent = vport->qos.group;
	u32 parent_ix;
	void *attrs;
	int ret;

	if (!vport->qos.enabled)
		return -EIO;

	if (parent)
		parent_ix = parent->tsar_ix;
	else
		parent_ix = esw->qos.root_tsar_ix;

	MLX5_SET(scheduling_context, ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attrs = MLX5_ADDR_OF(scheduling_context, ctx, element_attributes);
	MLX5_SET(vport_element, attrs, vport_number, vport->vport);

	ret = esw_qos_tsar_config(esw->dev, ctx, parent_ix, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (!ret) {
		trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
		return 0;
	}

	esw_warn(esw->dev,
		 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
		 vport->vport, ret);
	NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");

	return ret;
}
100
esw_qos_calculate_min_rate_divider(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,bool group_level)101 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
102 struct mlx5_esw_rate_group *group,
103 bool group_level)
104 {
105 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
106 struct mlx5_vport *evport;
107 u32 max_guarantee = 0;
108 unsigned long i;
109
110 if (group_level) {
111 struct mlx5_esw_rate_group *group;
112
113 list_for_each_entry(group, &esw->qos.groups, list) {
114 if (group->min_rate < max_guarantee)
115 continue;
116 max_guarantee = group->min_rate;
117 }
118 } else {
119 mlx5_esw_for_each_vport(esw, i, evport) {
120 if (!evport->enabled || !evport->qos.enabled ||
121 evport->qos.group != group || evport->qos.min_rate < max_guarantee)
122 continue;
123 max_guarantee = evport->qos.min_rate;
124 }
125 }
126
127 if (max_guarantee)
128 return max_t(u32, max_guarantee / fw_max_bw_share, 1);
129
130 /* If vports min rate divider is 0 but their group has bw_share configured, then
131 * need to set bw_share for vports to minimal value.
132 */
133 if (!group_level && !max_guarantee && group && group->bw_share)
134 return 1;
135 return 0;
136 }
137
/* Translate @min_rate into a bw_share value using @divider, capped at
 * @fw_max. A zero @divider yields a bw_share of 0.
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;

	return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
}
145
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)146 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
147 struct mlx5_esw_rate_group *group,
148 struct netlink_ext_ack *extack)
149 {
150 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
151 u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
152 struct mlx5_vport *evport;
153 unsigned long i;
154 u32 bw_share;
155 int err;
156
157 mlx5_esw_for_each_vport(esw, i, evport) {
158 if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
159 continue;
160 bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
161
162 if (bw_share == evport->qos.bw_share)
163 continue;
164
165 err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
166 if (err)
167 return err;
168
169 evport->qos.bw_share = bw_share;
170 }
171
172 return 0;
173 }
174
/* Recompute and program bw_share for every rate group of the E-Switch using
 * @divider.
 *
 * Groups whose bw_share would not change are skipped. For each group that is
 * updated, the bw_share of its vports is re-normalized as well. Stops at and
 * returns the first error; returns 0 on success.
 */
static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 share_cap = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *grp;

	list_for_each_entry(grp, &esw->qos.groups, list) {
		u32 new_share = esw_qos_calc_bw_share(grp->min_rate, divider, share_cap);
		int ret;

		if (new_share == grp->bw_share)
			continue;

		ret = esw_qos_group_config(esw, grp, grp->max_rate, new_share, extack);
		if (ret)
			return ret;

		grp->bw_share = new_share;

		/* All the group's vports need to be set with default bw_share
		 * to enable them with QOS
		 */
		ret = esw_qos_normalize_vports_min_rate(esw, grp, extack);
		if (ret)
			return ret;
	}

	return 0;
}
206
/* Set the minimum rate of @evport and re-normalize the bw_share of all
 * vports in its group. Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP when a non-zero @min_rate is requested but the device
 * lacks esw_bw_share or its max_tsar_bw_share is below MLX5_MIN_BW_SHARE.
 * Returns 0 without touching anything when @min_rate is unchanged. If the
 * normalization fails the stored min_rate is rolled back and the error is
 * returned.
 */
static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 share_cap, old_min_rate;
	bool supported;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	share_cap = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
		    share_cap >= MLX5_MIN_BW_SHARE;
	if (!supported && min_rate)
		return -EOPNOTSUPP;

	old_min_rate = evport->qos.min_rate;
	if (old_min_rate == min_rate)
		return 0;

	evport->qos.min_rate = min_rate;
	ret = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (ret)
		evport->qos.min_rate = old_min_rate;

	return ret;
}
231
/* Set the maximum rate of @evport. Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP when a non-zero @max_rate is requested but the device
 * lacks esw_rate_limit, and 0 without touching anything when @max_rate is
 * unchanged. The stored max_rate is updated only when the hardware
 * configuration succeeds.
 */
static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	bool rate_limit_cap;
	u32 hw_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	rate_limit_cap = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
	if (!rate_limit_cap && max_rate)
		return -EOPNOTSUPP;
	if (evport->qos.max_rate == max_rate)
		return 0;

	/* If parent group has rate limit need to set to group
	 * value when new max rate is 0.
	 */
	if (!max_rate && evport->qos.group)
		hw_rate = evport->qos.group->max_rate;
	else
		hw_rate = max_rate;

	ret = esw_qos_vport_config(esw, evport, hw_rate, evport->qos.bw_share, extack);
	if (ret)
		return ret;

	evport->qos.max_rate = max_rate;

	return 0;
}
260
/* Set the minimum rate of @group and re-normalize the bw_share of all groups.
 *
 * Returns -EOPNOTSUPP when the device lacks esw_bw_share or its
 * max_tsar_bw_share is below MLX5_MIN_BW_SHARE (even for a zero @min_rate),
 * and 0 without touching anything when @min_rate is unchanged. On a
 * normalization failure the stored min_rate is rolled back, a second
 * normalization pass tries to restore the previous bw_share values, and the
 * original error is returned.
 */
static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 share_cap = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 old_min_rate, div;
	int ret;

	if (!MLX5_CAP_QOS(esw->dev, esw_bw_share) || share_cap < MLX5_MIN_BW_SHARE)
		return -EOPNOTSUPP;

	old_min_rate = group->min_rate;
	if (old_min_rate == min_rate)
		return 0;

	group->min_rate = min_rate;
	div = esw_qos_calculate_min_rate_divider(esw, group, true);
	ret = esw_qos_normalize_groups_min_rate(esw, div, extack);
	if (!ret)
		return 0;

	group->min_rate = old_min_rate;
	NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

	/* Attempt restoring previous configuration */
	div = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (esw_qos_normalize_groups_min_rate(esw, div, extack))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");

	return ret;
}
291
/* Set the maximum rate of @group and propagate it to the group's vports that
 * have no max rate of their own.
 *
 * Returns 0 when @max_rate is unchanged or everything succeeded. If
 * configuring the group element fails, that error is returned and nothing is
 * changed. Once the group is updated, every unlimited vport is attempted;
 * a failure on one vport does not stop the others, and the first such error
 * is returned so that it cannot be masked by a later vport succeeding.
 */
static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	int first_err = 0;
	unsigned long i;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (!err)
			continue;

		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch vport implicit rate limit setting failed");
		/* Remember the first failure; keep going for the other vports */
		if (!first_err)
			first_err = err;
	}

	return first_err;
}
325
/* Create the vport scheduling element for @vport with the given @max_rate
 * and @bw_share.
 *
 * The element is parented to the vport's rate group when it has one,
 * otherwise to the E-Switch root TSAR. The new element index is stored in
 * vport->qos.esw_tsar_ix. Returns 0 on success or the command error, which
 * is also logged.
 */
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *parent = vport->qos.group;
	u32 parent_ix;
	void *attrs;
	int ret;

	if (parent)
		parent_ix = parent->tsar_ix;
	else
		parent_ix = esw->qos.root_tsar_ix;

	MLX5_SET(scheduling_context, ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attrs = MLX5_ADDR_OF(scheduling_context, ctx, element_attributes);
	MLX5_SET(vport_element, attrs, vport_number, vport->vport);
	MLX5_SET(scheduling_context, ctx, parent_element_id, parent_ix);
	MLX5_SET(scheduling_context, ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, ctx, bw_share, bw_share);

	ret = mlx5_create_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						 ctx, &vport->qos.esw_tsar_ix);
	if (!ret)
		return 0;

	esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
		 vport->vport, ret);

	return ret;
}
358
esw_qos_update_group_scheduling_element(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * curr_group,struct mlx5_esw_rate_group * new_group,struct netlink_ext_ack * extack)359 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
360 struct mlx5_vport *vport,
361 struct mlx5_esw_rate_group *curr_group,
362 struct mlx5_esw_rate_group *new_group,
363 struct netlink_ext_ack *extack)
364 {
365 u32 max_rate;
366 int err;
367
368 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
369 SCHEDULING_HIERARCHY_E_SWITCH,
370 vport->qos.esw_tsar_ix);
371 if (err) {
372 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
373 return err;
374 }
375
376 vport->qos.group = new_group;
377 max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
378
379 /* If vport is unlimited, we set the group's value.
380 * Therefore, if the group is limited it will apply to
381 * the vport as well and if not, vport will remain unlimited.
382 */
383 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
384 if (err) {
385 NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
386 goto err_sched;
387 }
388
389 return 0;
390
391 err_sched:
392 vport->qos.group = curr_group;
393 max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
394 if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
395 esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
396 vport->vport);
397
398 return err;
399 }
400
esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)401 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
402 struct mlx5_vport *vport,
403 struct mlx5_esw_rate_group *group,
404 struct netlink_ext_ack *extack)
405 {
406 struct mlx5_esw_rate_group *new_group, *curr_group;
407 int err;
408
409 if (!vport->enabled)
410 return -EINVAL;
411
412 curr_group = vport->qos.group;
413 new_group = group ?: esw->qos.group0;
414 if (curr_group == new_group)
415 return 0;
416
417 err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
418 if (err)
419 return err;
420
421 /* Recalculate bw share weights of old and new groups */
422 if (vport->qos.bw_share || new_group->bw_share) {
423 esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
424 esw_qos_normalize_vports_min_rate(esw, new_group, extack);
425 }
426
427 return 0;
428 }
429
430 static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)431 __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
432 {
433 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
434 struct mlx5_esw_rate_group *group;
435 u32 divider;
436 int err;
437
438 group = kzalloc(sizeof(*group), GFP_KERNEL);
439 if (!group)
440 return ERR_PTR(-ENOMEM);
441
442 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
443 esw->qos.root_tsar_ix);
444 err = mlx5_create_scheduling_element_cmd(esw->dev,
445 SCHEDULING_HIERARCHY_E_SWITCH,
446 tsar_ctx,
447 &group->tsar_ix);
448 if (err) {
449 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
450 goto err_sched_elem;
451 }
452
453 list_add_tail(&group->list, &esw->qos.groups);
454
455 divider = esw_qos_calculate_min_rate_divider(esw, group, true);
456 if (divider) {
457 err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
458 if (err) {
459 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
460 goto err_min_rate;
461 }
462 }
463 trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
464
465 return group;
466
467 err_min_rate:
468 list_del(&group->list);
469 if (mlx5_destroy_scheduling_element_cmd(esw->dev,
470 SCHEDULING_HIERARCHY_E_SWITCH,
471 group->tsar_ix))
472 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
473 err_sched_elem:
474 kfree(group);
475 return ERR_PTR(err);
476 }
477
478 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
479 static void esw_qos_put(struct mlx5_eswitch *esw);
480
481 static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)482 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
483 {
484 struct mlx5_esw_rate_group *group;
485 int err;
486
487 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
488 return ERR_PTR(-EOPNOTSUPP);
489
490 err = esw_qos_get(esw, extack);
491 if (err)
492 return ERR_PTR(err);
493
494 group = __esw_qos_create_rate_group(esw, extack);
495 if (IS_ERR(group))
496 esw_qos_put(esw);
497
498 return group;
499 }
500
__esw_qos_destroy_rate_group(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)501 static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
502 struct mlx5_esw_rate_group *group,
503 struct netlink_ext_ack *extack)
504 {
505 u32 divider;
506 int err;
507
508 list_del(&group->list);
509
510 divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
511 err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
512 if (err)
513 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
514
515 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
516 SCHEDULING_HIERARCHY_E_SWITCH,
517 group->tsar_ix);
518 if (err)
519 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
520
521 trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
522
523 kfree(group);
524
525 return err;
526 }
527
/* Destroy @group and drop one E-Switch QoS reference.
 * Returns the result of __esw_qos_destroy_rate_group().
 */
static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int ret = __esw_qos_destroy_rate_group(esw, group, extack);

	esw_qos_put(esw);

	return ret;
}
539
esw_qos_element_type_supported(struct mlx5_core_dev * dev,int type)540 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
541 {
542 switch (type) {
543 case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
544 return MLX5_CAP_QOS(dev, esw_element_type) &
545 ELEMENT_TYPE_CAP_MASK_TASR;
546 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
547 return MLX5_CAP_QOS(dev, esw_element_type) &
548 ELEMENT_TYPE_CAP_MASK_VPORT;
549 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
550 return MLX5_CAP_QOS(dev, esw_element_type) &
551 ELEMENT_TYPE_CAP_MASK_VPORT_TC;
552 case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
553 return MLX5_CAP_QOS(dev, esw_element_type) &
554 ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
555 }
556 return false;
557 }
558
esw_qos_create(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)559 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
560 {
561 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
562 struct mlx5_core_dev *dev = esw->dev;
563 __be32 *attr;
564 int err;
565
566 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
567 return -EOPNOTSUPP;
568
569 if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
570 return -EOPNOTSUPP;
571
572 MLX5_SET(scheduling_context, tsar_ctx, element_type,
573 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
574
575 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
576 *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
577
578 err = mlx5_create_scheduling_element_cmd(dev,
579 SCHEDULING_HIERARCHY_E_SWITCH,
580 tsar_ctx,
581 &esw->qos.root_tsar_ix);
582 if (err) {
583 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
584 return err;
585 }
586
587 INIT_LIST_HEAD(&esw->qos.groups);
588 if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
589 esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
590 if (IS_ERR(esw->qos.group0)) {
591 esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
592 PTR_ERR(esw->qos.group0));
593 err = PTR_ERR(esw->qos.group0);
594 goto err_group0;
595 }
596 }
597 refcount_set(&esw->qos.refcnt, 1);
598
599 return 0;
600
601 err_group0:
602 if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
603 esw->qos.root_tsar_ix))
604 esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
605
606 return err;
607 }
608
esw_qos_destroy(struct mlx5_eswitch * esw)609 static void esw_qos_destroy(struct mlx5_eswitch *esw)
610 {
611 int err;
612
613 if (esw->qos.group0)
614 __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
615
616 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
617 SCHEDULING_HIERARCHY_E_SWITCH,
618 esw->qos.root_tsar_ix);
619 if (err)
620 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
621 }
622
esw_qos_get(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)623 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
624 {
625 int err = 0;
626
627 lockdep_assert_held(&esw->state_lock);
628
629 if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
630 /* esw_qos_create() set refcount to 1 only on success.
631 * No need to decrement on failure.
632 */
633 err = esw_qos_create(esw, extack);
634 }
635
636 return err;
637 }
638
esw_qos_put(struct mlx5_eswitch * esw)639 static void esw_qos_put(struct mlx5_eswitch *esw)
640 {
641 lockdep_assert_held(&esw->state_lock);
642 if (refcount_dec_and_test(&esw->qos.refcnt))
643 esw_qos_destroy(esw);
644 }
645
/* Enable QoS on @vport: take an E-Switch QoS reference, place the vport in
 * esw->qos.group0 and create its scheduling element with @max_rate and
 * @bw_share. Must be called with esw->state_lock held.
 *
 * Returns 0 when QoS is already enabled or was enabled successfully. On
 * failure to create the element the QoS reference is dropped and the error
 * returned.
 */
static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	int ret;

	lockdep_assert_held(&esw->state_lock);

	if (vport->qos.enabled)
		return 0;

	ret = esw_qos_get(esw, extack);
	if (ret)
		return ret;

	vport->qos.group = esw->qos.group0;

	ret = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (ret) {
		esw_qos_put(esw);
		return ret;
	}

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;
}
675
mlx5_esw_qos_vport_disable(struct mlx5_eswitch * esw,struct mlx5_vport * vport)676 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
677 {
678 int err;
679
680 lockdep_assert_held(&esw->state_lock);
681 if (!vport->qos.enabled)
682 return;
683 WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
684 "Disabling QoS on port before detaching it from group");
685
686 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
687 SCHEDULING_HIERARCHY_E_SWITCH,
688 vport->qos.esw_tsar_ix);
689 if (err)
690 esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
691 vport->vport, err);
692
693 memset(&vport->qos, 0, sizeof(vport->qos));
694 trace_mlx5_esw_vport_qos_destroy(vport);
695
696 esw_qos_put(esw);
697 }
698
/* Enable QoS on @vport if needed, then apply @min_rate followed by
 * @max_rate. Must be called with esw->state_lock held.
 *
 * Returns the first error encountered; the max rate is not touched when
 * setting the min rate fails.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 min_rate)
{
	int ret;

	lockdep_assert_held(&esw->state_lock);

	ret = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
	if (ret)
		return ret;

	ret = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
	if (ret)
		return ret;

	return esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
}
715
/* Set the max average bandwidth of vport @vport_num to @rate_mbps.
 *
 * Takes esw->state_lock. When QoS is not yet enabled on the vport it is
 * enabled with @rate_mbps as the max rate; otherwise only max_average_bw of
 * the existing scheduling element is modified. vport->qos.max_rate is not
 * written here.
 *
 * Returns the vport lookup error, or the result of the enable / modify
 * operation.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	int ret;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	mutex_lock(&esw->state_lock);
	if (vport->qos.enabled) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, rate_mbps);

		ret = mlx5_modify_scheduling_element_cmd(esw->dev,
				SCHEDULING_HIERARCHY_E_SWITCH,
				sched_ctx,
				vport->qos.esw_tsar_ix,
				MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
	} else {
		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
		ret = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
	}
	mutex_unlock(&esw->state_lock);

	return ret;
}
745
746 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
747
748 /* Converts bytes per second value passed in a pointer into megabits per
749 * second, rewriting last. If converted rate exceed link speed or is not a
750 * fraction of Mbps - returns error.
751 */
esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev * mdev,const char * name,u64 * rate,struct netlink_ext_ack * extack)752 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
753 u64 *rate, struct netlink_ext_ack *extack)
754 {
755 u32 link_speed_max, reminder;
756 u64 value;
757 int err;
758
759 err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
760 if (err) {
761 NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
762 return err;
763 }
764
765 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
766 if (reminder) {
767 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
768 name, *rate);
769 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
770 return -EINVAL;
771 }
772
773 if (value > link_speed_max) {
774 pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
775 name, value, link_speed_max);
776 NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
777 return -EINVAL;
778 }
779
780 *rate = value;
781 return 0;
782 }
783
784 /* Eswitch devlink rate API */
785
/* devlink rate leaf callback: set the tx_share (minimum rate) of the vport
 * passed in @priv. @tx_share is given in bytes per second.
 *
 * Returns -EPERM when the E-Switch is not allowed, a conversion error from
 * esw_qos_devlink_rate_to_mbps(), or the result of enabling vport QoS and
 * setting the min rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
811
/* devlink rate leaf callback: set the tx_max (maximum rate) of the vport
 * passed in @priv. @tx_max is given in bytes per second.
 *
 * Returns -EPERM when the E-Switch is not allowed, a conversion error from
 * esw_qos_devlink_rate_to_mbps(), or the result of enabling vport QoS and
 * setting the max rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
837
/* devlink rate node callback: set the tx_share (minimum rate) of the rate
 * group passed in @priv. @tx_share is given in bytes per second.
 *
 * Returns a conversion error from esw_qos_devlink_rate_to_mbps() or the
 * result of esw_qos_set_group_min_rate(), called under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *mdev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_esw_rate_group *grp = priv;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(mdev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_min_rate(esw, grp, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
855
/* devlink rate node callback: set the tx_max (maximum rate) of the rate
 * group passed in @priv. @tx_max is given in bytes per second.
 *
 * Returns a conversion error from esw_qos_devlink_rate_to_mbps() or the
 * result of esw_qos_set_group_max_rate(), called under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *mdev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_esw_rate_group *grp = priv;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(mdev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_max_rate(esw, grp, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
873
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)874 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
875 struct netlink_ext_ack *extack)
876 {
877 struct mlx5_esw_rate_group *group;
878 struct mlx5_eswitch *esw;
879 int err = 0;
880
881 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
882 if (IS_ERR(esw))
883 return PTR_ERR(esw);
884
885 mutex_lock(&esw->state_lock);
886 if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
887 NL_SET_ERR_MSG_MOD(extack,
888 "Rate node creation supported only in switchdev mode");
889 err = -EOPNOTSUPP;
890 goto unlock;
891 }
892
893 group = esw_qos_create_rate_group(esw, extack);
894 if (IS_ERR(group)) {
895 err = PTR_ERR(group);
896 goto unlock;
897 }
898
899 *priv = group;
900 unlock:
901 mutex_unlock(&esw->state_lock);
902 return err;
903 }
904
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)905 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
906 struct netlink_ext_ack *extack)
907 {
908 struct mlx5_esw_rate_group *group = priv;
909 struct mlx5_eswitch *esw;
910 int err;
911
912 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
913 if (IS_ERR(esw))
914 return PTR_ERR(esw);
915
916 mutex_lock(&esw->state_lock);
917 err = esw_qos_destroy_rate_group(esw, group, extack);
918 mutex_unlock(&esw->state_lock);
919 return err;
920 }
921
mlx5_esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)922 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
923 struct mlx5_vport *vport,
924 struct mlx5_esw_rate_group *group,
925 struct netlink_ext_ack *extack)
926 {
927 int err = 0;
928
929 mutex_lock(&esw->state_lock);
930 if (!vport->qos.enabled && !group)
931 goto unlock;
932
933 err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
934 if (!err)
935 err = esw_qos_vport_update_group(esw, vport, group, extack);
936 unlock:
937 mutex_unlock(&esw->state_lock);
938 return err;
939 }
940
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)941 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
942 struct devlink_rate *parent,
943 void *priv, void *parent_priv,
944 struct netlink_ext_ack *extack)
945 {
946 struct mlx5_esw_rate_group *group;
947 struct mlx5_vport *vport = priv;
948
949 if (!parent)
950 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
951 vport, NULL, extack);
952
953 group = parent_priv;
954 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
955 }
956