1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2017 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/if.h>
9 #include <linux/skbuff.h>
10 #include <linux/ip.h>
11 #include <linux/kernel.h>
12 #include <linux/openvswitch.h>
13 #include <linux/netlink.h>
14 #include <linux/rculist.h>
15 #include <linux/swap.h>
16 
17 #include <net/netlink.h>
18 #include <net/genetlink.h>
19 
20 #include "datapath.h"
21 #include "meter.h"
22 
23 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
24 	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
25 	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
26 	[OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
27 	[OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
28 	[OVS_METER_ATTR_USED] = { .type = NLA_U64 },
29 	[OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
30 	[OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
31 	[OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
32 };
33 
34 static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
35 	[OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
36 	[OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
37 	[OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
38 	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
39 };
40 
/* Map meter 'id' to its slot in 'ti': the id modulo the number of slots. */
static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
{
	u32 slot = id % ti->n_meters;

	return slot;
}
45 
ovs_meter_free(struct dp_meter * meter)46 static void ovs_meter_free(struct dp_meter *meter)
47 {
48 	if (!meter)
49 		return;
50 
51 	kfree_rcu(meter, rcu);
52 }
53 
54 /* Call with ovs_mutex or RCU read lock. */
lookup_meter(const struct dp_meter_table * tbl,u32 meter_id)55 static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
56 				     u32 meter_id)
57 {
58 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
59 	u32 hash = meter_hash(ti, meter_id);
60 	struct dp_meter *meter;
61 
62 	meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
63 	if (meter && likely(meter->id == meter_id))
64 		return meter;
65 
66 	return NULL;
67 }
68 
dp_meter_instance_alloc(const u32 size)69 static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
70 {
71 	struct dp_meter_instance *ti;
72 
73 	ti = kvzalloc(sizeof(*ti) +
74 		      sizeof(struct dp_meter *) * size,
75 		      GFP_KERNEL);
76 	if (!ti)
77 		return NULL;
78 
79 	ti->n_meters = size;
80 
81 	return ti;
82 }
83 
/* Release 'ti' immediately with kvfree(); the meters it points to are not
 * touched.
 */
static void dp_meter_instance_free(struct dp_meter_instance *ti)
{
	kvfree(ti);
}
88 
dp_meter_instance_free_rcu(struct rcu_head * rcu)89 static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
90 {
91 	struct dp_meter_instance *ti;
92 
93 	ti = container_of(rcu, struct dp_meter_instance, rcu);
94 	kvfree(ti);
95 }
96 
97 static int
dp_meter_instance_realloc(struct dp_meter_table * tbl,u32 size)98 dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
99 {
100 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
101 	int n_meters = min(size, ti->n_meters);
102 	struct dp_meter_instance *new_ti;
103 	int i;
104 
105 	new_ti = dp_meter_instance_alloc(size);
106 	if (!new_ti)
107 		return -ENOMEM;
108 
109 	for (i = 0; i < n_meters; i++)
110 		if (rcu_dereference_ovsl(ti->dp_meters[i]))
111 			new_ti->dp_meters[i] = ti->dp_meters[i];
112 
113 	rcu_assign_pointer(tbl->ti, new_ti);
114 	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
115 
116 	return 0;
117 }
118 
dp_meter_instance_insert(struct dp_meter_instance * ti,struct dp_meter * meter)119 static void dp_meter_instance_insert(struct dp_meter_instance *ti,
120 				     struct dp_meter *meter)
121 {
122 	u32 hash;
123 
124 	hash = meter_hash(ti, meter->id);
125 	rcu_assign_pointer(ti->dp_meters[hash], meter);
126 }
127 
dp_meter_instance_remove(struct dp_meter_instance * ti,struct dp_meter * meter)128 static void dp_meter_instance_remove(struct dp_meter_instance *ti,
129 				     struct dp_meter *meter)
130 {
131 	u32 hash;
132 
133 	hash = meter_hash(ti, meter->id);
134 	RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
135 }
136 
attach_meter(struct dp_meter_table * tbl,struct dp_meter * meter)137 static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
138 {
139 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
140 	u32 hash = meter_hash(ti, meter->id);
141 	int err;
142 
143 	/* In generally, slots selected should be empty, because
144 	 * OvS uses id-pool to fetch a available id.
145 	 */
146 	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
147 		return -EBUSY;
148 
149 	dp_meter_instance_insert(ti, meter);
150 
151 	/* That function is thread-safe. */
152 	tbl->count++;
153 	if (tbl->count >= tbl->max_meters_allowed) {
154 		err = -EFBIG;
155 		goto attach_err;
156 	}
157 
158 	if (tbl->count >= ti->n_meters &&
159 	    dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
160 		err = -ENOMEM;
161 		goto attach_err;
162 	}
163 
164 	return 0;
165 
166 attach_err:
167 	dp_meter_instance_remove(ti, meter);
168 	tbl->count--;
169 	return err;
170 }
171 
detach_meter(struct dp_meter_table * tbl,struct dp_meter * meter)172 static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
173 {
174 	struct dp_meter_instance *ti;
175 
176 	ASSERT_OVSL();
177 	if (!meter)
178 		return 0;
179 
180 	ti = rcu_dereference_ovsl(tbl->ti);
181 	dp_meter_instance_remove(ti, meter);
182 
183 	tbl->count--;
184 
185 	/* Shrink the meter array if necessary. */
186 	if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
187 	    tbl->count <= (ti->n_meters / 4)) {
188 		int half_size = ti->n_meters / 2;
189 		int i;
190 
191 		/* Avoid hash collision, don't move slots to other place.
192 		 * Make sure there are no references of meters in array
193 		 * which will be released.
194 		 */
195 		for (i = half_size; i < ti->n_meters; i++)
196 			if (rcu_dereference_ovsl(ti->dp_meters[i]))
197 				goto out;
198 
199 		if (dp_meter_instance_realloc(tbl, half_size))
200 			goto shrink_err;
201 	}
202 
203 out:
204 	return 0;
205 
206 shrink_err:
207 	dp_meter_instance_insert(ti, meter);
208 	tbl->count++;
209 	return -ENOMEM;
210 }
211 
212 static struct sk_buff *
ovs_meter_cmd_reply_start(struct genl_info * info,u8 cmd,struct ovs_header ** ovs_reply_header)213 ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
214 			  struct ovs_header **ovs_reply_header)
215 {
216 	struct sk_buff *skb;
217 	struct ovs_header *ovs_header = info->userhdr;
218 
219 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
220 	if (!skb)
221 		return ERR_PTR(-ENOMEM);
222 
223 	*ovs_reply_header = genlmsg_put(skb, info->snd_portid,
224 					info->snd_seq,
225 					&dp_meter_genl_family, 0, cmd);
226 	if (!*ovs_reply_header) {
227 		nlmsg_free(skb);
228 		return ERR_PTR(-EMSGSIZE);
229 	}
230 	(*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
231 
232 	return skb;
233 }
234 
/* Append the id, statistics, last-used time and per-band statistics of
 * 'meter' to 'reply'.
 *
 * Returns 0 on success or -EMSGSIZE when an attribute does not fit; in that
 * case 'reply' may hold a partially written attribute set.  The callers in
 * this file hold meter->lock around the call.
 */
static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
				     struct dp_meter *meter)
{
	struct nlattr *bands_nest;
	u16 idx;

	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id) ||
	    nla_put(reply, OVS_METER_ATTR_STATS,
		    sizeof(struct ovs_flow_stats), &meter->stats) ||
	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
			      OVS_METER_ATTR_PAD))
		return -EMSGSIZE;

	bands_nest = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
	if (!bands_nest)
		return -EMSGSIZE;

	/* One nested attribute per band, carrying only that band's stats. */
	for (idx = 0; idx < meter->n_bands; idx++) {
		struct dp_meter_band *band = &meter->bands[idx];
		struct nlattr *band_nest;

		band_nest = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
		if (!band_nest)
			return -EMSGSIZE;

		if (nla_put(reply, OVS_BAND_ATTR_STATS,
			    sizeof(struct ovs_flow_stats), &band->stats))
			return -EMSGSIZE;

		nla_nest_end(reply, band_nest);
	}
	nla_nest_end(reply, bands_nest);

	return 0;
}
275 
ovs_meter_cmd_features(struct sk_buff * skb,struct genl_info * info)276 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
277 {
278 	struct ovs_header *ovs_header = info->userhdr;
279 	struct ovs_header *ovs_reply_header;
280 	struct nlattr *nla, *band_nla;
281 	struct sk_buff *reply;
282 	struct datapath *dp;
283 	int err = -EMSGSIZE;
284 
285 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
286 					  &ovs_reply_header);
287 	if (IS_ERR(reply))
288 		return PTR_ERR(reply);
289 
290 	ovs_lock();
291 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
292 	if (!dp) {
293 		err = -ENODEV;
294 		goto exit_unlock;
295 	}
296 
297 	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
298 			dp->meter_tbl.max_meters_allowed))
299 		goto exit_unlock;
300 
301 	ovs_unlock();
302 
303 	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
304 		goto nla_put_failure;
305 
306 	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
307 	if (!nla)
308 		goto nla_put_failure;
309 
310 	band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
311 	if (!band_nla)
312 		goto nla_put_failure;
313 	/* Currently only DROP band type is supported. */
314 	if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
315 		goto nla_put_failure;
316 	nla_nest_end(reply, band_nla);
317 	nla_nest_end(reply, nla);
318 
319 	genlmsg_end(reply, ovs_reply_header);
320 	return genlmsg_reply(reply, info);
321 
322 exit_unlock:
323 	ovs_unlock();
324 nla_put_failure:
325 	nlmsg_free(reply);
326 	return err;
327 }
328 
dp_meter_create(struct nlattr ** a)329 static struct dp_meter *dp_meter_create(struct nlattr **a)
330 {
331 	struct nlattr *nla;
332 	int rem;
333 	u16 n_bands = 0;
334 	struct dp_meter *meter;
335 	struct dp_meter_band *band;
336 	int err;
337 
338 	/* Validate attributes, count the bands. */
339 	if (!a[OVS_METER_ATTR_BANDS])
340 		return ERR_PTR(-EINVAL);
341 
342 	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
343 		if (++n_bands > DP_MAX_BANDS)
344 			return ERR_PTR(-EINVAL);
345 
346 	/* Allocate and set up the meter before locking anything. */
347 	meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
348 	if (!meter)
349 		return ERR_PTR(-ENOMEM);
350 
351 	meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
352 	meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
353 	meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
354 	meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
355 	spin_lock_init(&meter->lock);
356 	if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
357 		meter->stats = *(struct ovs_flow_stats *)
358 			nla_data(a[OVS_METER_ATTR_STATS]);
359 	}
360 	meter->n_bands = n_bands;
361 
362 	/* Set up meter bands. */
363 	band = meter->bands;
364 	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
365 		struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
366 		u32 band_max_delta_t;
367 
368 		err = nla_parse_deprecated((struct nlattr **)&attr,
369 					   OVS_BAND_ATTR_MAX, nla_data(nla),
370 					   nla_len(nla), band_policy, NULL);
371 		if (err)
372 			goto exit_free_meter;
373 
374 		if (!attr[OVS_BAND_ATTR_TYPE] ||
375 		    !attr[OVS_BAND_ATTR_RATE] ||
376 		    !attr[OVS_BAND_ATTR_BURST]) {
377 			err = -EINVAL;
378 			goto exit_free_meter;
379 		}
380 
381 		band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
382 		band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
383 		if (band->rate == 0) {
384 			err = -EINVAL;
385 			goto exit_free_meter;
386 		}
387 
388 		band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
389 		/* Figure out max delta_t that is enough to fill any bucket.
390 		 * Keep max_delta_t size to the bucket units:
391 		 * pkts => 1/1000 packets, kilobits => bits.
392 		 *
393 		 * Start with a full bucket.
394 		 */
395 		band->bucket = (band->burst_size + band->rate) * 1000ULL;
396 		band_max_delta_t = div_u64(band->bucket, band->rate);
397 		if (band_max_delta_t > meter->max_delta_t)
398 			meter->max_delta_t = band_max_delta_t;
399 		band++;
400 	}
401 
402 	return meter;
403 
404 exit_free_meter:
405 	kfree(meter);
406 	return ERR_PTR(err);
407 }
408 
ovs_meter_cmd_set(struct sk_buff * skb,struct genl_info * info)409 static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
410 {
411 	struct nlattr **a = info->attrs;
412 	struct dp_meter *meter, *old_meter;
413 	struct sk_buff *reply;
414 	struct ovs_header *ovs_reply_header;
415 	struct ovs_header *ovs_header = info->userhdr;
416 	struct dp_meter_table *meter_tbl;
417 	struct datapath *dp;
418 	int err;
419 	u32 meter_id;
420 	bool failed;
421 
422 	if (!a[OVS_METER_ATTR_ID])
423 		return -EINVAL;
424 
425 	meter = dp_meter_create(a);
426 	if (IS_ERR_OR_NULL(meter))
427 		return PTR_ERR(meter);
428 
429 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
430 					  &ovs_reply_header);
431 	if (IS_ERR(reply)) {
432 		err = PTR_ERR(reply);
433 		goto exit_free_meter;
434 	}
435 
436 	ovs_lock();
437 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
438 	if (!dp) {
439 		err = -ENODEV;
440 		goto exit_unlock;
441 	}
442 
443 	meter_tbl = &dp->meter_tbl;
444 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
445 
446 	old_meter = lookup_meter(meter_tbl, meter_id);
447 	err = detach_meter(meter_tbl, old_meter);
448 	if (err)
449 		goto exit_unlock;
450 
451 	err = attach_meter(meter_tbl, meter);
452 	if (err)
453 		goto exit_unlock;
454 
455 	ovs_unlock();
456 
457 	/* Build response with the meter_id and stats from
458 	 * the old meter, if any.
459 	 */
460 	failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
461 	WARN_ON(failed);
462 	if (old_meter) {
463 		spin_lock_bh(&old_meter->lock);
464 		if (old_meter->keep_stats) {
465 			err = ovs_meter_cmd_reply_stats(reply, meter_id,
466 							old_meter);
467 			WARN_ON(err);
468 		}
469 		spin_unlock_bh(&old_meter->lock);
470 		ovs_meter_free(old_meter);
471 	}
472 
473 	genlmsg_end(reply, ovs_reply_header);
474 	return genlmsg_reply(reply, info);
475 
476 exit_unlock:
477 	ovs_unlock();
478 	nlmsg_free(reply);
479 exit_free_meter:
480 	kfree(meter);
481 	return err;
482 }
483 
ovs_meter_cmd_get(struct sk_buff * skb,struct genl_info * info)484 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
485 {
486 	struct ovs_header *ovs_header = info->userhdr;
487 	struct ovs_header *ovs_reply_header;
488 	struct nlattr **a = info->attrs;
489 	struct dp_meter *meter;
490 	struct sk_buff *reply;
491 	struct datapath *dp;
492 	u32 meter_id;
493 	int err;
494 
495 	if (!a[OVS_METER_ATTR_ID])
496 		return -EINVAL;
497 
498 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
499 
500 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
501 					  &ovs_reply_header);
502 	if (IS_ERR(reply))
503 		return PTR_ERR(reply);
504 
505 	ovs_lock();
506 
507 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
508 	if (!dp) {
509 		err = -ENODEV;
510 		goto exit_unlock;
511 	}
512 
513 	/* Locate meter, copy stats. */
514 	meter = lookup_meter(&dp->meter_tbl, meter_id);
515 	if (!meter) {
516 		err = -ENOENT;
517 		goto exit_unlock;
518 	}
519 
520 	spin_lock_bh(&meter->lock);
521 	err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
522 	spin_unlock_bh(&meter->lock);
523 	if (err)
524 		goto exit_unlock;
525 
526 	ovs_unlock();
527 
528 	genlmsg_end(reply, ovs_reply_header);
529 	return genlmsg_reply(reply, info);
530 
531 exit_unlock:
532 	ovs_unlock();
533 	nlmsg_free(reply);
534 	return err;
535 }
536 
ovs_meter_cmd_del(struct sk_buff * skb,struct genl_info * info)537 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
538 {
539 	struct ovs_header *ovs_header = info->userhdr;
540 	struct ovs_header *ovs_reply_header;
541 	struct nlattr **a = info->attrs;
542 	struct dp_meter *old_meter;
543 	struct sk_buff *reply;
544 	struct datapath *dp;
545 	u32 meter_id;
546 	int err;
547 
548 	if (!a[OVS_METER_ATTR_ID])
549 		return -EINVAL;
550 
551 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
552 					  &ovs_reply_header);
553 	if (IS_ERR(reply))
554 		return PTR_ERR(reply);
555 
556 	ovs_lock();
557 
558 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
559 	if (!dp) {
560 		err = -ENODEV;
561 		goto exit_unlock;
562 	}
563 
564 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
565 	old_meter = lookup_meter(&dp->meter_tbl, meter_id);
566 	if (old_meter) {
567 		spin_lock_bh(&old_meter->lock);
568 		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
569 		WARN_ON(err);
570 		spin_unlock_bh(&old_meter->lock);
571 
572 		err = detach_meter(&dp->meter_tbl, old_meter);
573 		if (err)
574 			goto exit_unlock;
575 	}
576 
577 	ovs_unlock();
578 	ovs_meter_free(old_meter);
579 	genlmsg_end(reply, ovs_reply_header);
580 	return genlmsg_reply(reply, info);
581 
582 exit_unlock:
583 	ovs_unlock();
584 	nlmsg_free(reply);
585 	return err;
586 }
587 
588 /* Meter action execution.
589  *
590  * Return true 'meter_id' drop band is triggered. The 'skb' should be
591  * dropped by the caller'.
592  */
ovs_meter_execute(struct datapath * dp,struct sk_buff * skb,struct sw_flow_key * key,u32 meter_id)593 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
594 		       struct sw_flow_key *key, u32 meter_id)
595 {
596 	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
597 	long long int long_delta_ms;
598 	struct dp_meter_band *band;
599 	struct dp_meter *meter;
600 	int i, band_exceeded_max = -1;
601 	u32 band_exceeded_rate = 0;
602 	u32 delta_ms;
603 	u32 cost;
604 
605 	meter = lookup_meter(&dp->meter_tbl, meter_id);
606 	/* Do not drop the packet when there is no meter. */
607 	if (!meter)
608 		return false;
609 
610 	/* Lock the meter while using it. */
611 	spin_lock(&meter->lock);
612 
613 	long_delta_ms = (now_ms - meter->used); /* ms */
614 
615 	/* Make sure delta_ms will not be too large, so that bucket will not
616 	 * wrap around below.
617 	 */
618 	delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
619 		   ? meter->max_delta_t : (u32)long_delta_ms;
620 
621 	/* Update meter statistics.
622 	 */
623 	meter->used = now_ms;
624 	meter->stats.n_packets += 1;
625 	meter->stats.n_bytes += skb->len;
626 
627 	/* Bucket rate is either in kilobits per second, or in packets per
628 	 * second.  We maintain the bucket in the units of either bits or
629 	 * 1/1000th of a packet, correspondingly.
630 	 * Then, when rate is multiplied with milliseconds, we get the
631 	 * bucket units:
632 	 * msec * kbps = bits, and
633 	 * msec * packets/sec = 1/1000 packets.
634 	 *
635 	 * 'cost' is the number of bucket units in this packet.
636 	 */
637 	cost = (meter->kbps) ? skb->len * 8 : 1000;
638 
639 	/* Update all bands and find the one hit with the highest rate. */
640 	for (i = 0; i < meter->n_bands; ++i) {
641 		long long int max_bucket_size;
642 
643 		band = &meter->bands[i];
644 		max_bucket_size = (band->burst_size + band->rate) * 1000LL;
645 
646 		band->bucket += delta_ms * band->rate;
647 		if (band->bucket > max_bucket_size)
648 			band->bucket = max_bucket_size;
649 
650 		if (band->bucket >= cost) {
651 			band->bucket -= cost;
652 		} else if (band->rate > band_exceeded_rate) {
653 			band_exceeded_rate = band->rate;
654 			band_exceeded_max = i;
655 		}
656 	}
657 
658 	if (band_exceeded_max >= 0) {
659 		/* Update band statistics. */
660 		band = &meter->bands[band_exceeded_max];
661 		band->stats.n_packets += 1;
662 		band->stats.n_bytes += skb->len;
663 
664 		/* Drop band triggered, let the caller drop the 'skb'.  */
665 		if (band->type == OVS_METER_BAND_TYPE_DROP) {
666 			spin_unlock(&meter->lock);
667 			return true;
668 		}
669 	}
670 
671 	spin_unlock(&meter->lock);
672 	return false;
673 }
674 
675 static const struct genl_small_ops dp_meter_genl_ops[] = {
676 	{ .cmd = OVS_METER_CMD_FEATURES,
677 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
678 		.flags = 0,		  /* OK for unprivileged users. */
679 		.doit = ovs_meter_cmd_features
680 	},
681 	{ .cmd = OVS_METER_CMD_SET,
682 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
683 		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
684 					   *  privilege.
685 					   */
686 		.doit = ovs_meter_cmd_set,
687 	},
688 	{ .cmd = OVS_METER_CMD_GET,
689 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
690 		.flags = 0,		  /* OK for unprivileged users. */
691 		.doit = ovs_meter_cmd_get,
692 	},
693 	{ .cmd = OVS_METER_CMD_DEL,
694 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
695 		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
696 					   *  privilege.
697 					   */
698 		.doit = ovs_meter_cmd_del
699 	},
700 };
701 
702 static const struct genl_multicast_group ovs_meter_multicast_group = {
703 	.name = OVS_METER_MCGROUP,
704 };
705 
706 struct genl_family dp_meter_genl_family __ro_after_init = {
707 	.hdrsize = sizeof(struct ovs_header),
708 	.name = OVS_METER_FAMILY,
709 	.version = OVS_METER_VERSION,
710 	.maxattr = OVS_METER_ATTR_MAX,
711 	.policy = meter_policy,
712 	.netnsok = true,
713 	.parallel_ops = true,
714 	.small_ops = dp_meter_genl_ops,
715 	.n_small_ops = ARRAY_SIZE(dp_meter_genl_ops),
716 	.mcgrps = &ovs_meter_multicast_group,
717 	.n_mcgrps = 1,
718 	.module = THIS_MODULE,
719 };
720 
ovs_meters_init(struct datapath * dp)721 int ovs_meters_init(struct datapath *dp)
722 {
723 	struct dp_meter_table *tbl = &dp->meter_tbl;
724 	struct dp_meter_instance *ti;
725 	unsigned long free_mem_bytes;
726 
727 	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
728 	if (!ti)
729 		return -ENOMEM;
730 
731 	/* Allow meters in a datapath to use ~3.12% of physical memory. */
732 	free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
733 	tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
734 				      DP_METER_NUM_MAX);
735 	if (!tbl->max_meters_allowed)
736 		goto out_err;
737 
738 	rcu_assign_pointer(tbl->ti, ti);
739 	tbl->count = 0;
740 
741 	return 0;
742 
743 out_err:
744 	dp_meter_instance_free(ti);
745 	return -ENOMEM;
746 }
747 
ovs_meters_exit(struct datapath * dp)748 void ovs_meters_exit(struct datapath *dp)
749 {
750 	struct dp_meter_table *tbl = &dp->meter_tbl;
751 	struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
752 	int i;
753 
754 	for (i = 0; i < ti->n_meters; i++)
755 		ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i]));
756 
757 	dp_meter_instance_free(ti);
758 }
759