/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "../kselftest.h"
#include "cgroup_util.h"

static bool has_localevents;
static bool has_recursiveprot;

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling the memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL)
		return -1;
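	/* Touch one byte per page so every page is faulted in and charged */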
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache,
 * and checks memory.current and some memory.stat values.
 */
static int test_memcg_current(const char *root)
{
	int ret = KSFT_FAIL;
	long current;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

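	/*
	 * Keep the memory charged to this cgroup while the test runs:
	 * once the parent test process exits, this child is reparented,
	 * getppid() changes, and the loop (and the process) ends.
	 */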
	while (getppid() == ppid)
		sleep(1);

	return 0;
}

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL)
		return -1;

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If they are still present after ~1s (10 polls, 100ms apart), fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for the origin of the numbers, see the model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

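	/* Wait for the ~150M of pagecache charged by the children of A/B */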
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

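	/*
	 * Generate memory pressure in A: with memory.max = 200M on A and
	 * ~150M of pagecache already charged below A/B, allocating 148M
	 * of anon memory in A/memcg_test_2 forces reclaim of the
	 * pagecache that isn't covered by memory.min/memory.low.
	 */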
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents allocation of anon memory\n");
		goto cleanup;
	}

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

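	/*
	 * memory.high is not a hard limit: the 50M pagecache allocation
	 * below succeeds, but reclaim keeps the usage near 30M, so the
	 * usage check inside alloc_pagecache_50M_check is expected to fail.
	 */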
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

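	/*
	 * Watch memory.events for changes, then start a 200M mlocked
	 * allocation: the single oversized allocation should be throttled
	 * by memory.high ("high" events grow) without ever hitting
	 * memory.max ("max" events stay unchanged).
	 */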
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL, fd = -1, retries;
	char *memcg;
	long current, expected_usage, to_reclaim;
	char buf[64];

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	retries = 5;
	while (true) {
		int err;

		current = cg_read_long(memcg, "memory.current");
		to_reclaim = current - MB(30);

		/*
		 * We only keep looping if we get EAGAIN, which means we could
		 * not reclaim the full amount.
		 */
		if (to_reclaim <= 0)
			goto cleanup;

		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err) {
			/*
			 * If writing succeeds, then the written amount should have been
			 * fully reclaimed (and maybe more).
			 */
			current = cg_read_long(memcg, "memory.current");
			if (!values_close(current, MB(30), 3) && current > MB(30))
				goto cleanup;
			break;
		}

		/* The kernel could not reclaim the full amount, try again. */
		if (err == -EAGAIN && retries--)
			continue;

		/* We got an unexpected error or ran out of retries. */
		goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	if (fd >= 0)
		close(fd);

	return ret;
}

static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL)
		return -1;

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

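	/*
	 * Allocate 50M of anon memory with memory.max = 30M: roughly 30M
	 * should stay resident and the remainder should be pushed to swap.
	 */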
	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

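	/*
	 * Close the unused read end of the control pipe; the parent reads
	 * one int from it: either errno from a failed bind() or 0 once
	 * the server is listening.
	 */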
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and the "sock" entry of memory.stat stay close to
 * each other.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

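	/*
	 * Retry with a fresh random port if the server reports
	 * EADDRINUSE; any other bind() error is fatal.
	 */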
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that OOM events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except the one with oom_score_adj set to
 * OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

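	/*
	 * A process with oom_score_adj set to OOM_SCORE_ADJ_MIN is never
	 * chosen by the OOM killer, even with memory.oom.group enabled,
	 * so it has to be killed explicitly at the end of the test.
	 */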
	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

#define T(x) { x, #x }
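/* T(x) expands to { x, "x" }: the test function plus its printable name */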
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that the memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

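	/* Enable the memory controller for root's children if needed */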
	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}