1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <fcntl.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/stat.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12 #include <sys/wait.h>
13 #include <errno.h>
14 #include <sys/sysinfo.h>
15 #include <pthread.h>
16
17 #include "../kselftest.h"
18 #include "cgroup_util.h"
19
20
/*
 * Memory cgroup charging is performed using percpu batches 64 pages
 * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
 * the maximum discrepancy between charge and vmstat entries is number
 * of cpus multiplied by 64 pages.
 *
 * The macro calls get_nprocs(), so it is evaluated anew at every use
 * rather than being a compile-time constant.
 *
 * NOTE(review): the 4096 factor is presumably the page size in bytes,
 * i.e. 4 KiB pages are assumed -- confirm for configurations that use
 * larger pages.
 */
#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
28
29
/*
 * Issue a series of stat() calls on paths that are not expected to
 * exist. Each path embeds the iteration counter (padded to 64 columns)
 * and the pid of the calling process.
 *
 * @cgroup: unused.
 * @arg:    number of lookups to perform, passed as an integer stored
 *          in the pointer value.
 *
 * The result of stat() is intentionally ignored: only the lookup
 * itself matters. Always returns 0.
 */
static int alloc_dcache(const char *cgroup, void *arg)
{
	const unsigned long nr_lookups = (unsigned long)arg;
	const int pid = getpid();
	unsigned long n = 0;
	struct stat sb;
	char path[128];

	(void)cgroup;

	while (n < nr_lookups) {
		snprintf(path, sizeof(path),
			 "/something-non-existent-with-a-long-name-%64lu-%d",
			 n, pid);
		stat(path, &sb);
		n++;
	}

	return 0;
}
45
46 /*
47 * This test allocates 100000 of negative dentries with long names.
48 * Then it checks that "slab" in memory.stat is larger than 1M.
49 * Then it sets memory.high to 1M and checks that at least 1/2
50 * of slab memory has been reclaimed.
51 */
test_kmem_basic(const char * root)52 static int test_kmem_basic(const char *root)
53 {
54 int ret = KSFT_FAIL;
55 char *cg = NULL;
56 long slab0, slab1, current;
57
58 cg = cg_name(root, "kmem_basic_test");
59 if (!cg)
60 goto cleanup;
61
62 if (cg_create(cg))
63 goto cleanup;
64
65 if (cg_run(cg, alloc_dcache, (void *)100000))
66 goto cleanup;
67
68 slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
69 if (slab0 < (1 << 20))
70 goto cleanup;
71
72 cg_write(cg, "memory.high", "1M");
73
74 /* wait for RCU freeing */
75 sleep(1);
76
77 slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
78 if (slab1 < 0)
79 goto cleanup;
80
81 current = cg_read_long(cg, "memory.current");
82 if (current < 0)
83 goto cleanup;
84
85 if (slab1 < slab0 / 2 && current < slab0 / 2)
86 ret = KSFT_PASS;
87 cleanup:
88 cg_destroy(cg);
89 free(cg);
90
91 return ret;
92 }
93
/*
 * Thread entry point: perform 100 alloc_dcache() lookups.
 * The thread argument is ignored and the thread result is always NULL.
 */
static void *alloc_kmem_fn(void *arg)
{
	(void)arg;

	alloc_dcache(NULL, (void *)100);

	return NULL;
}
99
alloc_kmem_smp(const char * cgroup,void * arg)100 static int alloc_kmem_smp(const char *cgroup, void *arg)
101 {
102 int nr_threads = 2 * get_nprocs();
103 pthread_t *tinfo;
104 unsigned long i;
105 int ret = -1;
106
107 tinfo = calloc(nr_threads, sizeof(pthread_t));
108 if (tinfo == NULL)
109 return -1;
110
111 for (i = 0; i < nr_threads; i++) {
112 if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
113 (void *)i)) {
114 free(tinfo);
115 return -1;
116 }
117 }
118
119 for (i = 0; i < nr_threads; i++) {
120 ret = pthread_join(tinfo[i], NULL);
121 if (ret)
122 break;
123 }
124
125 free(tinfo);
126 return ret;
127 }
128
/*
 * Sequentially create @times child cgroups named "child" with an index
 * under @parent, run @fn in each of them via cg_run(), and destroy the
 * child again before moving on to the next one.
 *
 * @parent: path of the parent cgroup.
 * @fn:     function to run inside each child cgroup.
 * @arg:    opaque argument forwarded to @fn.
 * @times:  number of child cgroups to go through.
 *
 * Returns 0 if every iteration succeeded, -1 as soon as naming,
 * creating or running in a child fails. The child of the failing
 * iteration is still destroyed and its name freed.
 */
static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int ret = 0;
	int i;

	for (i = 0; i < times && !ret; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		/*
		 * Hand the caller's argument through to fn; passing NULL
		 * here would silently discard it.
		 */
		if (cg_create(child) || cg_run(child, fn, arg))
			ret = -1;

		cg_destroy(child);
		free(child);
	}

	return ret;
}
159
160 /*
161 * The test creates and destroys a large number of cgroups. In each cgroup it
162 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
163 * threads. Then it checks the sanity of numbers on the parent level:
164 * the total size of the cgroups should be roughly equal to
165 * anon + file + kernel + sock.
166 */
test_kmem_memcg_deletion(const char * root)167 static int test_kmem_memcg_deletion(const char *root)
168 {
169 long current, anon, file, kernel, sock, sum;
170 int ret = KSFT_FAIL;
171 char *parent;
172
173 parent = cg_name(root, "kmem_memcg_deletion_test");
174 if (!parent)
175 goto cleanup;
176
177 if (cg_create(parent))
178 goto cleanup;
179
180 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
181 goto cleanup;
182
183 if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
184 goto cleanup;
185
186 current = cg_read_long(parent, "memory.current");
187 anon = cg_read_key_long(parent, "memory.stat", "anon ");
188 file = cg_read_key_long(parent, "memory.stat", "file ");
189 kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
190 sock = cg_read_key_long(parent, "memory.stat", "sock ");
191 if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
192 goto cleanup;
193
194 sum = anon + file + kernel + sock;
195 if (abs(sum - current) < MAX_VMSTAT_ERROR) {
196 ret = KSFT_PASS;
197 } else {
198 printf("memory.current = %ld\n", current);
199 printf("anon + file + kernel + sock = %ld\n", sum);
200 printf("anon = %ld\n", anon);
201 printf("file = %ld\n", file);
202 printf("kernel = %ld\n", kernel);
203 printf("sock = %ld\n", sock);
204 }
205
206 cleanup:
207 cg_destroy(parent);
208 free(parent);
209
210 return ret;
211 }
212
213 /*
214 * The test reads the entire /proc/kpagecgroup. If the operation went
215 * successfully (and the kernel didn't panic), the test is treated as passed.
216 */
test_kmem_proc_kpagecgroup(const char * root)217 static int test_kmem_proc_kpagecgroup(const char *root)
218 {
219 unsigned long buf[128];
220 int ret = KSFT_FAIL;
221 ssize_t len;
222 int fd;
223
224 fd = open("/proc/kpagecgroup", O_RDONLY);
225 if (fd < 0)
226 return ret;
227
228 do {
229 len = read(fd, buf, sizeof(buf));
230 } while (len > 0);
231
232 if (len == 0)
233 ret = KSFT_PASS;
234
235 close(fd);
236 return ret;
237 }
238
/*
 * Thread entry point that simply sleeps for 100 seconds.
 * The thread argument is ignored and the thread result is always NULL.
 */
static void *pthread_wait_fn(void *arg)
{
	(void)arg;

	sleep(100);

	return NULL;
}
244
spawn_1000_threads(const char * cgroup,void * arg)245 static int spawn_1000_threads(const char *cgroup, void *arg)
246 {
247 int nr_threads = 1000;
248 pthread_t *tinfo;
249 unsigned long i;
250 long stack;
251 int ret = -1;
252
253 tinfo = calloc(nr_threads, sizeof(pthread_t));
254 if (tinfo == NULL)
255 return -1;
256
257 for (i = 0; i < nr_threads; i++) {
258 if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
259 (void *)i)) {
260 free(tinfo);
261 return(-1);
262 }
263 }
264
265 stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
266 if (stack >= 4096 * 1000)
267 ret = 0;
268
269 free(tinfo);
270 return ret;
271 }
272
273 /*
274 * The test spawns a process, which spawns 1000 threads. Then it checks
275 * that memory.stat's kernel_stack is at least 1000 pages large.
276 */
test_kmem_kernel_stacks(const char * root)277 static int test_kmem_kernel_stacks(const char *root)
278 {
279 int ret = KSFT_FAIL;
280 char *cg = NULL;
281
282 cg = cg_name(root, "kmem_kernel_stacks_test");
283 if (!cg)
284 goto cleanup;
285
286 if (cg_create(cg))
287 goto cleanup;
288
289 if (cg_run(cg, spawn_1000_threads, NULL))
290 goto cleanup;
291
292 ret = KSFT_PASS;
293 cleanup:
294 cg_destroy(cg);
295 free(cg);
296
297 return ret;
298 }
299
300 /*
301 * This test sequentionally creates 30 child cgroups, allocates some
302 * kernel memory in each of them, and deletes them. Then it checks
303 * that the number of dying cgroups on the parent level is 0.
304 */
test_kmem_dead_cgroups(const char * root)305 static int test_kmem_dead_cgroups(const char *root)
306 {
307 int ret = KSFT_FAIL;
308 char *parent;
309 long dead;
310 int i;
311
312 parent = cg_name(root, "kmem_dead_cgroups_test");
313 if (!parent)
314 goto cleanup;
315
316 if (cg_create(parent))
317 goto cleanup;
318
319 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
320 goto cleanup;
321
322 if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
323 goto cleanup;
324
325 for (i = 0; i < 5; i++) {
326 dead = cg_read_key_long(parent, "cgroup.stat",
327 "nr_dying_descendants ");
328 if (dead == 0) {
329 ret = KSFT_PASS;
330 break;
331 }
332 /*
333 * Reclaiming cgroups might take some time,
334 * let's wait a bit and repeat.
335 */
336 sleep(1);
337 }
338
339 cleanup:
340 cg_destroy(parent);
341 free(parent);
342
343 return ret;
344 }
345
346 /*
347 * This test creates a sub-tree with 1000 memory cgroups.
348 * Then it checks that the memory.current on the parent level
349 * is greater than 0 and approximates matches the percpu value
350 * from memory.stat.
351 */
test_percpu_basic(const char * root)352 static int test_percpu_basic(const char *root)
353 {
354 int ret = KSFT_FAIL;
355 char *parent, *child;
356 long current, percpu;
357 int i;
358
359 parent = cg_name(root, "percpu_basic_test");
360 if (!parent)
361 goto cleanup;
362
363 if (cg_create(parent))
364 goto cleanup;
365
366 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
367 goto cleanup;
368
369 for (i = 0; i < 1000; i++) {
370 child = cg_name_indexed(parent, "child", i);
371 if (!child)
372 return -1;
373
374 if (cg_create(child))
375 goto cleanup_children;
376
377 free(child);
378 }
379
380 current = cg_read_long(parent, "memory.current");
381 percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
382
383 if (current > 0 && percpu > 0 && abs(current - percpu) <
384 MAX_VMSTAT_ERROR)
385 ret = KSFT_PASS;
386 else
387 printf("memory.current %ld\npercpu %ld\n",
388 current, percpu);
389
390 cleanup_children:
391 for (i = 0; i < 1000; i++) {
392 child = cg_name_indexed(parent, "child", i);
393 cg_destroy(child);
394 free(child);
395 }
396
397 cleanup:
398 cg_destroy(parent);
399 free(parent);
400
401 return ret;
402 }
403
404 #define T(x) { x, #x }
405 struct kmem_test {
406 int (*fn)(const char *root);
407 const char *name;
408 } tests[] = {
409 T(test_kmem_basic),
410 T(test_kmem_memcg_deletion),
411 T(test_kmem_proc_kpagecgroup),
412 T(test_kmem_kernel_stacks),
413 T(test_kmem_dead_cgroups),
414 T(test_percpu_basic),
415 };
416 #undef T
417
main(int argc,char ** argv)418 int main(int argc, char **argv)
419 {
420 char root[PATH_MAX];
421 int i, ret = EXIT_SUCCESS;
422
423 if (cg_find_unified_root(root, sizeof(root)))
424 ksft_exit_skip("cgroup v2 isn't mounted\n");
425
426 /*
427 * Check that memory controller is available:
428 * memory is listed in cgroup.controllers
429 */
430 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
431 ksft_exit_skip("memory controller isn't available\n");
432
433 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
434 if (cg_write(root, "cgroup.subtree_control", "+memory"))
435 ksft_exit_skip("Failed to set memory controller\n");
436
437 for (i = 0; i < ARRAY_SIZE(tests); i++) {
438 switch (tests[i].fn(root)) {
439 case KSFT_PASS:
440 ksft_test_result_pass("%s\n", tests[i].name);
441 break;
442 case KSFT_SKIP:
443 ksft_test_result_skip("%s\n", tests[i].name);
444 break;
445 default:
446 ret = EXIT_FAILURE;
447 ksft_test_result_fail("%s\n", tests[i].name);
448 break;
449 }
450 }
451
452 return ret;
453 }
454