#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <dirent.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>

#include "linux/magic.h"

#include "vm_util.h"

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_POPULATE_READ
#define MADV_POPULATE_READ 22
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif

#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
static unsigned long page_size;
static int hpage_pmd_nr;

#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
#define TEST_FILE "collapse_test_file"

#define MAX_LINE_LENGTH 500

enum vma_type {
	VMA_ANON,
	VMA_FILE,
	VMA_SHMEM,
};

struct mem_ops {
	void *(*setup_area)(int nr_hpages);
	void (*cleanup_area)(void *p, unsigned long size);
	void (*fault)(void *p, unsigned long start, unsigned long end);
	bool (*check_huge)(void *addr, int nr_hpages);
	const char *name;
};

static struct mem_ops *file_ops;
static struct mem_ops *anon_ops;
static struct mem_ops *shmem_ops;

struct collapse_context {
	void (*collapse)(const char *msg, char *p, int nr_hpages,
			 struct mem_ops *ops, bool expect);
	bool enforce_pte_scan_limits;
	const char *name;
};

static struct collapse_context *khugepaged_context;
static struct collapse_context *madvise_context;

struct file_info {
	const char *dir;
	char path[PATH_MAX];
	enum vma_type type;
	int fd;
	char dev_queue_read_ahead_path[PATH_MAX];
};

static struct file_info finfo;

enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

static const char *thp_enabled_strings[] = {
	"always",
	"madvise",
	"never",
	NULL
};

enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

static const char *thp_defrag_strings[] = {
	"always",
	"defer",
	"defer+madvise",
	"madvise",
	"never",
	NULL
};

enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

static const char *shmem_enabled_strings[] = {
	"always",
	"within_size",
	"advise",
	"never",
	"deny",
	"force",
	NULL
};

struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};

struct settings {
	enum thp_enabled thp_enabled;
	enum thp_defrag thp_defrag;
	enum shmem_enabled shmem_enabled;
	bool use_zero_page;
	struct khugepaged_settings khugepaged;
	unsigned long read_ahead_kb;
};

static struct settings saved_settings;
static bool skip_settings_restore;

static int exit_status;

static void success(const char *msg)
{
	printf(" \e[32m%s\e[0m\n", msg);
}

static void fail(const char *msg)
{
	printf(" \e[31m%s\e[0m\n", msg);
	exit_status++;
}

static void skip(const char *msg)
{
	printf(" \e[33m%s\e[0m\n", msg);
}

static int read_file(const char *path, char *buf, size_t buflen)
{
	int fd;
	ssize_t numread;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		return 0;

	numread = read(fd, buf, buflen - 1);
	if (numread < 1) {
		close(fd);
		return 0;
	}

	buf[numread] = '\0';
	close(fd);

	return (unsigned int) numread;
}

static int write_file(const char *path, const char *buf, size_t buflen)
{
	int fd;
	ssize_t numwritten;

	fd = open(path, O_WRONLY);
	if (fd == -1) {
		printf("open(%s)\n", path);
		exit(EXIT_FAILURE);
		return 0;
	}

	numwritten = write(fd, buf, buflen - 1);
	close(fd);
	if (numwritten < 1) {
		printf("write(%s)\n", buf);
		exit(EXIT_FAILURE);
		return 0;
	}

	return (unsigned int) numwritten;
}

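/*
 * Parse a THP sysfs multiple-choice file such as "enabled", whose contents
 * look like "always [madvise] never", and return the index of the bracketed
 * (active) value within the NULL-terminated @strings array.
 */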
static int read_string(const char *name, const char *strings[])
{
	char path[PATH_MAX];
	char buf[256];
	char *c;
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	if (!read_file(path, buf, sizeof(buf))) {
		perror(path);
		exit(EXIT_FAILURE);
	}

	c = strchr(buf, '[');
	if (!c) {
		printf("%s: Parse failure\n", __func__);
		exit(EXIT_FAILURE);
	}

	c++;
	memmove(buf, c, sizeof(buf) - (c - buf));

	c = strchr(buf, ']');
	if (!c) {
		printf("%s: Parse failure\n", __func__);
		exit(EXIT_FAILURE);
	}
	*c = '\0';

	ret = 0;
	while (strings[ret]) {
		if (!strcmp(strings[ret], buf))
			return ret;
		ret++;
	}

	printf("Failed to parse %s\n", name);
	exit(EXIT_FAILURE);
}

static void write_string(const char *name, const char *val)
{
	char path[PATH_MAX];
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	if (!write_file(path, val, strlen(val) + 1)) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}

static unsigned long _read_num(const char *path)
{
	char buf[21];

	if (!read_file(path, buf, sizeof(buf))) {
		perror("read_file(read_num)");
		exit(EXIT_FAILURE);
	}

	return strtoul(buf, NULL, 10);
}

static unsigned long read_num(const char *name)
{
	char path[PATH_MAX];
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	return _read_num(path);
}

static void _write_num(const char *path, unsigned long num)
{
	char buf[21];

	sprintf(buf, "%lu", num);
	if (!write_file(path, buf, strlen(buf) + 1)) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}

static void write_num(const char *name, unsigned long num)
{
	char path[PATH_MAX];
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	_write_num(path, num);
}

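/*
 * Apply @settings to the THP and khugepaged sysfs knobs. When testing
 * file-backed (non-tmpfs) memory, also program the backing block device's
 * queue/read_ahead_kb control resolved by get_finfo().
 */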
static void write_settings(struct settings *settings)
{
	struct khugepaged_settings *khugepaged = &settings->khugepaged;

	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
	write_string("shmem_enabled",
		     shmem_enabled_strings[settings->shmem_enabled]);
	write_num("use_zero_page", settings->use_zero_page);

	write_num("khugepaged/defrag", khugepaged->defrag);
	write_num("khugepaged/alloc_sleep_millisecs",
		  khugepaged->alloc_sleep_millisecs);
	write_num("khugepaged/scan_sleep_millisecs",
		  khugepaged->scan_sleep_millisecs);
	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);

	if (file_ops && finfo.type == VMA_FILE)
		_write_num(finfo.dev_queue_read_ahead_path,
			   settings->read_ahead_kb);
}

#define MAX_SETTINGS_DEPTH 4
static struct settings settings_stack[MAX_SETTINGS_DEPTH];
static int settings_index;

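/*
 * Settings are kept on a small LIFO stack: push_settings() applies a new
 * configuration, pop_settings() reverts to the previous one, and
 * current_settings() returns the configuration currently in effect.
 */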
static struct settings *current_settings(void)
{
	if (!settings_index) {
		printf("Fail: No settings set\n");
		exit(EXIT_FAILURE);
	}
	return settings_stack + settings_index - 1;
}

static void push_settings(struct settings *settings)
{
	if (settings_index >= MAX_SETTINGS_DEPTH) {
		printf("Fail: Settings stack exceeded\n");
		exit(EXIT_FAILURE);
	}
	settings_stack[settings_index++] = *settings;
	write_settings(current_settings());
}

static void pop_settings(void)
{
	if (settings_index <= 0) {
		printf("Fail: Settings stack empty\n");
		exit(EXIT_FAILURE);
	}
	--settings_index;
	write_settings(current_settings());
}

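/*
 * Write back the settings captured by save_settings(). Installed as the
 * handler for fatal signals; also called directly with sig == 0 at the end
 * of main() for a normal exit.
 */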
static void restore_settings(int sig)
{
	if (skip_settings_restore)
		goto out;

	printf("Restore THP and khugepaged settings...");
	write_settings(&saved_settings);
	success("OK");
	if (sig)
		exit(EXIT_FAILURE);
out:
	exit(exit_status);
}

static void save_settings(void)
{
	printf("Save THP and khugepaged settings...");
	saved_settings = (struct settings) {
		.thp_enabled = read_string("enabled", thp_enabled_strings),
		.thp_defrag = read_string("defrag", thp_defrag_strings),
		.shmem_enabled =
			read_string("shmem_enabled", shmem_enabled_strings),
		.use_zero_page = read_num("use_zero_page"),
	};
	saved_settings.khugepaged = (struct khugepaged_settings) {
		.defrag = read_num("khugepaged/defrag"),
		.alloc_sleep_millisecs =
			read_num("khugepaged/alloc_sleep_millisecs"),
		.scan_sleep_millisecs =
			read_num("khugepaged/scan_sleep_millisecs"),
		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
	};
	if (file_ops && finfo.type == VMA_FILE)
		saved_settings.read_ahead_kb =
			_read_num(finfo.dev_queue_read_ahead_path);

	success("OK");

	signal(SIGTERM, restore_settings);
	signal(SIGINT, restore_settings);
	signal(SIGHUP, restore_settings);
	signal(SIGQUIT, restore_settings);
}

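/*
 * Fill in @finfo for file-backed tests: build the test file path under
 * @dir, classify the backing filesystem (tmpfs maps to VMA_SHMEM, anything
 * else to VMA_FILE), and for real block devices resolve the owning disk's
 * queue/read_ahead_kb sysfs control, walking up from a partition to its
 * parent disk when necessary.
 */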
static void get_finfo(const char *dir)
{
	struct stat path_stat;
	struct statfs fs;
	char buf[1 << 10];
	char path[PATH_MAX];
	char *str, *end;

	finfo.dir = dir;
	stat(finfo.dir, &path_stat);
	if (!S_ISDIR(path_stat.st_mode)) {
		printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
		exit(EXIT_FAILURE);
	}
	if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
		     finfo.dir) >= sizeof(finfo.path)) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	if (statfs(finfo.dir, &fs)) {
		perror("statfs()");
		exit(EXIT_FAILURE);
	}
	finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
	if (finfo.type == VMA_SHMEM)
		return;

	/* Find owning device's queue/read_ahead_kb control */
	if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
		     major(path_stat.st_dev), minor(path_stat.st_dev))
	    >= sizeof(path)) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	if (!read_file(path, buf, sizeof(buf))) {
		perror("read_file()");
		exit(EXIT_FAILURE);
	}
	if (strstr(buf, "DEVTYPE=disk")) {
		/* Found it */
		if (snprintf(finfo.dev_queue_read_ahead_path,
			     sizeof(finfo.dev_queue_read_ahead_path),
			     "/sys/dev/block/%d:%d/queue/read_ahead_kb",
			     major(path_stat.st_dev), minor(path_stat.st_dev))
		    >= sizeof(finfo.dev_queue_read_ahead_path)) {
			printf("%s: Pathname is too long\n", __func__);
			exit(EXIT_FAILURE);
		}
		return;
	}
	if (!strstr(buf, "DEVTYPE=partition")) {
		printf("%s: Unknown device type: %s\n", __func__, path);
		exit(EXIT_FAILURE);
	}
	/*
	 * Partition of block device - need to find actual device.
	 * Using naming convention that devnameN is partition of
	 * device devname.
	 */
	str = strstr(buf, "DEVNAME=");
	if (!str) {
		printf("%s: Could not read: %s\n", __func__, path);
		exit(EXIT_FAILURE);
	}
	str += 8;
	end = str;
	while (*end) {
		if (isdigit(*end)) {
			*end = '\0';
			if (snprintf(finfo.dev_queue_read_ahead_path,
				     sizeof(finfo.dev_queue_read_ahead_path),
				     "/sys/block/%s/queue/read_ahead_kb",
				     str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
				printf("%s: Pathname is too long\n", __func__);
				exit(EXIT_FAILURE);
			}
			return;
		}
		++end;
	}
	printf("%s: Could not read: %s\n", __func__, path);
	exit(EXIT_FAILURE);
}

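/*
 * Return true iff the /proc/self/smaps entry for @addr reports exactly
 * @size bytes of swap.
 */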
static bool check_swap(void *addr, unsigned long size)
{
	bool swap = false;
	int ret;
	FILE *fp;
	char buffer[MAX_LINE_LENGTH];
	char addr_pattern[MAX_LINE_LENGTH];

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
		       (unsigned long) addr);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	fp = fopen(PID_SMAPS, "r");
	if (!fp) {
		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
		exit(EXIT_FAILURE);
	}
	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
		goto err_out;

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19lu kB",
		       size >> 10);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	/*
	 * Fetch the Swap: field in the same block and check whether it
	 * reports the expected amount of swap next.
	 */
	if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
		goto err_out;

	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
		goto err_out;

	swap = true;
err_out:
	fclose(fp);
	return swap;
}

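/*
 * Map @nr huge-page-sized anonymous regions at the fixed BASE_ADDR hint;
 * the tests depend on operating at a known address.
 */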
static void *alloc_mapping(int nr)
{
	void *p;

	p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (p != BASE_ADDR) {
		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
		exit(EXIT_FAILURE);
	}

	return p;
}

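/*
 * Tag the first word of every page in [start, end) with a distinct value
 * (0xdead0000 + page index) so validate_memory() can later verify that the
 * contents survived collapse.
 */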
static void fill_memory(int *p, unsigned long start, unsigned long end)
{
	int i;

	for (i = start / page_size; i < end / page_size; i++)
		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
}

/*
 * MADV_COLLAPSE is a best-effort request and may fail if an internal
 * resource is temporarily unavailable, in which case it will set errno to
 * EAGAIN. In such a case, immediately reattempt the operation one more
 * time.
 */
static int madvise_collapse_retry(void *p, unsigned long size)
{
	bool retry = true;
	int ret;

retry:
	ret = madvise(p, size, MADV_COLLAPSE);
	if (ret && errno == EAGAIN && retry) {
		retry = false;
		goto retry;
	}
	return ret;
}

/*
 * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
 * validate_memory()'able contents.
 */
static void *alloc_hpage(struct mem_ops *ops)
{
	void *p = ops->setup_area(1);

	ops->fault(p, 0, hpage_pmd_size);

	/*
	 * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
	 * The latter is ineligible for collapse by MADV_COLLAPSE
	 * while the former might cause MADV_COLLAPSE to race with
	 * khugepaged on a low-load system (like a test machine), which
	 * would cause MADV_COLLAPSE to fail with EAGAIN.
	 */
	printf("Allocate huge page...");
	if (madvise_collapse_retry(p, hpage_pmd_size)) {
		perror("madvise(MADV_COLLAPSE)");
		exit(EXIT_FAILURE);
	}
	if (!ops->check_huge(p, 1)) {
		printf("Failed to collapse huge page\n");
		exit(EXIT_FAILURE);
	}
	if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
		perror("madvise(MADV_HUGEPAGE)");
		exit(EXIT_FAILURE);
	}
	success("OK");
	return p;
}

static void validate_memory(int *p, unsigned long start, unsigned long end)
{
	int i;

	for (i = start / page_size; i < end / page_size; i++) {
		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
			printf("Page %d is corrupted: %#x\n",
			       i, p[i * page_size / sizeof(*p)]);
			exit(EXIT_FAILURE);
		}
	}
}

static void *anon_setup_area(int nr_hpages)
{
	return alloc_mapping(nr_hpages);
}

static void anon_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
}

static void anon_fault(void *p, unsigned long start, unsigned long end)
{
	fill_memory(p, start, end);
}

static bool anon_check_huge(void *addr, int nr_hpages)
{
	return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
}

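/*
 * Create the test file, fill it with validate_memory()'able contents, then
 * reopen it read-only and map it PROT_READ | PROT_EXEC (making it eligible
 * for CONFIG_READ_ONLY_THP_FOR_FS collapse). The page cache is dropped so
 * pages only appear in it via mem_ops->fault().
 */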
static void *file_setup_area(int nr_hpages)
{
	int fd;
	void *p;
	unsigned long size;

	unlink(finfo.path);  /* Cleanup from previous failed tests */
	printf("Creating %s for collapse%s...", finfo.path,
	       finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
	fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
		  0777);
	if (fd < 0) {
		perror("open()");
		exit(EXIT_FAILURE);
	}

	size = nr_hpages * hpage_pmd_size;
	p = alloc_mapping(nr_hpages);
	fill_memory(p, 0, size);
	if (write(fd, p, size) != (ssize_t)size) {
		perror("write()");
		exit(EXIT_FAILURE);
	}
	close(fd);
	munmap(p, size);
	success("OK");

	printf("Opening %s read only for collapse...", finfo.path);
	finfo.fd = open(finfo.path, O_RDONLY);
	if (finfo.fd < 0) {
		perror("open()");
		exit(EXIT_FAILURE);
	}
	p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
		 MAP_PRIVATE, finfo.fd, 0);
	if (p == MAP_FAILED || p != BASE_ADDR) {
		perror("mmap()");
		exit(EXIT_FAILURE);
	}

	/* Drop page cache */
	write_file("/proc/sys/vm/drop_caches", "3", 2);
	success("OK");
	return p;
}

static void file_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
	close(finfo.fd);
	unlink(finfo.path);
}

static void file_fault(void *p, unsigned long start, unsigned long end)
{
	if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
		perror("madvise(MADV_POPULATE_READ)");
		exit(EXIT_FAILURE);
	}
}

static bool file_check_huge(void *addr, int nr_hpages)
{
	switch (finfo.type) {
	case VMA_FILE:
		return check_huge_file(addr, nr_hpages, hpage_pmd_size);
	case VMA_SHMEM:
		return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
	default:
		exit(EXIT_FAILURE);
		return false;
	}
}

static void *shmem_setup_area(int nr_hpages)
{
	void *p;
	unsigned long size = nr_hpages * hpage_pmd_size;

	finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
	if (finfo.fd < 0) {
		perror("memfd_create()");
		exit(EXIT_FAILURE);
	}
	if (ftruncate(finfo.fd, size)) {
		perror("ftruncate()");
		exit(EXIT_FAILURE);
	}
	p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
		 0);
	if (p != BASE_ADDR) {
		perror("mmap()");
		exit(EXIT_FAILURE);
	}
	return p;
}

static void shmem_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
	close(finfo.fd);
}

static bool shmem_check_huge(void *addr, int nr_hpages)
{
	return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
}

static struct mem_ops __anon_ops = {
	.setup_area = &anon_setup_area,
	.cleanup_area = &anon_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &anon_check_huge,
	.name = "anon",
};

static struct mem_ops __file_ops = {
	.setup_area = &file_setup_area,
	.cleanup_area = &file_cleanup_area,
	.fault = &file_fault,
	.check_huge = &file_check_huge,
	.name = "file",
};

static struct mem_ops __shmem_ops = {
	.setup_area = &shmem_setup_area,
	.cleanup_area = &shmem_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &shmem_check_huge,
	.name = "shmem",
};

static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
			       struct mem_ops *ops, bool expect)
{
	int ret;
	struct settings settings = *current_settings();

	printf("%s...", msg);

	/*
	 * Prevent khugepaged interference, and test that MADV_COLLAPSE
	 * ignores /sys/kernel/mm/transparent_hugepage/enabled.
	 */
	settings.thp_enabled = THP_NEVER;
	settings.shmem_enabled = SHMEM_NEVER;
	push_settings(&settings);

	/* Clear VM_NOHUGEPAGE */
	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
	ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
	if (((bool)ret) == expect)
		fail("Fail: Bad return value");
	else if (!ops->check_huge(p, expect ? nr_hpages : 0))
		fail("Fail: check_huge()");
	else
		success("OK");

	pop_settings();
}

static void madvise_collapse(const char *msg, char *p, int nr_hpages,
			     struct mem_ops *ops, bool expect)
{
	/* Sanity check */
	if (!ops->check_huge(p, 0)) {
		printf("Unexpected huge page\n");
		exit(EXIT_FAILURE);
	}
	__madvise_collapse(msg, p, nr_hpages, ops, expect);
}

#define TICK 500000
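/*
 * Mark the region VM_HUGEPAGE, then poll (for roughly three seconds) until
 * either khugepaged has collapsed it or two more full khugepaged scans have
 * completed. Returns true on timeout.
 */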
static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
			  struct mem_ops *ops)
{
	int full_scans;
	int timeout = 6; /* 3 seconds */

	/* Sanity check */
	if (!ops->check_huge(p, 0)) {
		printf("Unexpected huge page\n");
		exit(EXIT_FAILURE);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);

	/* Wait until two more full scans have completed */
	full_scans = read_num("khugepaged/full_scans") + 2;

	printf("%s...", msg);
	while (timeout--) {
		if (ops->check_huge(p, nr_hpages))
			break;
		if (read_num("khugepaged/full_scans") >= full_scans)
			break;
		printf(".");
		usleep(TICK);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);

	return timeout == -1;
}

static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
				struct mem_ops *ops, bool expect)
{
	if (wait_for_scan(msg, p, nr_hpages, ops)) {
		if (expect)
			fail("Timeout");
		else
			success("OK");
		return;
	}

	/*
	 * For file and shmem memory, khugepaged only retracts pte entries after
	 * putting the new hugepage in the page cache. The hugepage must be
	 * subsequently refaulted to install the pmd mapping for the mm.
	 */
	if (ops != &__anon_ops)
		ops->fault(p, 0, nr_hpages * hpage_pmd_size);

	if (ops->check_huge(p, expect ? nr_hpages : 0))
		success("OK");
	else
		fail("Fail");
}

static struct collapse_context __khugepaged_context = {
	.collapse = &khugepaged_collapse,
	.enforce_pte_scan_limits = true,
	.name = "khugepaged",
};

static struct collapse_context __madvise_context = {
	.collapse = &madvise_collapse,
	.enforce_pte_scan_limits = false,
	.name = "madvise",
};

static bool is_tmpfs(struct mem_ops *ops)
{
	return ops == &__file_ops && finfo.type == VMA_SHMEM;
}

static void alloc_at_fault(void)
{
	struct settings settings = *current_settings();
	char *p;

	settings.thp_enabled = THP_ALWAYS;
	push_settings(&settings);

	p = alloc_mapping(1);
	*p = 1;
	printf("Allocate huge page on fault...");
	if (check_huge_anon(p, 1, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");

	pop_settings();

	madvise(p, page_size, MADV_DONTNEED);
	printf("Split huge PMD on MADV_DONTNEED...");
	if (check_huge_anon(p, 0, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");
	munmap(p, hpage_pmd_size);
}

static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int nr_hpages = 4;
	unsigned long size = nr_hpages * hpage_pmd_size;

	p = ops->setup_area(nr_hpages);
	ops->fault(p, 0, size);
	c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
		    ops, true);
	validate_memory(p, 0, size);
	ops->cleanup_area(p, size);
}

static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, page_size);
	c->collapse("Collapse PTE table with single PTE entry present", p,
		    1, ops, true);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_none = hpage_pmd_nr / 2;
	struct settings settings = *current_settings();
	void *p;

	settings.khugepaged.max_ptes_none = max_ptes_none;
	push_settings(&settings);

	p = ops->setup_area(1);

	if (is_tmpfs(ops)) {
		/* shmem pages always in the page cache */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
	c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
		    ops, !c->enforce_pte_scan_limits);
	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);

	if (c->enforce_pte_scan_limits) {
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
		c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
			    true);
		validate_memory(p, 0,
				(hpage_pmd_nr - max_ptes_none) * page_size);
	}
skip:
	ops->cleanup_area(p, hpage_pmd_size);
	pop_settings();
}

static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);

	printf("Swapout one page...");
	if (madvise(p, page_size, MADV_PAGEOUT)) {
		perror("madvise(MADV_PAGEOUT)");
		exit(EXIT_FAILURE);
	}
	if (check_swap(p, page_size)) {
		success("OK");
	} else {
		fail("Fail");
		goto out;
	}

	c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
		    true);
	validate_memory(p, 0, hpage_pmd_size);
out:
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);

	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
		perror("madvise(MADV_PAGEOUT)");
		exit(EXIT_FAILURE);
	}
	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
		success("OK");
	} else {
		fail("Fail");
		goto out;
	}

	c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
		    !c->enforce_pte_scan_limits);
	validate_memory(p, 0, hpage_pmd_size);

	if (c->enforce_pte_scan_limits) {
		ops->fault(p, 0, hpage_pmd_size);
		printf("Swapout %d of %d pages...", max_ptes_swap,
		       hpage_pmd_nr);
		if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
			perror("madvise(MADV_PAGEOUT)");
			exit(EXIT_FAILURE);
		}
		if (check_swap(p, max_ptes_swap * page_size)) {
			success("OK");
		} else {
			fail("Fail");
			goto out;
		}

		c->collapse("Collapse with max_ptes_swap pages swapped out", p,
			    1, ops, true);
		validate_memory(p, 0, hpage_pmd_size);
	}
out:
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = alloc_hpage(ops);

	if (is_tmpfs(ops)) {
		/* MADV_DONTNEED won't evict tmpfs pages */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
	printf("Split huge page leaving single PTE mapping compound page...");
	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table with single PTE mapping compound page",
		    p, 1, ops, true);
	validate_memory(p, 0, page_size);
skip:
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = alloc_hpage(ops);
	printf("Split huge page leaving single PTE page table full of compound pages...");
	madvise(p, page_size, MADV_NOHUGEPAGE);
	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
		    true);
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

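/*
 * Construct a PTE page table where each entry maps a page of a different
 * PTE-mapped compound page. Each iteration faults a fresh THP at BASE_ADDR,
 * splits its PMD, and shuffles one page of it into an accumulating region
 * just below BASE_ADDR via a pair of mremap() calls.
 */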
static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int i;

	p = ops->setup_area(1);
	for (i = 0; i < hpage_pmd_nr; i++) {
		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
		       i + 1, hpage_pmd_nr);

		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
		ops->fault(BASE_ADDR, 0, hpage_pmd_size);
		if (!ops->check_huge(BASE_ADDR, 1)) {
			printf("Failed to allocate huge page\n");
			exit(EXIT_FAILURE);
		}
		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);

		p = mremap(BASE_ADDR - i * page_size,
			   i * page_size + hpage_pmd_size,
			   (i + 1) * page_size,
			   MREMAP_MAYMOVE | MREMAP_FIXED,
			   BASE_ADDR + 2 * hpage_pmd_size);
		if (p == MAP_FAILED) {
			perror("mremap+unmap");
			exit(EXIT_FAILURE);
		}

		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
			   (i + 1) * page_size,
			   (i + 1) * page_size + hpage_pmd_size,
			   MREMAP_MAYMOVE | MREMAP_FIXED,
			   BASE_ADDR - (i + 1) * page_size);
		if (p == MAP_FAILED) {
			perror("mremap+alloc");
			exit(EXIT_FAILURE);
		}
	}

	ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
	ops->fault(p, 0, hpage_pmd_size);
	if (!ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of different compound pages", p, 1,
		    ops, true);

	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

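/*
 * Share a single small page with a forked child and verify that the child
 * can still collapse the PTE table while the parent keeps its small page.
 */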
static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
{
	int wstatus;
	void *p;

	p = ops->setup_area(1);

	printf("Allocate small page...");
	ops->fault(p, 0, page_size);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	printf("Share small page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");

		ops->fault(p, page_size, 2 * page_size);
		c->collapse("Collapse PTE table with single page shared with parent process",
			    p, 1, ops, true);

		validate_memory(p, 0, page_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has small page...");
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, page_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
{
	int wstatus;
	void *p;

	p = alloc_hpage(ops);
	printf("Share huge page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 1))
			success("OK");
		else
			fail("Fail");

		printf("Split huge page PMD in child process...");
		madvise(p, page_size, MADV_NOHUGEPAGE);
		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");
		ops->fault(p, 0, page_size);

		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
		c->collapse("Collapse PTE table full of compound pages in child",
			    p, 1, ops, true);
		write_num("khugepaged/max_ptes_shared",
			  current_settings()->khugepaged.max_ptes_shared);

		validate_memory(p, 0, hpage_pmd_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has huge page...");
	if (ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
	int wstatus;
	void *p;

	p = alloc_hpage(ops);
	printf("Share huge page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 1))
			success("OK");
		else
			fail("Fail");

		printf("Trigger CoW on page %d of %d...",
		       hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");

		c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
			    1, ops, !c->enforce_pte_scan_limits);

		if (c->enforce_pte_scan_limits) {
			printf("Trigger CoW on page %d of %d...",
			       hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
			ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
				   page_size);
			if (ops->check_huge(p, 0))
				success("OK");
			else
				fail("Fail");

			c->collapse("Collapse with max_ptes_shared PTEs shared",
				    p, 1, ops, true);
		}

		validate_memory(p, 0, hpage_pmd_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has huge page...");
	if (ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void madvise_collapse_existing_thps(struct collapse_context *c,
					   struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);
	c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
	validate_memory(p, 0, hpage_pmd_size);

	/* c->collapse() will find a hugepage and complain - call directly. */
	__madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

/*
 * Test race with khugepaged where page tables have been retracted and
 * pmd cleared.
 */
static void madvise_retracted_page_tables(struct collapse_context *c,
					  struct mem_ops *ops)
{
	void *p;
	int nr_hpages = 1;
	unsigned long size = nr_hpages * hpage_pmd_size;

	p = ops->setup_area(nr_hpages);
	ops->fault(p, 0, size);

	/* Let khugepaged collapse and leave pmd cleared */
	if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
			  ops)) {
		fail("Timeout");
		return;
	}
	success("OK");
	c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
		    true);
	validate_memory(p, 0, size);
	ops->cleanup_area(p, size);
}

static void usage(void)
{
	fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n");
	fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
	fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
	fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
	fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
	fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
	fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
	fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
	fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n");
	exit(1);
}

static void parse_test_type(int argc, const char **argv)
{
	char *buf;
	const char *token;

	if (argc == 1) {
		/* Backwards compatibility */
		khugepaged_context = &__khugepaged_context;
		madvise_context = &__madvise_context;
		anon_ops = &__anon_ops;
		return;
	}

	buf = strdup(argv[1]);
	token = strsep(&buf, ":");

	if (!strcmp(token, "all")) {
		khugepaged_context = &__khugepaged_context;
		madvise_context = &__madvise_context;
	} else if (!strcmp(token, "khugepaged")) {
		khugepaged_context = &__khugepaged_context;
	} else if (!strcmp(token, "madvise")) {
		madvise_context = &__madvise_context;
	} else {
		usage();
	}

	if (!buf)
		usage();

	if (!strcmp(buf, "all")) {
		file_ops = &__file_ops;
		anon_ops = &__anon_ops;
		shmem_ops = &__shmem_ops;
	} else if (!strcmp(buf, "anon")) {
		anon_ops = &__anon_ops;
	} else if (!strcmp(buf, "file")) {
		file_ops = &__file_ops;
	} else if (!strcmp(buf, "shmem")) {
		shmem_ops = &__shmem_ops;
	} else {
		usage();
	}

	if (!file_ops)
		return;

	if (argc != 3)
		usage();
}

int main(int argc, const char **argv)
{
	struct settings default_settings = {
		.thp_enabled = THP_MADVISE,
		.thp_defrag = THP_DEFRAG_ALWAYS,
		.shmem_enabled = SHMEM_ADVISE,
		.use_zero_page = 0,
		.khugepaged = {
			.defrag = 1,
			.alloc_sleep_millisecs = 10,
			.scan_sleep_millisecs = 10,
		},
		/*
		 * When testing file-backed memory, the collapse path
		 * looks at how many pages are found in the page cache, not
		 * what pages are mapped. Disable read ahead optimization so
		 * pages don't find their way into the page cache unless
		 * we mem_ops->fault() them in.
		 */
		.read_ahead_kb = 0,
	};

	parse_test_type(argc, argv);

	if (file_ops)
		get_finfo(argv[2]);

	setbuf(stdout, NULL);

	page_size = getpagesize();
	hpage_pmd_size = read_pmd_pagesize();
	hpage_pmd_nr = hpage_pmd_size / page_size;

	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;

	save_settings();
	push_settings(&default_settings);

	alloc_at_fault();

#define TEST(t, c, o) do { \
	if (c && o) { \
		printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
		t(c, o); \
	} \
	} while (0)

	TEST(collapse_full, khugepaged_context, anon_ops);
	TEST(collapse_full, khugepaged_context, file_ops);
	TEST(collapse_full, khugepaged_context, shmem_ops);
	TEST(collapse_full, madvise_context, anon_ops);
	TEST(collapse_full, madvise_context, file_ops);
	TEST(collapse_full, madvise_context, shmem_ops);

	TEST(collapse_empty, khugepaged_context, anon_ops);
	TEST(collapse_empty, madvise_context, anon_ops);

	TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
	TEST(collapse_single_pte_entry, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry, madvise_context, file_ops);
	TEST(collapse_single_pte_entry, madvise_context, shmem_ops);

	TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
	TEST(collapse_max_ptes_none, madvise_context, anon_ops);
	TEST(collapse_max_ptes_none, madvise_context, file_ops);

	TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);

	TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
	TEST(collapse_full_of_compound, khugepaged_context, file_ops);
	TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
	TEST(collapse_full_of_compound, madvise_context, anon_ops);
	TEST(collapse_full_of_compound, madvise_context, file_ops);
	TEST(collapse_full_of_compound, madvise_context, shmem_ops);

	TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
	TEST(collapse_compound_extreme, madvise_context, anon_ops);

	TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
	TEST(collapse_swapin_single_pte, madvise_context, anon_ops);

	TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_swap, madvise_context, anon_ops);

	TEST(collapse_fork, khugepaged_context, anon_ops);
	TEST(collapse_fork, madvise_context, anon_ops);

	TEST(collapse_fork_compound, khugepaged_context, anon_ops);
	TEST(collapse_fork_compound, madvise_context, anon_ops);

	TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_shared, madvise_context, anon_ops);

	TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);

	TEST(madvise_retracted_page_tables, madvise_context, file_ops);
	TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);

	restore_settings(0);
}