1 #define _GNU_SOURCE
2 #include <fcntl.h>
3 #include <limits.h>
4 #include <signal.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <unistd.h>
10
11 #include <sys/mman.h>
12 #include <sys/wait.h>
13
/* Fallback for build environments whose headers do not define MADV_PAGEOUT. */
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
17
/* Address (1 GiB) at which alloc_mapping() asks mmap() to place the test VMA. */
#define BASE_ADDR ((void *)(1UL << 30))
/* Set in main() from the sysfs file "hpage_pmd_size". */
static unsigned long hpage_pmd_size;
/* Set in main() from getpagesize(). */
static unsigned long page_size;
/* hpage_pmd_size / page_size, computed in main(). */
static int hpage_pmd_nr;

/* Directory prefix prepended to every name by the read_/write_ string and num helpers. */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
/* File scanned by check_huge() and check_swap(). */
#define PID_SMAPS "/proc/self/smaps"
25
/*
 * Values of the "enabled" sysfs file.  The enumerators are used as indices
 * into thp_enabled_strings[], so both must stay in the same order.
 */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};
31
/* Text form of each enum thp_enabled value; NULL-terminated for read_string(). */
static const char *thp_enabled_strings[] = {
	"always",
	"madvise",
	"never",
	NULL
};
38
/*
 * Values of the "defrag" sysfs file.  The enumerators are used as indices
 * into thp_defrag_strings[], so both must stay in the same order.
 */
enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};
46
/* Text form of each enum thp_defrag value; NULL-terminated for read_string(). */
static const char *thp_defrag_strings[] = {
	"always",
	"defer",
	"defer+madvise",
	"madvise",
	"never",
	NULL
};
55
/*
 * Values of the "shmem_enabled" sysfs file.  The enumerators are used as
 * indices into shmem_enabled_strings[], so both must stay in the same order.
 */
enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};
64
/* Text form of each enum shmem_enabled value; NULL-terminated for read_string(). */
static const char *shmem_enabled_strings[] = {
	"always",
	"within_size",
	"advise",
	"never",
	"deny",
	"force",
	NULL
};
74
/*
 * In-memory copy of the tunables under the "khugepaged/" sysfs directory.
 * write_settings() writes each field to the file of the same name.
 */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};
84
/*
 * Complete set of THP tunables that this program saves, overrides and
 * restores: the top-level sysfs files plus the khugepaged sub-directory.
 */
struct settings {
	enum thp_enabled thp_enabled;
	enum thp_defrag thp_defrag;
	enum shmem_enabled shmem_enabled;
	bool use_zero_page;
	struct khugepaged_settings khugepaged;
};
92
/*
 * Baseline configuration written by adjust_settings() and re-applied by the
 * tests that temporarily override it.  The max_ptes_* and pages_to_scan
 * fields are not set here; main() fills them in once hpage_pmd_nr is known.
 */
static struct settings default_settings = {
	.thp_enabled = THP_MADVISE,
	.thp_defrag = THP_DEFRAG_ALWAYS,
	.shmem_enabled = SHMEM_NEVER,
	.use_zero_page = 0,
	.khugepaged = {
		.defrag = 1,
		.alloc_sleep_millisecs = 10,
		.scan_sleep_millisecs = 10,
	},
};
104
/* Snapshot taken by save_settings() and written back by restore_settings(). */
static struct settings saved_settings;
/* Set in forked children so that restore_settings() leaves sysfs untouched. */
static bool skip_settings_restore;

/* Count of failed checks: incremented by fail(), used as the exit code. */
static int exit_status;
109
/* Report a passed step: print @msg on stdout wrapped in the ANSI colour 32 escape. */
static void success(const char *msg)
{
	fprintf(stdout, " \e[32m%s\e[0m\n", msg);
}
114
fail(const char * msg)115 static void fail(const char *msg)
116 {
117 printf(" \e[31m%s\e[0m\n", msg);
118 exit_status++;
119 }
120
/*
 * Read at most @buflen - 1 bytes from @path into @buf and NUL-terminate them.
 *
 * Returns the number of bytes read, or 0 when the file cannot be opened,
 * the read fails, or nothing was read.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_RDONLY);

	if (fd == -1)
		return 0;

	nbytes = read(fd, buf, buflen - 1);
	close(fd);
	if (nbytes < 1)
		return 0;

	buf[nbytes] = '\0';
	return (unsigned int) nbytes;
}
141
/*
 * Write the first @buflen - 1 bytes of @buf to the existing file @path
 * (callers pass strlen + 1, so the trailing NUL is not written).
 *
 * Returns the number of bytes written, or 0 when the file cannot be opened
 * for writing or fewer than one byte was written.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_WRONLY);

	if (fd == -1)
		return 0;

	nbytes = write(fd, buf, buflen - 1);
	close(fd);

	return nbytes < 1 ? 0 : (unsigned int) nbytes;
}
158
read_string(const char * name,const char * strings[])159 static int read_string(const char *name, const char *strings[])
160 {
161 char path[PATH_MAX];
162 char buf[256];
163 char *c;
164 int ret;
165
166 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
167 if (ret >= PATH_MAX) {
168 printf("%s: Pathname is too long\n", __func__);
169 exit(EXIT_FAILURE);
170 }
171
172 if (!read_file(path, buf, sizeof(buf))) {
173 perror(path);
174 exit(EXIT_FAILURE);
175 }
176
177 c = strchr(buf, '[');
178 if (!c) {
179 printf("%s: Parse failure\n", __func__);
180 exit(EXIT_FAILURE);
181 }
182
183 c++;
184 memmove(buf, c, sizeof(buf) - (c - buf));
185
186 c = strchr(buf, ']');
187 if (!c) {
188 printf("%s: Parse failure\n", __func__);
189 exit(EXIT_FAILURE);
190 }
191 *c = '\0';
192
193 ret = 0;
194 while (strings[ret]) {
195 if (!strcmp(strings[ret], buf))
196 return ret;
197 ret++;
198 }
199
200 printf("Failed to parse %s\n", name);
201 exit(EXIT_FAILURE);
202 }
203
write_string(const char * name,const char * val)204 static void write_string(const char *name, const char *val)
205 {
206 char path[PATH_MAX];
207 int ret;
208
209 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
210 if (ret >= PATH_MAX) {
211 printf("%s: Pathname is too long\n", __func__);
212 exit(EXIT_FAILURE);
213 }
214
215 if (!write_file(path, val, strlen(val) + 1)) {
216 perror(path);
217 exit(EXIT_FAILURE);
218 }
219 }
220
read_num(const char * name)221 static const unsigned long read_num(const char *name)
222 {
223 char path[PATH_MAX];
224 char buf[21];
225 int ret;
226
227 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
228 if (ret >= PATH_MAX) {
229 printf("%s: Pathname is too long\n", __func__);
230 exit(EXIT_FAILURE);
231 }
232
233 ret = read_file(path, buf, sizeof(buf));
234 if (ret < 0) {
235 perror("read_file(read_num)");
236 exit(EXIT_FAILURE);
237 }
238
239 return strtoul(buf, NULL, 10);
240 }
241
write_num(const char * name,unsigned long num)242 static void write_num(const char *name, unsigned long num)
243 {
244 char path[PATH_MAX];
245 char buf[21];
246 int ret;
247
248 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
249 if (ret >= PATH_MAX) {
250 printf("%s: Pathname is too long\n", __func__);
251 exit(EXIT_FAILURE);
252 }
253
254 sprintf(buf, "%ld", num);
255 if (!write_file(path, buf, strlen(buf) + 1)) {
256 perror(path);
257 exit(EXIT_FAILURE);
258 }
259 }
260
write_settings(struct settings * settings)261 static void write_settings(struct settings *settings)
262 {
263 struct khugepaged_settings *khugepaged = &settings->khugepaged;
264
265 write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
266 write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
267 write_string("shmem_enabled",
268 shmem_enabled_strings[settings->shmem_enabled]);
269 write_num("use_zero_page", settings->use_zero_page);
270
271 write_num("khugepaged/defrag", khugepaged->defrag);
272 write_num("khugepaged/alloc_sleep_millisecs",
273 khugepaged->alloc_sleep_millisecs);
274 write_num("khugepaged/scan_sleep_millisecs",
275 khugepaged->scan_sleep_millisecs);
276 write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
277 write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
278 write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
279 write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
280 }
281
restore_settings(int sig)282 static void restore_settings(int sig)
283 {
284 if (skip_settings_restore)
285 goto out;
286
287 printf("Restore THP and khugepaged settings...");
288 write_settings(&saved_settings);
289 success("OK");
290 if (sig)
291 exit(EXIT_FAILURE);
292 out:
293 exit(exit_status);
294 }
295
save_settings(void)296 static void save_settings(void)
297 {
298 printf("Save THP and khugepaged settings...");
299 saved_settings = (struct settings) {
300 .thp_enabled = read_string("enabled", thp_enabled_strings),
301 .thp_defrag = read_string("defrag", thp_defrag_strings),
302 .shmem_enabled =
303 read_string("shmem_enabled", shmem_enabled_strings),
304 .use_zero_page = read_num("use_zero_page"),
305 };
306 saved_settings.khugepaged = (struct khugepaged_settings) {
307 .defrag = read_num("khugepaged/defrag"),
308 .alloc_sleep_millisecs =
309 read_num("khugepaged/alloc_sleep_millisecs"),
310 .scan_sleep_millisecs =
311 read_num("khugepaged/scan_sleep_millisecs"),
312 .max_ptes_none = read_num("khugepaged/max_ptes_none"),
313 .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
314 .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
315 .pages_to_scan = read_num("khugepaged/pages_to_scan"),
316 };
317 success("OK");
318
319 signal(SIGTERM, restore_settings);
320 signal(SIGINT, restore_settings);
321 signal(SIGHUP, restore_settings);
322 signal(SIGQUIT, restore_settings);
323 }
324
adjust_settings(void)325 static void adjust_settings(void)
326 {
327
328 printf("Adjust settings...");
329 write_settings(&default_settings);
330 success("OK");
331 }
332
/* Size of the line buffers used when scanning PID_SMAPS. */
#define MAX_LINE_LENGTH 500

/*
 * Advance @fp line by line until a line that begins with @pattern is found.
 *
 * @buf must hold at least MAX_LINE_LENGTH bytes; on success it contains the
 * matching line.  Returns false if the end of the file is reached first.
 */
static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
{
	size_t pattern_len = strlen(pattern);

	for (;;) {
		if (!fgets(buf, MAX_LINE_LENGTH, fp))
			return false;
		if (strncmp(buf, pattern, pattern_len) == 0)
			return true;
	}
}
343
check_huge(void * addr)344 static bool check_huge(void *addr)
345 {
346 bool thp = false;
347 int ret;
348 FILE *fp;
349 char buffer[MAX_LINE_LENGTH];
350 char addr_pattern[MAX_LINE_LENGTH];
351
352 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
353 (unsigned long) addr);
354 if (ret >= MAX_LINE_LENGTH) {
355 printf("%s: Pattern is too long\n", __func__);
356 exit(EXIT_FAILURE);
357 }
358
359
360 fp = fopen(PID_SMAPS, "r");
361 if (!fp) {
362 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
363 exit(EXIT_FAILURE);
364 }
365 if (!check_for_pattern(fp, addr_pattern, buffer))
366 goto err_out;
367
368 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
369 hpage_pmd_size >> 10);
370 if (ret >= MAX_LINE_LENGTH) {
371 printf("%s: Pattern is too long\n", __func__);
372 exit(EXIT_FAILURE);
373 }
374 /*
375 * Fetch the AnonHugePages: in the same block and check whether it got
376 * the expected number of hugeepages next.
377 */
378 if (!check_for_pattern(fp, "AnonHugePages:", buffer))
379 goto err_out;
380
381 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
382 goto err_out;
383
384 thp = true;
385 err_out:
386 fclose(fp);
387 return thp;
388 }
389
390
check_swap(void * addr,unsigned long size)391 static bool check_swap(void *addr, unsigned long size)
392 {
393 bool swap = false;
394 int ret;
395 FILE *fp;
396 char buffer[MAX_LINE_LENGTH];
397 char addr_pattern[MAX_LINE_LENGTH];
398
399 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
400 (unsigned long) addr);
401 if (ret >= MAX_LINE_LENGTH) {
402 printf("%s: Pattern is too long\n", __func__);
403 exit(EXIT_FAILURE);
404 }
405
406
407 fp = fopen(PID_SMAPS, "r");
408 if (!fp) {
409 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
410 exit(EXIT_FAILURE);
411 }
412 if (!check_for_pattern(fp, addr_pattern, buffer))
413 goto err_out;
414
415 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
416 size >> 10);
417 if (ret >= MAX_LINE_LENGTH) {
418 printf("%s: Pattern is too long\n", __func__);
419 exit(EXIT_FAILURE);
420 }
421 /*
422 * Fetch the Swap: in the same block and check whether it got
423 * the expected number of hugeepages next.
424 */
425 if (!check_for_pattern(fp, "Swap:", buffer))
426 goto err_out;
427
428 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
429 goto err_out;
430
431 swap = true;
432 err_out:
433 fclose(fp);
434 return swap;
435 }
436
alloc_mapping(void)437 static void *alloc_mapping(void)
438 {
439 void *p;
440
441 p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
442 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
443 if (p != BASE_ADDR) {
444 printf("Failed to allocate VMA at %p\n", BASE_ADDR);
445 exit(EXIT_FAILURE);
446 }
447
448 return p;
449 }
450
fill_memory(int * p,unsigned long start,unsigned long end)451 static void fill_memory(int *p, unsigned long start, unsigned long end)
452 {
453 int i;
454
455 for (i = start / page_size; i < end / page_size; i++)
456 p[i * page_size / sizeof(*p)] = i + 0xdead0000;
457 }
458
validate_memory(int * p,unsigned long start,unsigned long end)459 static void validate_memory(int *p, unsigned long start, unsigned long end)
460 {
461 int i;
462
463 for (i = start / page_size; i < end / page_size; i++) {
464 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
465 printf("Page %d is corrupted: %#x\n",
466 i, p[i * page_size / sizeof(*p)]);
467 exit(EXIT_FAILURE);
468 }
469 }
470 }
471
472 #define TICK 500000
wait_for_scan(const char * msg,char * p)473 static bool wait_for_scan(const char *msg, char *p)
474 {
475 int full_scans;
476 int timeout = 6; /* 3 seconds */
477
478 /* Sanity check */
479 if (check_huge(p)) {
480 printf("Unexpected huge page\n");
481 exit(EXIT_FAILURE);
482 }
483
484 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
485
486 /* Wait until the second full_scan completed */
487 full_scans = read_num("khugepaged/full_scans") + 2;
488
489 printf("%s...", msg);
490 while (timeout--) {
491 if (check_huge(p))
492 break;
493 if (read_num("khugepaged/full_scans") >= full_scans)
494 break;
495 printf(".");
496 usleep(TICK);
497 }
498
499 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
500
501 return timeout == -1;
502 }
503
alloc_at_fault(void)504 static void alloc_at_fault(void)
505 {
506 struct settings settings = default_settings;
507 char *p;
508
509 settings.thp_enabled = THP_ALWAYS;
510 write_settings(&settings);
511
512 p = alloc_mapping();
513 *p = 1;
514 printf("Allocate huge page on fault...");
515 if (check_huge(p))
516 success("OK");
517 else
518 fail("Fail");
519
520 write_settings(&default_settings);
521
522 madvise(p, page_size, MADV_DONTNEED);
523 printf("Split huge PMD on MADV_DONTNEED...");
524 if (!check_huge(p))
525 success("OK");
526 else
527 fail("Fail");
528 munmap(p, hpage_pmd_size);
529 }
530
collapse_full(void)531 static void collapse_full(void)
532 {
533 void *p;
534
535 p = alloc_mapping();
536 fill_memory(p, 0, hpage_pmd_size);
537 if (wait_for_scan("Collapse fully populated PTE table", p))
538 fail("Timeout");
539 else if (check_huge(p))
540 success("OK");
541 else
542 fail("Fail");
543 validate_memory(p, 0, hpage_pmd_size);
544 munmap(p, hpage_pmd_size);
545 }
546
collapse_empty(void)547 static void collapse_empty(void)
548 {
549 void *p;
550
551 p = alloc_mapping();
552 if (wait_for_scan("Do not collapse empty PTE table", p))
553 fail("Timeout");
554 else if (check_huge(p))
555 fail("Fail");
556 else
557 success("OK");
558 munmap(p, hpage_pmd_size);
559 }
560
collapse_single_pte_entry(void)561 static void collapse_single_pte_entry(void)
562 {
563 void *p;
564
565 p = alloc_mapping();
566 fill_memory(p, 0, page_size);
567 if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
568 fail("Timeout");
569 else if (check_huge(p))
570 success("OK");
571 else
572 fail("Fail");
573 validate_memory(p, 0, page_size);
574 munmap(p, hpage_pmd_size);
575 }
576
collapse_max_ptes_none(void)577 static void collapse_max_ptes_none(void)
578 {
579 int max_ptes_none = hpage_pmd_nr / 2;
580 struct settings settings = default_settings;
581 void *p;
582
583 settings.khugepaged.max_ptes_none = max_ptes_none;
584 write_settings(&settings);
585
586 p = alloc_mapping();
587
588 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
589 if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
590 fail("Timeout");
591 else if (check_huge(p))
592 fail("Fail");
593 else
594 success("OK");
595 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
596
597 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
598 if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
599 fail("Timeout");
600 else if (check_huge(p))
601 success("OK");
602 else
603 fail("Fail");
604 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
605
606 munmap(p, hpage_pmd_size);
607 write_settings(&default_settings);
608 }
609
collapse_swapin_single_pte(void)610 static void collapse_swapin_single_pte(void)
611 {
612 void *p;
613 p = alloc_mapping();
614 fill_memory(p, 0, hpage_pmd_size);
615
616 printf("Swapout one page...");
617 if (madvise(p, page_size, MADV_PAGEOUT)) {
618 perror("madvise(MADV_PAGEOUT)");
619 exit(EXIT_FAILURE);
620 }
621 if (check_swap(p, page_size)) {
622 success("OK");
623 } else {
624 fail("Fail");
625 goto out;
626 }
627
628 if (wait_for_scan("Collapse with swapping in single PTE entry", p))
629 fail("Timeout");
630 else if (check_huge(p))
631 success("OK");
632 else
633 fail("Fail");
634 validate_memory(p, 0, hpage_pmd_size);
635 out:
636 munmap(p, hpage_pmd_size);
637 }
638
collapse_max_ptes_swap(void)639 static void collapse_max_ptes_swap(void)
640 {
641 int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
642 void *p;
643
644 p = alloc_mapping();
645
646 fill_memory(p, 0, hpage_pmd_size);
647 printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
648 if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
649 perror("madvise(MADV_PAGEOUT)");
650 exit(EXIT_FAILURE);
651 }
652 if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
653 success("OK");
654 } else {
655 fail("Fail");
656 goto out;
657 }
658
659 if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
660 fail("Timeout");
661 else if (check_huge(p))
662 fail("Fail");
663 else
664 success("OK");
665 validate_memory(p, 0, hpage_pmd_size);
666
667 fill_memory(p, 0, hpage_pmd_size);
668 printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
669 if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
670 perror("madvise(MADV_PAGEOUT)");
671 exit(EXIT_FAILURE);
672 }
673 if (check_swap(p, max_ptes_swap * page_size)) {
674 success("OK");
675 } else {
676 fail("Fail");
677 goto out;
678 }
679
680 if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
681 fail("Timeout");
682 else if (check_huge(p))
683 success("OK");
684 else
685 fail("Fail");
686 validate_memory(p, 0, hpage_pmd_size);
687 out:
688 munmap(p, hpage_pmd_size);
689 }
690
collapse_single_pte_entry_compound(void)691 static void collapse_single_pte_entry_compound(void)
692 {
693 void *p;
694
695 p = alloc_mapping();
696
697 printf("Allocate huge page...");
698 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
699 fill_memory(p, 0, hpage_pmd_size);
700 if (check_huge(p))
701 success("OK");
702 else
703 fail("Fail");
704 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
705
706 printf("Split huge page leaving single PTE mapping compound page...");
707 madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
708 if (!check_huge(p))
709 success("OK");
710 else
711 fail("Fail");
712
713 if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
714 fail("Timeout");
715 else if (check_huge(p))
716 success("OK");
717 else
718 fail("Fail");
719 validate_memory(p, 0, page_size);
720 munmap(p, hpage_pmd_size);
721 }
722
collapse_full_of_compound(void)723 static void collapse_full_of_compound(void)
724 {
725 void *p;
726
727 p = alloc_mapping();
728
729 printf("Allocate huge page...");
730 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
731 fill_memory(p, 0, hpage_pmd_size);
732 if (check_huge(p))
733 success("OK");
734 else
735 fail("Fail");
736
737 printf("Split huge page leaving single PTE page table full of compound pages...");
738 madvise(p, page_size, MADV_NOHUGEPAGE);
739 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
740 if (!check_huge(p))
741 success("OK");
742 else
743 fail("Fail");
744
745 if (wait_for_scan("Collapse PTE table full of compound pages", p))
746 fail("Timeout");
747 else if (check_huge(p))
748 success("OK");
749 else
750 fail("Fail");
751 validate_memory(p, 0, hpage_pmd_size);
752 munmap(p, hpage_pmd_size);
753 }
754
/*
 * Build, one page per iteration, a PMD-sized region just below BASE_ADDR
 * (per the progress message: a PTE page table full of different
 * PTE-mapped compound pages), then expect it to be non-huge before and
 * huge after wait_for_scan(), with its contents intact.
 *
 * Exits the program if a huge page cannot be allocated at BASE_ADDR or if
 * either mremap() call fails.
 */
static void collapse_compound_extreme(void)
{
	void *p;
	int i;

	p = alloc_mapping();
	for (i = 0; i < hpage_pmd_nr; i++) {
		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
				i + 1, hpage_pmd_nr);

		/* Get a fresh huge page at BASE_ADDR for this iteration. */
		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
		fill_memory(BASE_ADDR, 0, hpage_pmd_size);
		if (!check_huge(BASE_ADDR)) {
			printf("Failed to allocate huge page\n");
			exit(EXIT_FAILURE);
		}
		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);

		/*
		 * Move the i pages collected so far plus the new huge page to
		 * a scratch address, shrinking the range to i + 1 pages.
		 */
		p = mremap(BASE_ADDR - i * page_size,
				i * page_size + hpage_pmd_size,
				(i + 1) * page_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR + 2 * hpage_pmd_size);
		if (p == MAP_FAILED) {
			perror("mremap+unmap");
			exit(EXIT_FAILURE);
		}

		/*
		 * Move the i + 1 pages back so that they end at BASE_ADDR,
		 * growing the range by hpage_pmd_size so that it again covers
		 * BASE_ADDR for the next iteration.
		 */
		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
				(i + 1) * page_size,
				(i + 1) * page_size + hpage_pmd_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR - (i + 1) * page_size);
		if (p == MAP_FAILED) {
			perror("mremap+alloc");
			exit(EXIT_FAILURE);
		}
	}

	/* Drop the part at BASE_ADDR; p is the address returned by the last mremap(). */
	munmap(BASE_ADDR, hpage_pmd_size);
	fill_memory(p, 0, hpage_pmd_size);
	if (!check_huge(p))
		success("OK");
	else
		fail("Fail");

	if (wait_for_scan("Collapse PTE table full of different compound pages", p))
		fail("Timeout");
	else if (check_huge(p))
		success("OK");
	else
		fail("Fail");

	validate_memory(p, 0, hpage_pmd_size);
	munmap(p, hpage_pmd_size);
}
811
collapse_fork(void)812 static void collapse_fork(void)
813 {
814 int wstatus;
815 void *p;
816
817 p = alloc_mapping();
818
819 printf("Allocate small page...");
820 fill_memory(p, 0, page_size);
821 if (!check_huge(p))
822 success("OK");
823 else
824 fail("Fail");
825
826 printf("Share small page over fork()...");
827 if (!fork()) {
828 /* Do not touch settings on child exit */
829 skip_settings_restore = true;
830 exit_status = 0;
831
832 if (!check_huge(p))
833 success("OK");
834 else
835 fail("Fail");
836
837 fill_memory(p, page_size, 2 * page_size);
838
839 if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
840 fail("Timeout");
841 else if (check_huge(p))
842 success("OK");
843 else
844 fail("Fail");
845
846 validate_memory(p, 0, page_size);
847 munmap(p, hpage_pmd_size);
848 exit(exit_status);
849 }
850
851 wait(&wstatus);
852 exit_status += WEXITSTATUS(wstatus);
853
854 printf("Check if parent still has small page...");
855 if (!check_huge(p))
856 success("OK");
857 else
858 fail("Fail");
859 validate_memory(p, 0, page_size);
860 munmap(p, hpage_pmd_size);
861 }
862
collapse_fork_compound(void)863 static void collapse_fork_compound(void)
864 {
865 int wstatus;
866 void *p;
867
868 p = alloc_mapping();
869
870 printf("Allocate huge page...");
871 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
872 fill_memory(p, 0, hpage_pmd_size);
873 if (check_huge(p))
874 success("OK");
875 else
876 fail("Fail");
877
878 printf("Share huge page over fork()...");
879 if (!fork()) {
880 /* Do not touch settings on child exit */
881 skip_settings_restore = true;
882 exit_status = 0;
883
884 if (check_huge(p))
885 success("OK");
886 else
887 fail("Fail");
888
889 printf("Split huge page PMD in child process...");
890 madvise(p, page_size, MADV_NOHUGEPAGE);
891 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
892 if (!check_huge(p))
893 success("OK");
894 else
895 fail("Fail");
896 fill_memory(p, 0, page_size);
897
898 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
899 if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
900 fail("Timeout");
901 else if (check_huge(p))
902 success("OK");
903 else
904 fail("Fail");
905 write_num("khugepaged/max_ptes_shared",
906 default_settings.khugepaged.max_ptes_shared);
907
908 validate_memory(p, 0, hpage_pmd_size);
909 munmap(p, hpage_pmd_size);
910 exit(exit_status);
911 }
912
913 wait(&wstatus);
914 exit_status += WEXITSTATUS(wstatus);
915
916 printf("Check if parent still has huge page...");
917 if (check_huge(p))
918 success("OK");
919 else
920 fail("Fail");
921 validate_memory(p, 0, hpage_pmd_size);
922 munmap(p, hpage_pmd_size);
923 }
924
collapse_max_ptes_shared()925 static void collapse_max_ptes_shared()
926 {
927 int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
928 int wstatus;
929 void *p;
930
931 p = alloc_mapping();
932
933 printf("Allocate huge page...");
934 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
935 fill_memory(p, 0, hpage_pmd_size);
936 if (check_huge(p))
937 success("OK");
938 else
939 fail("Fail");
940
941 printf("Share huge page over fork()...");
942 if (!fork()) {
943 /* Do not touch settings on child exit */
944 skip_settings_restore = true;
945 exit_status = 0;
946
947 if (check_huge(p))
948 success("OK");
949 else
950 fail("Fail");
951
952 printf("Trigger CoW on page %d of %d...",
953 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
954 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
955 if (!check_huge(p))
956 success("OK");
957 else
958 fail("Fail");
959
960 if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
961 fail("Timeout");
962 else if (!check_huge(p))
963 success("OK");
964 else
965 fail("Fail");
966
967 printf("Trigger CoW on page %d of %d...",
968 hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
969 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
970 if (!check_huge(p))
971 success("OK");
972 else
973 fail("Fail");
974
975
976 if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
977 fail("Timeout");
978 else if (check_huge(p))
979 success("OK");
980 else
981 fail("Fail");
982
983 validate_memory(p, 0, hpage_pmd_size);
984 munmap(p, hpage_pmd_size);
985 exit(exit_status);
986 }
987
988 wait(&wstatus);
989 exit_status += WEXITSTATUS(wstatus);
990
991 printf("Check if parent still has huge page...");
992 if (check_huge(p))
993 success("OK");
994 else
995 fail("Fail");
996 validate_memory(p, 0, hpage_pmd_size);
997 munmap(p, hpage_pmd_size);
998 }
999
main(void)1000 int main(void)
1001 {
1002 setbuf(stdout, NULL);
1003
1004 page_size = getpagesize();
1005 hpage_pmd_size = read_num("hpage_pmd_size");
1006 hpage_pmd_nr = hpage_pmd_size / page_size;
1007
1008 default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1009 default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1010 default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1011 default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1012
1013 save_settings();
1014 adjust_settings();
1015
1016 alloc_at_fault();
1017 collapse_full();
1018 collapse_empty();
1019 collapse_single_pte_entry();
1020 collapse_max_ptes_none();
1021 collapse_swapin_single_pte();
1022 collapse_max_ptes_swap();
1023 collapse_single_pte_entry_compound();
1024 collapse_full_of_compound();
1025 collapse_compound_extreme();
1026 collapse_fork();
1027 collapse_fork_compound();
1028 collapse_max_ptes_shared();
1029
1030 restore_settings(0);
1031 }
1032