1 #define _GNU_SOURCE
2 #include <fcntl.h>
3 #include <limits.h>
4 #include <signal.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <unistd.h>
10 
11 #include <sys/mman.h>
12 #include <sys/wait.h>
13 
/* Fallback definition for C library headers that lack MADV_PAGEOUT. */
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif

/* Fixed address (1UL << 30) at which alloc_mapping() requests the test VMA. */
#define BASE_ADDR ((void *)(1UL << 30))
/* PMD huge page size in bytes; main() reads it from "hpage_pmd_size". */
static unsigned long hpage_pmd_size;
/* Base page size in bytes; main() sets it from getpagesize(). */
static unsigned long page_size;
/* Base pages per PMD huge page; main() computes hpage_pmd_size / page_size. */
static int hpage_pmd_nr;

/* Directory prefix that the read_*()/write_*() helpers prepend to knob names. */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
/* File that check_huge() and check_swap() parse. */
#define PID_SMAPS "/proc/self/smaps"
25 
/*
 * Possible values of the "enabled" knob. Each enumerator is used as an
 * index into thp_enabled_strings[].
 */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

/* NULL-terminated knob strings, in the same order as enum thp_enabled. */
static const char *thp_enabled_strings[] = {
	"always",
	"madvise",
	"never",
	NULL
};
38 
/*
 * Possible values of the "defrag" knob. Each enumerator is used as an
 * index into thp_defrag_strings[].
 */
enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

/* NULL-terminated knob strings, in the same order as enum thp_defrag. */
static const char *thp_defrag_strings[] = {
	"always",
	"defer",
	"defer+madvise",
	"madvise",
	"never",
	NULL
};
55 
/*
 * Possible values of the "shmem_enabled" knob. Each enumerator is used as
 * an index into shmem_enabled_strings[].
 */
enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

/* NULL-terminated knob strings, in the same order as enum shmem_enabled. */
static const char *shmem_enabled_strings[] = {
	"always",
	"within_size",
	"advise",
	"never",
	"deny",
	"force",
	NULL
};
74 
/*
 * Values of the knobs below "khugepaged/". write_settings() writes each
 * field to the file of the same name; save_settings() reads them back.
 */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};
84 
/*
 * Complete set of THP knobs handled by this program. The enum members are
 * translated to knob strings through the *_strings[] tables; the bool
 * members are written as numbers by write_settings().
 */
struct settings {
	enum thp_enabled thp_enabled;
	enum thp_defrag thp_defrag;
	enum shmem_enabled shmem_enabled;
	bool debug_cow;
	bool use_zero_page;
	struct khugepaged_settings khugepaged;
};
93 
/*
 * Baseline configuration that adjust_settings() applies before the tests
 * run. The khugepaged max_ptes_* and pages_to_scan members are not set
 * here; main() fills them in once hpage_pmd_nr is known.
 */
static struct settings default_settings = {
	.thp_enabled = THP_MADVISE,
	.thp_defrag = THP_DEFRAG_ALWAYS,
	.shmem_enabled = SHMEM_NEVER,
	.debug_cow = 0,
	.use_zero_page = 0,
	.khugepaged = {
		.defrag = 1,
		.alloc_sleep_millisecs = 10,
		.scan_sleep_millisecs = 10,
	},
};
106 
/* Snapshot filled in by save_settings() and written back by restore_settings(). */
static struct settings saved_settings;
/* When true, restore_settings() exits without writing saved_settings back. */
static bool skip_settings_restore;

/* Incremented by fail(); restore_settings() passes it to exit(). */
static int exit_status;
111 
/*
 * Print @msg in green, preceded by a space and followed by a newline.
 *
 * The escape character is spelled "\033" because "\e" is a compiler
 * extension rather than ISO C; the bytes written are the same.
 */
static void success(const char *msg)
{
	printf(" \033[32m%s\033[0m\n", msg);
}
116 
fail(const char * msg)117 static void fail(const char *msg)
118 {
119 	printf(" \e[31m%s\e[0m\n", msg);
120 	exit_status++;
121 }
122 
/*
 * Read the beginning of @path into @buf with a single read() call and
 * NUL-terminate it.
 *
 * @buflen is the full size of @buf; at most @buflen - 1 bytes are read so
 * that the terminator always fits.
 *
 * Returns the number of bytes read, or 0 when @buflen is too small to hold
 * any data, the file cannot be opened, the read fails or the file is empty.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	int fd;
	ssize_t numread;

	/*
	 * With buflen == 0 the expression "buflen - 1" would wrap around to
	 * SIZE_MAX; with buflen == 1 there is no room for data at all.
	 */
	if (buflen < 2)
		return 0;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		return 0;

	numread = read(fd, buf, buflen - 1);
	if (numread < 1) {
		close(fd);
		return 0;
	}

	buf[numread] = '\0';
	close(fd);

	return (unsigned int) numread;
}
143 
/*
 * Write @buf to the existing file @path with a single write() call.
 *
 * @buflen counts the terminating NUL of @buf (callers pass strlen() + 1);
 * the NUL itself is not written, so @buflen - 1 bytes are sent.
 *
 * Returns the number of bytes written, or 0 when @buflen is 0, the file
 * cannot be opened for writing, or nothing was written.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	int fd;
	ssize_t numwritten;

	/* "buflen - 1" would wrap around to SIZE_MAX for an empty buffer. */
	if (buflen == 0)
		return 0;

	fd = open(path, O_WRONLY);
	if (fd == -1)
		return 0;

	numwritten = write(fd, buf, buflen - 1);
	close(fd);
	if (numwritten < 1)
		return 0;

	return (unsigned int) numwritten;
}
160 
read_string(const char * name,const char * strings[])161 static int read_string(const char *name, const char *strings[])
162 {
163 	char path[PATH_MAX];
164 	char buf[256];
165 	char *c;
166 	int ret;
167 
168 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
169 	if (ret >= PATH_MAX) {
170 		printf("%s: Pathname is too long\n", __func__);
171 		exit(EXIT_FAILURE);
172 	}
173 
174 	if (!read_file(path, buf, sizeof(buf))) {
175 		perror(path);
176 		exit(EXIT_FAILURE);
177 	}
178 
179 	c = strchr(buf, '[');
180 	if (!c) {
181 		printf("%s: Parse failure\n", __func__);
182 		exit(EXIT_FAILURE);
183 	}
184 
185 	c++;
186 	memmove(buf, c, sizeof(buf) - (c - buf));
187 
188 	c = strchr(buf, ']');
189 	if (!c) {
190 		printf("%s: Parse failure\n", __func__);
191 		exit(EXIT_FAILURE);
192 	}
193 	*c = '\0';
194 
195 	ret = 0;
196 	while (strings[ret]) {
197 		if (!strcmp(strings[ret], buf))
198 			return ret;
199 		ret++;
200 	}
201 
202 	printf("Failed to parse %s\n", name);
203 	exit(EXIT_FAILURE);
204 }
205 
write_string(const char * name,const char * val)206 static void write_string(const char *name, const char *val)
207 {
208 	char path[PATH_MAX];
209 	int ret;
210 
211 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
212 	if (ret >= PATH_MAX) {
213 		printf("%s: Pathname is too long\n", __func__);
214 		exit(EXIT_FAILURE);
215 	}
216 
217 	if (!write_file(path, val, strlen(val) + 1)) {
218 		perror(path);
219 		exit(EXIT_FAILURE);
220 	}
221 }
222 
read_num(const char * name)223 static const unsigned long read_num(const char *name)
224 {
225 	char path[PATH_MAX];
226 	char buf[21];
227 	int ret;
228 
229 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
230 	if (ret >= PATH_MAX) {
231 		printf("%s: Pathname is too long\n", __func__);
232 		exit(EXIT_FAILURE);
233 	}
234 
235 	ret = read_file(path, buf, sizeof(buf));
236 	if (ret < 0) {
237 		perror("read_file(read_num)");
238 		exit(EXIT_FAILURE);
239 	}
240 
241 	return strtoul(buf, NULL, 10);
242 }
243 
write_num(const char * name,unsigned long num)244 static void write_num(const char *name, unsigned long num)
245 {
246 	char path[PATH_MAX];
247 	char buf[21];
248 	int ret;
249 
250 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
251 	if (ret >= PATH_MAX) {
252 		printf("%s: Pathname is too long\n", __func__);
253 		exit(EXIT_FAILURE);
254 	}
255 
256 	sprintf(buf, "%ld", num);
257 	if (!write_file(path, buf, strlen(buf) + 1)) {
258 		perror(path);
259 		exit(EXIT_FAILURE);
260 	}
261 }
262 
write_settings(struct settings * settings)263 static void write_settings(struct settings *settings)
264 {
265 	struct khugepaged_settings *khugepaged = &settings->khugepaged;
266 
267 	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
268 	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
269 	write_string("shmem_enabled",
270 			shmem_enabled_strings[settings->shmem_enabled]);
271 	write_num("debug_cow", settings->debug_cow);
272 	write_num("use_zero_page", settings->use_zero_page);
273 
274 	write_num("khugepaged/defrag", khugepaged->defrag);
275 	write_num("khugepaged/alloc_sleep_millisecs",
276 			khugepaged->alloc_sleep_millisecs);
277 	write_num("khugepaged/scan_sleep_millisecs",
278 			khugepaged->scan_sleep_millisecs);
279 	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
280 	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
281 	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
282 	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
283 }
284 
restore_settings(int sig)285 static void restore_settings(int sig)
286 {
287 	if (skip_settings_restore)
288 		goto out;
289 
290 	printf("Restore THP and khugepaged settings...");
291 	write_settings(&saved_settings);
292 	success("OK");
293 	if (sig)
294 		exit(EXIT_FAILURE);
295 out:
296 	exit(exit_status);
297 }
298 
save_settings(void)299 static void save_settings(void)
300 {
301 	printf("Save THP and khugepaged settings...");
302 	saved_settings = (struct settings) {
303 		.thp_enabled = read_string("enabled", thp_enabled_strings),
304 		.thp_defrag = read_string("defrag", thp_defrag_strings),
305 		.shmem_enabled =
306 			read_string("shmem_enabled", shmem_enabled_strings),
307 		.debug_cow = read_num("debug_cow"),
308 		.use_zero_page = read_num("use_zero_page"),
309 	};
310 	saved_settings.khugepaged = (struct khugepaged_settings) {
311 		.defrag = read_num("khugepaged/defrag"),
312 		.alloc_sleep_millisecs =
313 			read_num("khugepaged/alloc_sleep_millisecs"),
314 		.scan_sleep_millisecs =
315 			read_num("khugepaged/scan_sleep_millisecs"),
316 		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
317 		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
318 		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
319 		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
320 	};
321 	success("OK");
322 
323 	signal(SIGTERM, restore_settings);
324 	signal(SIGINT, restore_settings);
325 	signal(SIGHUP, restore_settings);
326 	signal(SIGQUIT, restore_settings);
327 }
328 
adjust_settings(void)329 static void adjust_settings(void)
330 {
331 
332 	printf("Adjust settings...");
333 	write_settings(&default_settings);
334 	success("OK");
335 }
336 
/* Size of the line buffers handed to fgets() and snprintf(). */
#define MAX_LINE_LENGTH 500

/*
 * Read lines from @fp, starting at its current position, until one begins
 * with @pattern.
 *
 * @buf must hold at least MAX_LINE_LENGTH bytes. On success it contains
 * the matching line and @fp is positioned just after it.
 *
 * Returns true when a matching line was found, false when fgets() ran out
 * of input first.
 */
static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
{
	const size_t prefix_len = strlen(pattern);

	for (;;) {
		if (fgets(buf, MAX_LINE_LENGTH, fp) == NULL)
			return false;
		if (strncmp(buf, pattern, prefix_len) == 0)
			return true;
	}
}
347 
check_huge(void * addr)348 static bool check_huge(void *addr)
349 {
350 	bool thp = false;
351 	int ret;
352 	FILE *fp;
353 	char buffer[MAX_LINE_LENGTH];
354 	char addr_pattern[MAX_LINE_LENGTH];
355 
356 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
357 		       (unsigned long) addr);
358 	if (ret >= MAX_LINE_LENGTH) {
359 		printf("%s: Pattern is too long\n", __func__);
360 		exit(EXIT_FAILURE);
361 	}
362 
363 
364 	fp = fopen(PID_SMAPS, "r");
365 	if (!fp) {
366 		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
367 		exit(EXIT_FAILURE);
368 	}
369 	if (!check_for_pattern(fp, addr_pattern, buffer))
370 		goto err_out;
371 
372 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
373 		       hpage_pmd_size >> 10);
374 	if (ret >= MAX_LINE_LENGTH) {
375 		printf("%s: Pattern is too long\n", __func__);
376 		exit(EXIT_FAILURE);
377 	}
378 	/*
379 	 * Fetch the AnonHugePages: in the same block and check whether it got
380 	 * the expected number of hugeepages next.
381 	 */
382 	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
383 		goto err_out;
384 
385 	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
386 		goto err_out;
387 
388 	thp = true;
389 err_out:
390 	fclose(fp);
391 	return thp;
392 }
393 
394 
check_swap(void * addr,unsigned long size)395 static bool check_swap(void *addr, unsigned long size)
396 {
397 	bool swap = false;
398 	int ret;
399 	FILE *fp;
400 	char buffer[MAX_LINE_LENGTH];
401 	char addr_pattern[MAX_LINE_LENGTH];
402 
403 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
404 		       (unsigned long) addr);
405 	if (ret >= MAX_LINE_LENGTH) {
406 		printf("%s: Pattern is too long\n", __func__);
407 		exit(EXIT_FAILURE);
408 	}
409 
410 
411 	fp = fopen(PID_SMAPS, "r");
412 	if (!fp) {
413 		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
414 		exit(EXIT_FAILURE);
415 	}
416 	if (!check_for_pattern(fp, addr_pattern, buffer))
417 		goto err_out;
418 
419 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
420 		       size >> 10);
421 	if (ret >= MAX_LINE_LENGTH) {
422 		printf("%s: Pattern is too long\n", __func__);
423 		exit(EXIT_FAILURE);
424 	}
425 	/*
426 	 * Fetch the Swap: in the same block and check whether it got
427 	 * the expected number of hugeepages next.
428 	 */
429 	if (!check_for_pattern(fp, "Swap:", buffer))
430 		goto err_out;
431 
432 	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
433 		goto err_out;
434 
435 	swap = true;
436 err_out:
437 	fclose(fp);
438 	return swap;
439 }
440 
alloc_mapping(void)441 static void *alloc_mapping(void)
442 {
443 	void *p;
444 
445 	p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
446 			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
447 	if (p != BASE_ADDR) {
448 		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
449 		exit(EXIT_FAILURE);
450 	}
451 
452 	return p;
453 }
454 
fill_memory(int * p,unsigned long start,unsigned long end)455 static void fill_memory(int *p, unsigned long start, unsigned long end)
456 {
457 	int i;
458 
459 	for (i = start / page_size; i < end / page_size; i++)
460 		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
461 }
462 
validate_memory(int * p,unsigned long start,unsigned long end)463 static void validate_memory(int *p, unsigned long start, unsigned long end)
464 {
465 	int i;
466 
467 	for (i = start / page_size; i < end / page_size; i++) {
468 		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
469 			printf("Page %d is corrupted: %#x\n",
470 					i, p[i * page_size / sizeof(*p)]);
471 			exit(EXIT_FAILURE);
472 		}
473 	}
474 }
475 
476 #define TICK 500000
wait_for_scan(const char * msg,char * p)477 static bool wait_for_scan(const char *msg, char *p)
478 {
479 	int full_scans;
480 	int timeout = 6; /* 3 seconds */
481 
482 	/* Sanity check */
483 	if (check_huge(p)) {
484 		printf("Unexpected huge page\n");
485 		exit(EXIT_FAILURE);
486 	}
487 
488 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
489 
490 	/* Wait until the second full_scan completed */
491 	full_scans = read_num("khugepaged/full_scans") + 2;
492 
493 	printf("%s...", msg);
494 	while (timeout--) {
495 		if (check_huge(p))
496 			break;
497 		if (read_num("khugepaged/full_scans") >= full_scans)
498 			break;
499 		printf(".");
500 		usleep(TICK);
501 	}
502 
503 	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
504 
505 	return timeout == -1;
506 }
507 
alloc_at_fault(void)508 static void alloc_at_fault(void)
509 {
510 	struct settings settings = default_settings;
511 	char *p;
512 
513 	settings.thp_enabled = THP_ALWAYS;
514 	write_settings(&settings);
515 
516 	p = alloc_mapping();
517 	*p = 1;
518 	printf("Allocate huge page on fault...");
519 	if (check_huge(p))
520 		success("OK");
521 	else
522 		fail("Fail");
523 
524 	write_settings(&default_settings);
525 
526 	madvise(p, page_size, MADV_DONTNEED);
527 	printf("Split huge PMD on MADV_DONTNEED...");
528 	if (!check_huge(p))
529 		success("OK");
530 	else
531 		fail("Fail");
532 	munmap(p, hpage_pmd_size);
533 }
534 
collapse_full(void)535 static void collapse_full(void)
536 {
537 	void *p;
538 
539 	p = alloc_mapping();
540 	fill_memory(p, 0, hpage_pmd_size);
541 	if (wait_for_scan("Collapse fully populated PTE table", p))
542 		fail("Timeout");
543 	else if (check_huge(p))
544 		success("OK");
545 	else
546 		fail("Fail");
547 	validate_memory(p, 0, hpage_pmd_size);
548 	munmap(p, hpage_pmd_size);
549 }
550 
collapse_empty(void)551 static void collapse_empty(void)
552 {
553 	void *p;
554 
555 	p = alloc_mapping();
556 	if (wait_for_scan("Do not collapse empty PTE table", p))
557 		fail("Timeout");
558 	else if (check_huge(p))
559 		fail("Fail");
560 	else
561 		success("OK");
562 	munmap(p, hpage_pmd_size);
563 }
564 
collapse_single_pte_entry(void)565 static void collapse_single_pte_entry(void)
566 {
567 	void *p;
568 
569 	p = alloc_mapping();
570 	fill_memory(p, 0, page_size);
571 	if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
572 		fail("Timeout");
573 	else if (check_huge(p))
574 		success("OK");
575 	else
576 		fail("Fail");
577 	validate_memory(p, 0, page_size);
578 	munmap(p, hpage_pmd_size);
579 }
580 
collapse_max_ptes_none(void)581 static void collapse_max_ptes_none(void)
582 {
583 	int max_ptes_none = hpage_pmd_nr / 2;
584 	struct settings settings = default_settings;
585 	void *p;
586 
587 	settings.khugepaged.max_ptes_none = max_ptes_none;
588 	write_settings(&settings);
589 
590 	p = alloc_mapping();
591 
592 	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
593 	if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
594 		fail("Timeout");
595 	else if (check_huge(p))
596 		fail("Fail");
597 	else
598 		success("OK");
599 	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
600 
601 	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
602 	if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
603 		fail("Timeout");
604 	else if (check_huge(p))
605 		success("OK");
606 	else
607 		fail("Fail");
608 	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
609 
610 	munmap(p, hpage_pmd_size);
611 	write_settings(&default_settings);
612 }
613 
collapse_swapin_single_pte(void)614 static void collapse_swapin_single_pte(void)
615 {
616 	void *p;
617 	p = alloc_mapping();
618 	fill_memory(p, 0, hpage_pmd_size);
619 
620 	printf("Swapout one page...");
621 	if (madvise(p, page_size, MADV_PAGEOUT)) {
622 		perror("madvise(MADV_PAGEOUT)");
623 		exit(EXIT_FAILURE);
624 	}
625 	if (check_swap(p, page_size)) {
626 		success("OK");
627 	} else {
628 		fail("Fail");
629 		goto out;
630 	}
631 
632 	if (wait_for_scan("Collapse with swapping in single PTE entry", p))
633 		fail("Timeout");
634 	else if (check_huge(p))
635 		success("OK");
636 	else
637 		fail("Fail");
638 	validate_memory(p, 0, hpage_pmd_size);
639 out:
640 	munmap(p, hpage_pmd_size);
641 }
642 
collapse_max_ptes_swap(void)643 static void collapse_max_ptes_swap(void)
644 {
645 	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
646 	void *p;
647 
648 	p = alloc_mapping();
649 
650 	fill_memory(p, 0, hpage_pmd_size);
651 	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
652 	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
653 		perror("madvise(MADV_PAGEOUT)");
654 		exit(EXIT_FAILURE);
655 	}
656 	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
657 		success("OK");
658 	} else {
659 		fail("Fail");
660 		goto out;
661 	}
662 
663 	if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
664 		fail("Timeout");
665 	else if (check_huge(p))
666 		fail("Fail");
667 	else
668 		success("OK");
669 	validate_memory(p, 0, hpage_pmd_size);
670 
671 	fill_memory(p, 0, hpage_pmd_size);
672 	printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
673 	if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
674 		perror("madvise(MADV_PAGEOUT)");
675 		exit(EXIT_FAILURE);
676 	}
677 	if (check_swap(p, max_ptes_swap * page_size)) {
678 		success("OK");
679 	} else {
680 		fail("Fail");
681 		goto out;
682 	}
683 
684 	if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
685 		fail("Timeout");
686 	else if (check_huge(p))
687 		success("OK");
688 	else
689 		fail("Fail");
690 	validate_memory(p, 0, hpage_pmd_size);
691 out:
692 	munmap(p, hpage_pmd_size);
693 }
694 
collapse_single_pte_entry_compound(void)695 static void collapse_single_pte_entry_compound(void)
696 {
697 	void *p;
698 
699 	p = alloc_mapping();
700 
701 	printf("Allocate huge page...");
702 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
703 	fill_memory(p, 0, hpage_pmd_size);
704 	if (check_huge(p))
705 		success("OK");
706 	else
707 		fail("Fail");
708 	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
709 
710 	printf("Split huge page leaving single PTE mapping compound page...");
711 	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
712 	if (!check_huge(p))
713 		success("OK");
714 	else
715 		fail("Fail");
716 
717 	if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
718 		fail("Timeout");
719 	else if (check_huge(p))
720 		success("OK");
721 	else
722 		fail("Fail");
723 	validate_memory(p, 0, page_size);
724 	munmap(p, hpage_pmd_size);
725 }
726 
collapse_full_of_compound(void)727 static void collapse_full_of_compound(void)
728 {
729 	void *p;
730 
731 	p = alloc_mapping();
732 
733 	printf("Allocate huge page...");
734 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
735 	fill_memory(p, 0, hpage_pmd_size);
736 	if (check_huge(p))
737 		success("OK");
738 	else
739 		fail("Fail");
740 
741 	printf("Split huge page leaving single PTE page table full of compound pages...");
742 	madvise(p, page_size, MADV_NOHUGEPAGE);
743 	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
744 	if (!check_huge(p))
745 		success("OK");
746 	else
747 		fail("Fail");
748 
749 	if (wait_for_scan("Collapse PTE table full of compound pages", p))
750 		fail("Timeout");
751 	else if (check_huge(p))
752 		success("OK");
753 	else
754 		fail("Fail");
755 	validate_memory(p, 0, hpage_pmd_size);
756 	munmap(p, hpage_pmd_size);
757 }
758 
collapse_compound_extreme(void)759 static void collapse_compound_extreme(void)
760 {
761 	void *p;
762 	int i;
763 
764 	p = alloc_mapping();
765 	for (i = 0; i < hpage_pmd_nr; i++) {
766 		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
767 				i + 1, hpage_pmd_nr);
768 
769 		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
770 		fill_memory(BASE_ADDR, 0, hpage_pmd_size);
771 		if (!check_huge(BASE_ADDR)) {
772 			printf("Failed to allocate huge page\n");
773 			exit(EXIT_FAILURE);
774 		}
775 		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
776 
777 		p = mremap(BASE_ADDR - i * page_size,
778 				i * page_size + hpage_pmd_size,
779 				(i + 1) * page_size,
780 				MREMAP_MAYMOVE | MREMAP_FIXED,
781 				BASE_ADDR + 2 * hpage_pmd_size);
782 		if (p == MAP_FAILED) {
783 			perror("mremap+unmap");
784 			exit(EXIT_FAILURE);
785 		}
786 
787 		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
788 				(i + 1) * page_size,
789 				(i + 1) * page_size + hpage_pmd_size,
790 				MREMAP_MAYMOVE | MREMAP_FIXED,
791 				BASE_ADDR - (i + 1) * page_size);
792 		if (p == MAP_FAILED) {
793 			perror("mremap+alloc");
794 			exit(EXIT_FAILURE);
795 		}
796 	}
797 
798 	munmap(BASE_ADDR, hpage_pmd_size);
799 	fill_memory(p, 0, hpage_pmd_size);
800 	if (!check_huge(p))
801 		success("OK");
802 	else
803 		fail("Fail");
804 
805 	if (wait_for_scan("Collapse PTE table full of different compound pages", p))
806 		fail("Timeout");
807 	else if (check_huge(p))
808 		success("OK");
809 	else
810 		fail("Fail");
811 
812 	validate_memory(p, 0, hpage_pmd_size);
813 	munmap(p, hpage_pmd_size);
814 }
815 
collapse_fork(void)816 static void collapse_fork(void)
817 {
818 	int wstatus;
819 	void *p;
820 
821 	p = alloc_mapping();
822 
823 	printf("Allocate small page...");
824 	fill_memory(p, 0, page_size);
825 	if (!check_huge(p))
826 		success("OK");
827 	else
828 		fail("Fail");
829 
830 	printf("Share small page over fork()...");
831 	if (!fork()) {
832 		/* Do not touch settings on child exit */
833 		skip_settings_restore = true;
834 		exit_status = 0;
835 
836 		if (!check_huge(p))
837 			success("OK");
838 		else
839 			fail("Fail");
840 
841 		fill_memory(p, page_size, 2 * page_size);
842 
843 		if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
844 			fail("Timeout");
845 		else if (check_huge(p))
846 			success("OK");
847 		else
848 			fail("Fail");
849 
850 		validate_memory(p, 0, page_size);
851 		munmap(p, hpage_pmd_size);
852 		exit(exit_status);
853 	}
854 
855 	wait(&wstatus);
856 	exit_status += WEXITSTATUS(wstatus);
857 
858 	printf("Check if parent still has small page...");
859 	if (!check_huge(p))
860 		success("OK");
861 	else
862 		fail("Fail");
863 	validate_memory(p, 0, page_size);
864 	munmap(p, hpage_pmd_size);
865 }
866 
collapse_fork_compound(void)867 static void collapse_fork_compound(void)
868 {
869 	int wstatus;
870 	void *p;
871 
872 	p = alloc_mapping();
873 
874 	printf("Allocate huge page...");
875 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
876 	fill_memory(p, 0, hpage_pmd_size);
877 	if (check_huge(p))
878 		success("OK");
879 	else
880 		fail("Fail");
881 
882 	printf("Share huge page over fork()...");
883 	if (!fork()) {
884 		/* Do not touch settings on child exit */
885 		skip_settings_restore = true;
886 		exit_status = 0;
887 
888 		if (check_huge(p))
889 			success("OK");
890 		else
891 			fail("Fail");
892 
893 		printf("Split huge page PMD in child process...");
894 		madvise(p, page_size, MADV_NOHUGEPAGE);
895 		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
896 		if (!check_huge(p))
897 			success("OK");
898 		else
899 			fail("Fail");
900 		fill_memory(p, 0, page_size);
901 
902 		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
903 		if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
904 			fail("Timeout");
905 		else if (check_huge(p))
906 			success("OK");
907 		else
908 			fail("Fail");
909 		write_num("khugepaged/max_ptes_shared",
910 				default_settings.khugepaged.max_ptes_shared);
911 
912 		validate_memory(p, 0, hpage_pmd_size);
913 		munmap(p, hpage_pmd_size);
914 		exit(exit_status);
915 	}
916 
917 	wait(&wstatus);
918 	exit_status += WEXITSTATUS(wstatus);
919 
920 	printf("Check if parent still has huge page...");
921 	if (check_huge(p))
922 		success("OK");
923 	else
924 		fail("Fail");
925 	validate_memory(p, 0, hpage_pmd_size);
926 	munmap(p, hpage_pmd_size);
927 }
928 
collapse_max_ptes_shared()929 static void collapse_max_ptes_shared()
930 {
931 	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
932 	int wstatus;
933 	void *p;
934 
935 	p = alloc_mapping();
936 
937 	printf("Allocate huge page...");
938 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
939 	fill_memory(p, 0, hpage_pmd_size);
940 	if (check_huge(p))
941 		success("OK");
942 	else
943 		fail("Fail");
944 
945 	printf("Share huge page over fork()...");
946 	if (!fork()) {
947 		/* Do not touch settings on child exit */
948 		skip_settings_restore = true;
949 		exit_status = 0;
950 
951 		if (check_huge(p))
952 			success("OK");
953 		else
954 			fail("Fail");
955 
956 		printf("Trigger CoW on page %d of %d...",
957 				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
958 		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
959 		if (!check_huge(p))
960 			success("OK");
961 		else
962 			fail("Fail");
963 
964 		if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
965 			fail("Timeout");
966 		else if (!check_huge(p))
967 			success("OK");
968 		else
969 			fail("Fail");
970 
971 		printf("Trigger CoW on page %d of %d...",
972 				hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
973 		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
974 		if (!check_huge(p))
975 			success("OK");
976 		else
977 			fail("Fail");
978 
979 
980 		if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
981 			fail("Timeout");
982 		else if (check_huge(p))
983 			success("OK");
984 		else
985 			fail("Fail");
986 
987 		validate_memory(p, 0, hpage_pmd_size);
988 		munmap(p, hpage_pmd_size);
989 		exit(exit_status);
990 	}
991 
992 	wait(&wstatus);
993 	exit_status += WEXITSTATUS(wstatus);
994 
995 	printf("Check if parent still has huge page...");
996 	if (check_huge(p))
997 		success("OK");
998 	else
999 		fail("Fail");
1000 	validate_memory(p, 0, hpage_pmd_size);
1001 	munmap(p, hpage_pmd_size);
1002 }
1003 
main(void)1004 int main(void)
1005 {
1006 	setbuf(stdout, NULL);
1007 
1008 	page_size = getpagesize();
1009 	hpage_pmd_size = read_num("hpage_pmd_size");
1010 	hpage_pmd_nr = hpage_pmd_size / page_size;
1011 
1012 	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1013 	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1014 	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1015 	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1016 
1017 	save_settings();
1018 	adjust_settings();
1019 
1020 	alloc_at_fault();
1021 	collapse_full();
1022 	collapse_empty();
1023 	collapse_single_pte_entry();
1024 	collapse_max_ptes_none();
1025 	collapse_swapin_single_pte();
1026 	collapse_max_ptes_swap();
1027 	collapse_single_pte_entry_compound();
1028 	collapse_full_of_compound();
1029 	collapse_compound_extreme();
1030 	collapse_fork();
1031 	collapse_fork_compound();
1032 	collapse_max_ptes_shared();
1033 
1034 	restore_settings(0);
1035 }
1036