1 #define _GNU_SOURCE
2 #include <fcntl.h>
3 #include <limits.h>
4 #include <signal.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <unistd.h>
10 
11 #include <sys/mman.h>
12 #include <sys/wait.h>
13 
/* Provide MADV_PAGEOUT when the included headers do not define it. */
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif

/* Fixed address (1 GiB) at which alloc_mapping() requires the test VMA. */
#define BASE_ADDR ((void *)(1UL << 30))
/* Size of a PMD huge page in bytes; main() reads it from sysfs. */
static unsigned long hpage_pmd_size;
/* Base page size in bytes; main() sets it from getpagesize(). */
static unsigned long page_size;
/* Number of base pages per huge page: hpage_pmd_size / page_size. */
static int hpage_pmd_nr;

/* Directory prefix prepended to every knob name read or written here. */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
/* File scanned by check_huge() and check_swap() for per-mapping counters. */
#define PID_SMAPS "/proc/self/smaps"
25 
/*
 * Settings of the "enabled" knob. Each value doubles as the index of its
 * textual form in thp_enabled_strings.
 */
enum thp_enabled {
	THP_ALWAYS = 0,
	THP_MADVISE = 1,
	THP_NEVER = 2,
};

/* Text for each enum thp_enabled value; the trailing NULL ends the table. */
static const char *thp_enabled_strings[] = {
	[THP_ALWAYS]	= "always",
	[THP_MADVISE]	= "madvise",
	[THP_NEVER]	= "never",
	NULL
};
38 
/*
 * Settings of the "defrag" knob. Each value doubles as the index of its
 * textual form in thp_defrag_strings.
 */
enum thp_defrag {
	THP_DEFRAG_ALWAYS = 0,
	THP_DEFRAG_DEFER = 1,
	THP_DEFRAG_DEFER_MADVISE = 2,
	THP_DEFRAG_MADVISE = 3,
	THP_DEFRAG_NEVER = 4,
};

/* Text for each enum thp_defrag value; the trailing NULL ends the table. */
static const char *thp_defrag_strings[] = {
	[THP_DEFRAG_ALWAYS]		= "always",
	[THP_DEFRAG_DEFER]		= "defer",
	[THP_DEFRAG_DEFER_MADVISE]	= "defer+madvise",
	[THP_DEFRAG_MADVISE]		= "madvise",
	[THP_DEFRAG_NEVER]		= "never",
	NULL
};
55 
/*
 * Settings of the "shmem_enabled" knob. Each value doubles as the index of
 * its textual form in shmem_enabled_strings.
 */
enum shmem_enabled {
	SHMEM_ALWAYS = 0,
	SHMEM_WITHIN_SIZE = 1,
	SHMEM_ADVISE = 2,
	SHMEM_NEVER = 3,
	SHMEM_DENY = 4,
	SHMEM_FORCE = 5,
};

/* Text for each enum shmem_enabled value; the trailing NULL ends the table. */
static const char *shmem_enabled_strings[] = {
	[SHMEM_ALWAYS]		= "always",
	[SHMEM_WITHIN_SIZE]	= "within_size",
	[SHMEM_ADVISE]		= "advise",
	[SHMEM_NEVER]		= "never",
	[SHMEM_DENY]		= "deny",
	[SHMEM_FORCE]		= "force",
	NULL
};
74 
/*
 * Values of the knobs under the "khugepaged/" directory of THP_SYSFS.
 * Each field is read from and written to the file of the same name.
 */
struct khugepaged_settings {
	bool defrag;				/* khugepaged/defrag */
	unsigned int alloc_sleep_millisecs;	/* khugepaged/alloc_sleep_millisecs */
	unsigned int scan_sleep_millisecs;	/* khugepaged/scan_sleep_millisecs */
	unsigned int max_ptes_none;		/* khugepaged/max_ptes_none */
	unsigned int max_ptes_swap;		/* khugepaged/max_ptes_swap */
	unsigned int max_ptes_shared;		/* khugepaged/max_ptes_shared */
	unsigned long pages_to_scan;		/* khugepaged/pages_to_scan */
};
84 
/*
 * Complete set of THP knobs this test saves, modifies and restores.
 * write_settings() writes every field to its file under THP_SYSFS.
 */
struct settings {
	enum thp_enabled thp_enabled;		/* "enabled" */
	enum thp_defrag thp_defrag;		/* "defrag" */
	enum shmem_enabled shmem_enabled;	/* "shmem_enabled" */
	bool use_zero_page;			/* "use_zero_page" */
	struct khugepaged_settings khugepaged;	/* "khugepaged/" knobs */
};
92 
93 static struct settings default_settings = {
94 	.thp_enabled = THP_MADVISE,
95 	.thp_defrag = THP_DEFRAG_ALWAYS,
96 	.shmem_enabled = SHMEM_NEVER,
97 	.use_zero_page = 0,
98 	.khugepaged = {
99 		.defrag = 1,
100 		.alloc_sleep_millisecs = 10,
101 		.scan_sleep_millisecs = 10,
102 	},
103 };
104 
/* Settings captured by save_settings() and written back by restore_settings(). */
static struct settings saved_settings;
/* When set, restore_settings() exits without writing any knob. */
static bool skip_settings_restore;

/* Count of failed checks; incremented by fail() and used as the exit code. */
static int exit_status;
109 
/* Print @msg in green (ANSI colour 32) and finish the current output line. */
static void success(const char *msg)
{
	fprintf(stdout, " \e[32m%s\e[0m\n", msg);
}
114 
fail(const char * msg)115 static void fail(const char *msg)
116 {
117 	printf(" \e[31m%s\e[0m\n", msg);
118 	exit_status++;
119 }
120 
/*
 * Read up to @buflen - 1 bytes from @path into @buf and NUL-terminate them.
 *
 * Returns the number of bytes read, or 0 when the file cannot be opened or
 * read() yields nothing (error or end of file). @buf is left untouched in
 * the latter cases.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_RDONLY);

	if (fd == -1)
		return 0;

	nbytes = read(fd, buf, buflen - 1);
	close(fd);
	if (nbytes < 1)
		return 0;

	buf[nbytes] = '\0';
	return (unsigned int) nbytes;
}
141 
/*
 * Write the first @buflen - 1 bytes of @buf to the existing file @path.
 * Callers pass the string length plus one, so the terminating NUL is not
 * written.
 *
 * Returns the number of bytes written, or 0 when the file cannot be opened
 * or write() transfers nothing.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_WRONLY);

	if (fd == -1)
		return 0;

	nbytes = write(fd, buf, buflen - 1);
	close(fd);

	return nbytes < 1 ? 0 : (unsigned int) nbytes;
}
158 
read_string(const char * name,const char * strings[])159 static int read_string(const char *name, const char *strings[])
160 {
161 	char path[PATH_MAX];
162 	char buf[256];
163 	char *c;
164 	int ret;
165 
166 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
167 	if (ret >= PATH_MAX) {
168 		printf("%s: Pathname is too long\n", __func__);
169 		exit(EXIT_FAILURE);
170 	}
171 
172 	if (!read_file(path, buf, sizeof(buf))) {
173 		perror(path);
174 		exit(EXIT_FAILURE);
175 	}
176 
177 	c = strchr(buf, '[');
178 	if (!c) {
179 		printf("%s: Parse failure\n", __func__);
180 		exit(EXIT_FAILURE);
181 	}
182 
183 	c++;
184 	memmove(buf, c, sizeof(buf) - (c - buf));
185 
186 	c = strchr(buf, ']');
187 	if (!c) {
188 		printf("%s: Parse failure\n", __func__);
189 		exit(EXIT_FAILURE);
190 	}
191 	*c = '\0';
192 
193 	ret = 0;
194 	while (strings[ret]) {
195 		if (!strcmp(strings[ret], buf))
196 			return ret;
197 		ret++;
198 	}
199 
200 	printf("Failed to parse %s\n", name);
201 	exit(EXIT_FAILURE);
202 }
203 
write_string(const char * name,const char * val)204 static void write_string(const char *name, const char *val)
205 {
206 	char path[PATH_MAX];
207 	int ret;
208 
209 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
210 	if (ret >= PATH_MAX) {
211 		printf("%s: Pathname is too long\n", __func__);
212 		exit(EXIT_FAILURE);
213 	}
214 
215 	if (!write_file(path, val, strlen(val) + 1)) {
216 		perror(path);
217 		exit(EXIT_FAILURE);
218 	}
219 }
220 
read_num(const char * name)221 static const unsigned long read_num(const char *name)
222 {
223 	char path[PATH_MAX];
224 	char buf[21];
225 	int ret;
226 
227 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
228 	if (ret >= PATH_MAX) {
229 		printf("%s: Pathname is too long\n", __func__);
230 		exit(EXIT_FAILURE);
231 	}
232 
233 	ret = read_file(path, buf, sizeof(buf));
234 	if (ret < 0) {
235 		perror("read_file(read_num)");
236 		exit(EXIT_FAILURE);
237 	}
238 
239 	return strtoul(buf, NULL, 10);
240 }
241 
write_num(const char * name,unsigned long num)242 static void write_num(const char *name, unsigned long num)
243 {
244 	char path[PATH_MAX];
245 	char buf[21];
246 	int ret;
247 
248 	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
249 	if (ret >= PATH_MAX) {
250 		printf("%s: Pathname is too long\n", __func__);
251 		exit(EXIT_FAILURE);
252 	}
253 
254 	sprintf(buf, "%ld", num);
255 	if (!write_file(path, buf, strlen(buf) + 1)) {
256 		perror(path);
257 		exit(EXIT_FAILURE);
258 	}
259 }
260 
write_settings(struct settings * settings)261 static void write_settings(struct settings *settings)
262 {
263 	struct khugepaged_settings *khugepaged = &settings->khugepaged;
264 
265 	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
266 	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
267 	write_string("shmem_enabled",
268 			shmem_enabled_strings[settings->shmem_enabled]);
269 	write_num("use_zero_page", settings->use_zero_page);
270 
271 	write_num("khugepaged/defrag", khugepaged->defrag);
272 	write_num("khugepaged/alloc_sleep_millisecs",
273 			khugepaged->alloc_sleep_millisecs);
274 	write_num("khugepaged/scan_sleep_millisecs",
275 			khugepaged->scan_sleep_millisecs);
276 	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
277 	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
278 	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
279 	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
280 }
281 
restore_settings(int sig)282 static void restore_settings(int sig)
283 {
284 	if (skip_settings_restore)
285 		goto out;
286 
287 	printf("Restore THP and khugepaged settings...");
288 	write_settings(&saved_settings);
289 	success("OK");
290 	if (sig)
291 		exit(EXIT_FAILURE);
292 out:
293 	exit(exit_status);
294 }
295 
save_settings(void)296 static void save_settings(void)
297 {
298 	printf("Save THP and khugepaged settings...");
299 	saved_settings = (struct settings) {
300 		.thp_enabled = read_string("enabled", thp_enabled_strings),
301 		.thp_defrag = read_string("defrag", thp_defrag_strings),
302 		.shmem_enabled =
303 			read_string("shmem_enabled", shmem_enabled_strings),
304 		.use_zero_page = read_num("use_zero_page"),
305 	};
306 	saved_settings.khugepaged = (struct khugepaged_settings) {
307 		.defrag = read_num("khugepaged/defrag"),
308 		.alloc_sleep_millisecs =
309 			read_num("khugepaged/alloc_sleep_millisecs"),
310 		.scan_sleep_millisecs =
311 			read_num("khugepaged/scan_sleep_millisecs"),
312 		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
313 		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
314 		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
315 		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
316 	};
317 	success("OK");
318 
319 	signal(SIGTERM, restore_settings);
320 	signal(SIGINT, restore_settings);
321 	signal(SIGHUP, restore_settings);
322 	signal(SIGQUIT, restore_settings);
323 }
324 
adjust_settings(void)325 static void adjust_settings(void)
326 {
327 
328 	printf("Adjust settings...");
329 	write_settings(&default_settings);
330 	success("OK");
331 }
332 
/* Size of the line buffers used when scanning PID_SMAPS. */
#define MAX_LINE_LENGTH 500

/*
 * Advance @fp line by line until a line starting with @pattern is found.
 *
 * @fp:      stream to read; on success it is left just past the match.
 * @pattern: prefix to look for; never modified.
 * @buf:     caller buffer of at least MAX_LINE_LENGTH bytes; holds the
 *           matching line on success.
 *
 * Returns true on a match, false when the end of the stream is reached.
 */
static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
{
	/* The prefix length does not change between lines. */
	const size_t pattern_len = strlen(pattern);

	while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
		if (strncmp(buf, pattern, pattern_len) == 0)
			return true;
	}
	return false;
}
343 
check_huge(void * addr)344 static bool check_huge(void *addr)
345 {
346 	bool thp = false;
347 	int ret;
348 	FILE *fp;
349 	char buffer[MAX_LINE_LENGTH];
350 	char addr_pattern[MAX_LINE_LENGTH];
351 
352 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
353 		       (unsigned long) addr);
354 	if (ret >= MAX_LINE_LENGTH) {
355 		printf("%s: Pattern is too long\n", __func__);
356 		exit(EXIT_FAILURE);
357 	}
358 
359 
360 	fp = fopen(PID_SMAPS, "r");
361 	if (!fp) {
362 		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
363 		exit(EXIT_FAILURE);
364 	}
365 	if (!check_for_pattern(fp, addr_pattern, buffer))
366 		goto err_out;
367 
368 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
369 		       hpage_pmd_size >> 10);
370 	if (ret >= MAX_LINE_LENGTH) {
371 		printf("%s: Pattern is too long\n", __func__);
372 		exit(EXIT_FAILURE);
373 	}
374 	/*
375 	 * Fetch the AnonHugePages: in the same block and check whether it got
376 	 * the expected number of hugeepages next.
377 	 */
378 	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
379 		goto err_out;
380 
381 	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
382 		goto err_out;
383 
384 	thp = true;
385 err_out:
386 	fclose(fp);
387 	return thp;
388 }
389 
390 
check_swap(void * addr,unsigned long size)391 static bool check_swap(void *addr, unsigned long size)
392 {
393 	bool swap = false;
394 	int ret;
395 	FILE *fp;
396 	char buffer[MAX_LINE_LENGTH];
397 	char addr_pattern[MAX_LINE_LENGTH];
398 
399 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
400 		       (unsigned long) addr);
401 	if (ret >= MAX_LINE_LENGTH) {
402 		printf("%s: Pattern is too long\n", __func__);
403 		exit(EXIT_FAILURE);
404 	}
405 
406 
407 	fp = fopen(PID_SMAPS, "r");
408 	if (!fp) {
409 		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
410 		exit(EXIT_FAILURE);
411 	}
412 	if (!check_for_pattern(fp, addr_pattern, buffer))
413 		goto err_out;
414 
415 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
416 		       size >> 10);
417 	if (ret >= MAX_LINE_LENGTH) {
418 		printf("%s: Pattern is too long\n", __func__);
419 		exit(EXIT_FAILURE);
420 	}
421 	/*
422 	 * Fetch the Swap: in the same block and check whether it got
423 	 * the expected number of hugeepages next.
424 	 */
425 	if (!check_for_pattern(fp, "Swap:", buffer))
426 		goto err_out;
427 
428 	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
429 		goto err_out;
430 
431 	swap = true;
432 err_out:
433 	fclose(fp);
434 	return swap;
435 }
436 
alloc_mapping(void)437 static void *alloc_mapping(void)
438 {
439 	void *p;
440 
441 	p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
442 			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
443 	if (p != BASE_ADDR) {
444 		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
445 		exit(EXIT_FAILURE);
446 	}
447 
448 	return p;
449 }
450 
fill_memory(int * p,unsigned long start,unsigned long end)451 static void fill_memory(int *p, unsigned long start, unsigned long end)
452 {
453 	int i;
454 
455 	for (i = start / page_size; i < end / page_size; i++)
456 		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
457 }
458 
validate_memory(int * p,unsigned long start,unsigned long end)459 static void validate_memory(int *p, unsigned long start, unsigned long end)
460 {
461 	int i;
462 
463 	for (i = start / page_size; i < end / page_size; i++) {
464 		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
465 			printf("Page %d is corrupted: %#x\n",
466 					i, p[i * page_size / sizeof(*p)]);
467 			exit(EXIT_FAILURE);
468 		}
469 	}
470 }
471 
472 #define TICK 500000
wait_for_scan(const char * msg,char * p)473 static bool wait_for_scan(const char *msg, char *p)
474 {
475 	int full_scans;
476 	int timeout = 6; /* 3 seconds */
477 
478 	/* Sanity check */
479 	if (check_huge(p)) {
480 		printf("Unexpected huge page\n");
481 		exit(EXIT_FAILURE);
482 	}
483 
484 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
485 
486 	/* Wait until the second full_scan completed */
487 	full_scans = read_num("khugepaged/full_scans") + 2;
488 
489 	printf("%s...", msg);
490 	while (timeout--) {
491 		if (check_huge(p))
492 			break;
493 		if (read_num("khugepaged/full_scans") >= full_scans)
494 			break;
495 		printf(".");
496 		usleep(TICK);
497 	}
498 
499 	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
500 
501 	return timeout == -1;
502 }
503 
alloc_at_fault(void)504 static void alloc_at_fault(void)
505 {
506 	struct settings settings = default_settings;
507 	char *p;
508 
509 	settings.thp_enabled = THP_ALWAYS;
510 	write_settings(&settings);
511 
512 	p = alloc_mapping();
513 	*p = 1;
514 	printf("Allocate huge page on fault...");
515 	if (check_huge(p))
516 		success("OK");
517 	else
518 		fail("Fail");
519 
520 	write_settings(&default_settings);
521 
522 	madvise(p, page_size, MADV_DONTNEED);
523 	printf("Split huge PMD on MADV_DONTNEED...");
524 	if (!check_huge(p))
525 		success("OK");
526 	else
527 		fail("Fail");
528 	munmap(p, hpage_pmd_size);
529 }
530 
collapse_full(void)531 static void collapse_full(void)
532 {
533 	void *p;
534 
535 	p = alloc_mapping();
536 	fill_memory(p, 0, hpage_pmd_size);
537 	if (wait_for_scan("Collapse fully populated PTE table", p))
538 		fail("Timeout");
539 	else if (check_huge(p))
540 		success("OK");
541 	else
542 		fail("Fail");
543 	validate_memory(p, 0, hpage_pmd_size);
544 	munmap(p, hpage_pmd_size);
545 }
546 
collapse_empty(void)547 static void collapse_empty(void)
548 {
549 	void *p;
550 
551 	p = alloc_mapping();
552 	if (wait_for_scan("Do not collapse empty PTE table", p))
553 		fail("Timeout");
554 	else if (check_huge(p))
555 		fail("Fail");
556 	else
557 		success("OK");
558 	munmap(p, hpage_pmd_size);
559 }
560 
collapse_single_pte_entry(void)561 static void collapse_single_pte_entry(void)
562 {
563 	void *p;
564 
565 	p = alloc_mapping();
566 	fill_memory(p, 0, page_size);
567 	if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
568 		fail("Timeout");
569 	else if (check_huge(p))
570 		success("OK");
571 	else
572 		fail("Fail");
573 	validate_memory(p, 0, page_size);
574 	munmap(p, hpage_pmd_size);
575 }
576 
collapse_max_ptes_none(void)577 static void collapse_max_ptes_none(void)
578 {
579 	int max_ptes_none = hpage_pmd_nr / 2;
580 	struct settings settings = default_settings;
581 	void *p;
582 
583 	settings.khugepaged.max_ptes_none = max_ptes_none;
584 	write_settings(&settings);
585 
586 	p = alloc_mapping();
587 
588 	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
589 	if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
590 		fail("Timeout");
591 	else if (check_huge(p))
592 		fail("Fail");
593 	else
594 		success("OK");
595 	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
596 
597 	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
598 	if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
599 		fail("Timeout");
600 	else if (check_huge(p))
601 		success("OK");
602 	else
603 		fail("Fail");
604 	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
605 
606 	munmap(p, hpage_pmd_size);
607 	write_settings(&default_settings);
608 }
609 
collapse_swapin_single_pte(void)610 static void collapse_swapin_single_pte(void)
611 {
612 	void *p;
613 	p = alloc_mapping();
614 	fill_memory(p, 0, hpage_pmd_size);
615 
616 	printf("Swapout one page...");
617 	if (madvise(p, page_size, MADV_PAGEOUT)) {
618 		perror("madvise(MADV_PAGEOUT)");
619 		exit(EXIT_FAILURE);
620 	}
621 	if (check_swap(p, page_size)) {
622 		success("OK");
623 	} else {
624 		fail("Fail");
625 		goto out;
626 	}
627 
628 	if (wait_for_scan("Collapse with swapping in single PTE entry", p))
629 		fail("Timeout");
630 	else if (check_huge(p))
631 		success("OK");
632 	else
633 		fail("Fail");
634 	validate_memory(p, 0, hpage_pmd_size);
635 out:
636 	munmap(p, hpage_pmd_size);
637 }
638 
collapse_max_ptes_swap(void)639 static void collapse_max_ptes_swap(void)
640 {
641 	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
642 	void *p;
643 
644 	p = alloc_mapping();
645 
646 	fill_memory(p, 0, hpage_pmd_size);
647 	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
648 	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
649 		perror("madvise(MADV_PAGEOUT)");
650 		exit(EXIT_FAILURE);
651 	}
652 	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
653 		success("OK");
654 	} else {
655 		fail("Fail");
656 		goto out;
657 	}
658 
659 	if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
660 		fail("Timeout");
661 	else if (check_huge(p))
662 		fail("Fail");
663 	else
664 		success("OK");
665 	validate_memory(p, 0, hpage_pmd_size);
666 
667 	fill_memory(p, 0, hpage_pmd_size);
668 	printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
669 	if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
670 		perror("madvise(MADV_PAGEOUT)");
671 		exit(EXIT_FAILURE);
672 	}
673 	if (check_swap(p, max_ptes_swap * page_size)) {
674 		success("OK");
675 	} else {
676 		fail("Fail");
677 		goto out;
678 	}
679 
680 	if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
681 		fail("Timeout");
682 	else if (check_huge(p))
683 		success("OK");
684 	else
685 		fail("Fail");
686 	validate_memory(p, 0, hpage_pmd_size);
687 out:
688 	munmap(p, hpage_pmd_size);
689 }
690 
collapse_single_pte_entry_compound(void)691 static void collapse_single_pte_entry_compound(void)
692 {
693 	void *p;
694 
695 	p = alloc_mapping();
696 
697 	printf("Allocate huge page...");
698 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
699 	fill_memory(p, 0, hpage_pmd_size);
700 	if (check_huge(p))
701 		success("OK");
702 	else
703 		fail("Fail");
704 	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
705 
706 	printf("Split huge page leaving single PTE mapping compound page...");
707 	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
708 	if (!check_huge(p))
709 		success("OK");
710 	else
711 		fail("Fail");
712 
713 	if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
714 		fail("Timeout");
715 	else if (check_huge(p))
716 		success("OK");
717 	else
718 		fail("Fail");
719 	validate_memory(p, 0, page_size);
720 	munmap(p, hpage_pmd_size);
721 }
722 
collapse_full_of_compound(void)723 static void collapse_full_of_compound(void)
724 {
725 	void *p;
726 
727 	p = alloc_mapping();
728 
729 	printf("Allocate huge page...");
730 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
731 	fill_memory(p, 0, hpage_pmd_size);
732 	if (check_huge(p))
733 		success("OK");
734 	else
735 		fail("Fail");
736 
737 	printf("Split huge page leaving single PTE page table full of compound pages...");
738 	madvise(p, page_size, MADV_NOHUGEPAGE);
739 	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
740 	if (!check_huge(p))
741 		success("OK");
742 	else
743 		fail("Fail");
744 
745 	if (wait_for_scan("Collapse PTE table full of compound pages", p))
746 		fail("Timeout");
747 	else if (check_huge(p))
748 		success("OK");
749 	else
750 		fail("Fail");
751 	validate_memory(p, 0, hpage_pmd_size);
752 	munmap(p, hpage_pmd_size);
753 }
754 
/*
 * Build a huge-page-sized range in which every base page comes from a
 * different huge page allocation, then expect the range to be reported as a
 * huge page after wait_for_scan() with all page markers intact.
 *
 * Each of the hpage_pmd_nr iterations faults in a huge page at BASE_ADDR,
 * keeps only its first base page and appends it to a run of pages that
 * grows downwards from BASE_ADDR.
 *
 * Exits the program when a huge page cannot be allocated or mremap() fails.
 */
static void collapse_compound_extreme(void)
{
	void *p;
	int i;

	p = alloc_mapping();
	for (i = 0; i < hpage_pmd_nr; i++) {
		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
				i + 1, hpage_pmd_nr);

		/* Fault in a huge page at BASE_ADDR for this iteration. */
		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
		fill_memory(BASE_ADDR, 0, hpage_pmd_size);
		if (!check_huge(BASE_ADDR)) {
			printf("Failed to allocate huge page\n");
			exit(EXIT_FAILURE);
		}
		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);

		/*
		 * Take the i pages collected so far (just below BASE_ADDR)
		 * plus the new huge page, shrink the range to i + 1 pages and
		 * park it two huge page sizes above BASE_ADDR.
		 */
		p = mremap(BASE_ADDR - i * page_size,
				i * page_size + hpage_pmd_size,
				(i + 1) * page_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR + 2 * hpage_pmd_size);
		if (p == MAP_FAILED) {
			perror("mremap+unmap");
			exit(EXIT_FAILURE);
		}

		/*
		 * Move the i + 1 pages back so that they end at BASE_ADDR,
		 * growing the range by one huge page size so that BASE_ADDR
		 * is mapped again for the next iteration.
		 */
		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
				(i + 1) * page_size,
				(i + 1) * page_size + hpage_pmd_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR - (i + 1) * page_size);
		if (p == MAP_FAILED) {
			perror("mremap+alloc");
			exit(EXIT_FAILURE);
		}
	}

	/* Drop the spare range at BASE_ADDR; p now covers only the collected pages. */
	munmap(BASE_ADDR, hpage_pmd_size);
	fill_memory(p, 0, hpage_pmd_size);
	if (!check_huge(p))
		success("OK");
	else
		fail("Fail");

	if (wait_for_scan("Collapse PTE table full of different compound pages", p))
		fail("Timeout");
	else if (check_huge(p))
		success("OK");
	else
		fail("Fail");

	validate_memory(p, 0, hpage_pmd_size);
	munmap(p, hpage_pmd_size);
}
811 
collapse_fork(void)812 static void collapse_fork(void)
813 {
814 	int wstatus;
815 	void *p;
816 
817 	p = alloc_mapping();
818 
819 	printf("Allocate small page...");
820 	fill_memory(p, 0, page_size);
821 	if (!check_huge(p))
822 		success("OK");
823 	else
824 		fail("Fail");
825 
826 	printf("Share small page over fork()...");
827 	if (!fork()) {
828 		/* Do not touch settings on child exit */
829 		skip_settings_restore = true;
830 		exit_status = 0;
831 
832 		if (!check_huge(p))
833 			success("OK");
834 		else
835 			fail("Fail");
836 
837 		fill_memory(p, page_size, 2 * page_size);
838 
839 		if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
840 			fail("Timeout");
841 		else if (check_huge(p))
842 			success("OK");
843 		else
844 			fail("Fail");
845 
846 		validate_memory(p, 0, page_size);
847 		munmap(p, hpage_pmd_size);
848 		exit(exit_status);
849 	}
850 
851 	wait(&wstatus);
852 	exit_status += WEXITSTATUS(wstatus);
853 
854 	printf("Check if parent still has small page...");
855 	if (!check_huge(p))
856 		success("OK");
857 	else
858 		fail("Fail");
859 	validate_memory(p, 0, page_size);
860 	munmap(p, hpage_pmd_size);
861 }
862 
collapse_fork_compound(void)863 static void collapse_fork_compound(void)
864 {
865 	int wstatus;
866 	void *p;
867 
868 	p = alloc_mapping();
869 
870 	printf("Allocate huge page...");
871 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
872 	fill_memory(p, 0, hpage_pmd_size);
873 	if (check_huge(p))
874 		success("OK");
875 	else
876 		fail("Fail");
877 
878 	printf("Share huge page over fork()...");
879 	if (!fork()) {
880 		/* Do not touch settings on child exit */
881 		skip_settings_restore = true;
882 		exit_status = 0;
883 
884 		if (check_huge(p))
885 			success("OK");
886 		else
887 			fail("Fail");
888 
889 		printf("Split huge page PMD in child process...");
890 		madvise(p, page_size, MADV_NOHUGEPAGE);
891 		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
892 		if (!check_huge(p))
893 			success("OK");
894 		else
895 			fail("Fail");
896 		fill_memory(p, 0, page_size);
897 
898 		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
899 		if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
900 			fail("Timeout");
901 		else if (check_huge(p))
902 			success("OK");
903 		else
904 			fail("Fail");
905 		write_num("khugepaged/max_ptes_shared",
906 				default_settings.khugepaged.max_ptes_shared);
907 
908 		validate_memory(p, 0, hpage_pmd_size);
909 		munmap(p, hpage_pmd_size);
910 		exit(exit_status);
911 	}
912 
913 	wait(&wstatus);
914 	exit_status += WEXITSTATUS(wstatus);
915 
916 	printf("Check if parent still has huge page...");
917 	if (check_huge(p))
918 		success("OK");
919 	else
920 		fail("Fail");
921 	validate_memory(p, 0, hpage_pmd_size);
922 	munmap(p, hpage_pmd_size);
923 }
924 
collapse_max_ptes_shared()925 static void collapse_max_ptes_shared()
926 {
927 	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
928 	int wstatus;
929 	void *p;
930 
931 	p = alloc_mapping();
932 
933 	printf("Allocate huge page...");
934 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
935 	fill_memory(p, 0, hpage_pmd_size);
936 	if (check_huge(p))
937 		success("OK");
938 	else
939 		fail("Fail");
940 
941 	printf("Share huge page over fork()...");
942 	if (!fork()) {
943 		/* Do not touch settings on child exit */
944 		skip_settings_restore = true;
945 		exit_status = 0;
946 
947 		if (check_huge(p))
948 			success("OK");
949 		else
950 			fail("Fail");
951 
952 		printf("Trigger CoW on page %d of %d...",
953 				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
954 		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
955 		if (!check_huge(p))
956 			success("OK");
957 		else
958 			fail("Fail");
959 
960 		if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
961 			fail("Timeout");
962 		else if (!check_huge(p))
963 			success("OK");
964 		else
965 			fail("Fail");
966 
967 		printf("Trigger CoW on page %d of %d...",
968 				hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
969 		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
970 		if (!check_huge(p))
971 			success("OK");
972 		else
973 			fail("Fail");
974 
975 
976 		if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
977 			fail("Timeout");
978 		else if (check_huge(p))
979 			success("OK");
980 		else
981 			fail("Fail");
982 
983 		validate_memory(p, 0, hpage_pmd_size);
984 		munmap(p, hpage_pmd_size);
985 		exit(exit_status);
986 	}
987 
988 	wait(&wstatus);
989 	exit_status += WEXITSTATUS(wstatus);
990 
991 	printf("Check if parent still has huge page...");
992 	if (check_huge(p))
993 		success("OK");
994 	else
995 		fail("Fail");
996 	validate_memory(p, 0, hpage_pmd_size);
997 	munmap(p, hpage_pmd_size);
998 }
999 
main(void)1000 int main(void)
1001 {
1002 	setbuf(stdout, NULL);
1003 
1004 	page_size = getpagesize();
1005 	hpage_pmd_size = read_num("hpage_pmd_size");
1006 	hpage_pmd_nr = hpage_pmd_size / page_size;
1007 
1008 	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1009 	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1010 	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1011 	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1012 
1013 	save_settings();
1014 	adjust_settings();
1015 
1016 	alloc_at_fault();
1017 	collapse_full();
1018 	collapse_empty();
1019 	collapse_single_pte_entry();
1020 	collapse_max_ptes_none();
1021 	collapse_swapin_single_pte();
1022 	collapse_max_ptes_swap();
1023 	collapse_single_pte_entry_compound();
1024 	collapse_full_of_compound();
1025 	collapse_compound_extreme();
1026 	collapse_fork();
1027 	collapse_fork_compound();
1028 	collapse_max_ptes_shared();
1029 
1030 	restore_settings(0);
1031 }
1032