1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hugepage-madvise:
4  *
5  * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
6  * on hugetlb mappings.
7  *
8  * Before running this test, make sure the administrator has pre-allocated
9  * at least MIN_FREE_PAGES hugetlb pages and they are free.  In addition,
10  * the test takes an argument that is the path to a file in a hugetlbfs
11  * filesystem.  Therefore, a hugetlbfs filesystem must be mounted on some
12  * directory.
13  */
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <unistd.h>
18 #include <sys/mman.h>
19 #define __USE_GNU
20 #include <fcntl.h>
21 
22 #define USAGE	"USAGE: %s <hugepagefile_name>\n"
23 #define MIN_FREE_PAGES	20
24 #define NR_HUGE_PAGES	10	/* common number of pages to map/allocate */
25 
/*
 * validate_free_pages - check the current free huge page count
 * @exp_free: the number of free huge pages the caller expects
 *
 * Re-reads the count with get_free_hugepages() and, when it differs from
 * @exp_free, prints a message carrying the caller's line number and exits
 * with status 1.  This is a macro rather than a function so that __LINE__
 * expands at the call site.
 *
 * The local uses the same type get_free_hugepages() returns, so the value
 * is compared as read, with no narrowing to int.
 */
#define validate_free_pages(exp_free)					\
	do {								\
		unsigned long fhp = get_free_hugepages();		\
		if (fhp != (exp_free)) {				\
			printf("Unexpected number of free huge "	\
				"pages line %d\n", __LINE__);		\
			exit(1);					\
		}							\
	} while (0)
35 
36 unsigned long huge_page_size;
37 unsigned long base_page_size;
38 
/*
 * default_huge_page_size copied from mlock2-tests.c
 *
 * Scans /proc/meminfo for the "Hugepagesize:" line and returns that value
 * converted from kB to bytes.  Returns 0 when /proc/meminfo cannot be
 * opened or no such line is found.
 */
unsigned long default_huge_page_size(void)
{
	FILE *meminfo = fopen("/proc/meminfo", "r");
	unsigned long size = 0;
	size_t cap = 0;
	char *buf = NULL;

	if (meminfo == NULL)
		return 0;

	for (;;) {
		if (getline(&buf, &cap, meminfo) <= 0)
			break;
		if (sscanf(buf, "Hugepagesize:       %lu kB", &size) != 1)
			continue;

		/* the file reports kB; callers want bytes */
		size <<= 10;
		break;
	}

	/* getline() owns the buffer until we release it */
	free(buf);
	fclose(meminfo);
	return size;
}
62 
/*
 * Return the number of free huge pages reported on the "HugePages_Free:"
 * line of /proc/meminfo.  Returns 0 when /proc/meminfo cannot be opened
 * or no such line is found.
 */
unsigned long get_free_hugepages(void)
{
	unsigned long nr_free = 0;
	size_t cap = 0;
	char *buf = NULL;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (meminfo == NULL)
		return nr_free;

	/* stop at end of file or at the first line that parses */
	while (getline(&buf, &cap, meminfo) > 0 &&
	       sscanf(buf, "HugePages_Free:      %lu", &nr_free) != 1)
		continue;

	free(buf);
	fclose(meminfo);
	return nr_free;
}
81 
write_fault_pages(void * addr,unsigned long nr_pages)82 void write_fault_pages(void *addr, unsigned long nr_pages)
83 {
84 	unsigned long i;
85 
86 	for (i = 0; i < nr_pages; i++)
87 		*((unsigned long *)(addr + (i * huge_page_size))) = i;
88 }
89 
read_fault_pages(void * addr,unsigned long nr_pages)90 void read_fault_pages(void *addr, unsigned long nr_pages)
91 {
92 	unsigned long dummy = 0;
93 	unsigned long i;
94 
95 	for (i = 0; i < nr_pages; i++)
96 		dummy += *((unsigned long *)(addr + (i * huge_page_size)));
97 }
98 
main(int argc,char ** argv)99 int main(int argc, char **argv)
100 {
101 	unsigned long free_hugepages;
102 	void *addr, *addr2;
103 	int fd;
104 	int ret;
105 
106 	if (argc != 2) {
107 		printf(USAGE, argv[0]);
108 		exit(1);
109 	}
110 
111 	huge_page_size = default_huge_page_size();
112 	if (!huge_page_size) {
113 		printf("Unable to determine huge page size, exiting!\n");
114 		exit(1);
115 	}
116 	base_page_size = sysconf(_SC_PAGE_SIZE);
117 	if (!huge_page_size) {
118 		printf("Unable to determine base page size, exiting!\n");
119 		exit(1);
120 	}
121 
122 	free_hugepages = get_free_hugepages();
123 	if (free_hugepages < MIN_FREE_PAGES) {
124 		printf("Not enough free huge pages to test, exiting!\n");
125 		exit(1);
126 	}
127 
128 	fd = open(argv[1], O_CREAT | O_RDWR, 0755);
129 	if (fd < 0) {
130 		perror("Open failed");
131 		exit(1);
132 	}
133 
134 	/*
135 	 * Test validity of MADV_DONTNEED addr and length arguments.  mmap
136 	 * size is NR_HUGE_PAGES + 2.  One page at the beginning and end of
137 	 * the mapping will be unmapped so we KNOW there is nothing mapped
138 	 * there.
139 	 */
140 	addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
141 			PROT_READ | PROT_WRITE,
142 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
143 			-1, 0);
144 	if (addr == MAP_FAILED) {
145 		perror("mmap");
146 		exit(1);
147 	}
148 	if (munmap(addr, huge_page_size) ||
149 			munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
150 				huge_page_size)) {
151 		perror("munmap");
152 		exit(1);
153 	}
154 	addr = addr + huge_page_size;
155 
156 	write_fault_pages(addr, NR_HUGE_PAGES);
157 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
158 
159 	/* addr before mapping should fail */
160 	ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
161 		MADV_DONTNEED);
162 	if (!ret) {
163 		printf("Unexpected success of madvise call with invalid addr line %d\n",
164 				__LINE__);
165 			exit(1);
166 	}
167 
168 	/* addr + length after mapping should fail */
169 	ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
170 		MADV_DONTNEED);
171 	if (!ret) {
172 		printf("Unexpected success of madvise call with invalid length line %d\n",
173 				__LINE__);
174 			exit(1);
175 	}
176 
177 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
178 
179 	/*
180 	 * Test alignment of MADV_DONTNEED addr and length arguments
181 	 */
182 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
183 			PROT_READ | PROT_WRITE,
184 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
185 			-1, 0);
186 	if (addr == MAP_FAILED) {
187 		perror("mmap");
188 		exit(1);
189 	}
190 	write_fault_pages(addr, NR_HUGE_PAGES);
191 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
192 
193 	/* addr is not huge page size aligned and should fail */
194 	ret = madvise(addr + base_page_size,
195 			NR_HUGE_PAGES * huge_page_size - base_page_size,
196 			MADV_DONTNEED);
197 	if (!ret) {
198 		printf("Unexpected success of madvise call with unaligned start address %d\n",
199 				__LINE__);
200 			exit(1);
201 	}
202 
203 	/* addr + length should be aligned up to huge page size */
204 	if (madvise(addr,
205 			((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
206 			MADV_DONTNEED)) {
207 		perror("madvise");
208 		exit(1);
209 	}
210 
211 	/* should free all pages in mapping */
212 	validate_free_pages(free_hugepages);
213 
214 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
215 
216 	/*
217 	 * Test MADV_DONTNEED on anonymous private mapping
218 	 */
219 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
220 			PROT_READ | PROT_WRITE,
221 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
222 			-1, 0);
223 	if (addr == MAP_FAILED) {
224 		perror("mmap");
225 		exit(1);
226 	}
227 	write_fault_pages(addr, NR_HUGE_PAGES);
228 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
229 
230 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
231 		perror("madvise");
232 		exit(1);
233 	}
234 
235 	/* should free all pages in mapping */
236 	validate_free_pages(free_hugepages);
237 
238 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
239 
240 	/*
241 	 * Test MADV_DONTNEED on private mapping of hugetlb file
242 	 */
243 	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
244 		perror("fallocate");
245 		exit(1);
246 	}
247 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
248 
249 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
250 			PROT_READ | PROT_WRITE,
251 			MAP_PRIVATE, fd, 0);
252 	if (addr == MAP_FAILED) {
253 		perror("mmap");
254 		exit(1);
255 	}
256 
257 	/* read should not consume any pages */
258 	read_fault_pages(addr, NR_HUGE_PAGES);
259 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
260 
261 	/* madvise should not free any pages */
262 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
263 		perror("madvise");
264 		exit(1);
265 	}
266 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
267 
268 	/* writes should allocate private pages */
269 	write_fault_pages(addr, NR_HUGE_PAGES);
270 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
271 
272 	/* madvise should free private pages */
273 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
274 		perror("madvise");
275 		exit(1);
276 	}
277 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
278 
279 	/* writes should allocate private pages */
280 	write_fault_pages(addr, NR_HUGE_PAGES);
281 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
282 
283 	/*
284 	 * The fallocate below certainly should free the pages associated
285 	 * with the file.  However, pages in the private mapping are also
286 	 * freed.  This is not the 'correct' behavior, but is expected
287 	 * because this is how it has worked since the initial hugetlb
288 	 * implementation.
289 	 */
290 	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
291 					0, NR_HUGE_PAGES * huge_page_size)) {
292 		perror("fallocate");
293 		exit(1);
294 	}
295 	validate_free_pages(free_hugepages);
296 
297 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
298 
299 	/*
300 	 * Test MADV_DONTNEED on shared mapping of hugetlb file
301 	 */
302 	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
303 		perror("fallocate");
304 		exit(1);
305 	}
306 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
307 
308 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
309 			PROT_READ | PROT_WRITE,
310 			MAP_SHARED, fd, 0);
311 	if (addr == MAP_FAILED) {
312 		perror("mmap");
313 		exit(1);
314 	}
315 
316 	/* write should not consume any pages */
317 	write_fault_pages(addr, NR_HUGE_PAGES);
318 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
319 
320 	/* madvise should not free any pages */
321 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
322 		perror("madvise");
323 		exit(1);
324 	}
325 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
326 
327 	/*
328 	 * Test MADV_REMOVE on shared mapping of hugetlb file
329 	 *
330 	 * madvise is same as hole punch and should free all pages.
331 	 */
332 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
333 		perror("madvise");
334 		exit(1);
335 	}
336 	validate_free_pages(free_hugepages);
337 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
338 
339 	/*
340 	 * Test MADV_REMOVE on shared and private mapping of hugetlb file
341 	 */
342 	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
343 		perror("fallocate");
344 		exit(1);
345 	}
346 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
347 
348 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
349 			PROT_READ | PROT_WRITE,
350 			MAP_SHARED, fd, 0);
351 	if (addr == MAP_FAILED) {
352 		perror("mmap");
353 		exit(1);
354 	}
355 
356 	/* shared write should not consume any additional pages */
357 	write_fault_pages(addr, NR_HUGE_PAGES);
358 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
359 
360 	addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
361 			PROT_READ | PROT_WRITE,
362 			MAP_PRIVATE, fd, 0);
363 	if (addr2 == MAP_FAILED) {
364 		perror("mmap");
365 		exit(1);
366 	}
367 
368 	/* private read should not consume any pages */
369 	read_fault_pages(addr2, NR_HUGE_PAGES);
370 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
371 
372 	/* private write should consume additional pages */
373 	write_fault_pages(addr2, NR_HUGE_PAGES);
374 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
375 
376 	/* madvise of shared mapping should not free any pages */
377 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
378 		perror("madvise");
379 		exit(1);
380 	}
381 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
382 
383 	/* madvise of private mapping should free private pages */
384 	if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
385 		perror("madvise");
386 		exit(1);
387 	}
388 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
389 
390 	/* private write should consume additional pages again */
391 	write_fault_pages(addr2, NR_HUGE_PAGES);
392 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
393 
394 	/*
395 	 * madvise should free both file and private pages although this is
396 	 * not correct.  private pages should not be freed, but this is
397 	 * expected.  See comment associated with FALLOC_FL_PUNCH_HOLE call.
398 	 */
399 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
400 		perror("madvise");
401 		exit(1);
402 	}
403 	validate_free_pages(free_hugepages);
404 
405 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
406 	(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
407 
408 	close(fd);
409 	unlink(argv[1]);
410 	return 0;
411 }
412