1 /*
2  * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 /*
17  * Fork and exec tiny 1 page executable which precisely controls its VM.
18  * Test /proc/$PID/maps
19  * Test /proc/$PID/smaps
20  * Test /proc/$PID/smaps_rollup
21  * Test /proc/$PID/statm
22  *
23  * FIXME require CONFIG_TMPFS which can be disabled
24  * FIXME test other values from "smaps"
25  * FIXME support other archs
26  */
27 #undef NDEBUG
28 #include <assert.h>
29 #include <errno.h>
30 #include <sched.h>
31 #include <signal.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/syscall.h>
44 #include <sys/uio.h>
45 #include <linux/kdev_t.h>
46 #include <sys/time.h>
47 #include <sys/resource.h>
48 
/*
 * Invoke the execveat system call directly through syscall(), passing all
 * five arguments through unchanged.  The value returned by syscall() is
 * handed back to the caller as-is.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	const long rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);

	return rv;
}
53 
make_private_tmp(void)54 static void make_private_tmp(void)
55 {
56 	if (unshare(CLONE_NEWNS) == -1) {
57 		if (errno == ENOSYS || errno == EPERM) {
58 			exit(4);
59 		}
60 		exit(1);
61 	}
62 	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
63 		exit(1);
64 	}
65 	if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
66 		exit(1);
67 	}
68 }
69 
/* PID of the forked test child; holds -1 until main() stores fork()'s result. */
static pid_t pid = -1;

/*
 * Exit handler registered by main() with atexit(): sends SIGTERM to the
 * test child, if one has been recorded in `pid`.
 */
static void ate(void)
{
	if (pid <= 0) {
		/* No child recorded (or we are the child itself): nothing to do. */
		return;
	}
	kill(pid, SIGTERM);
}
77 
/*
 * ELF64 file header.  make_exe() fills one of these in and writes it as the
 * first bytes of the generated executable, so the field order and widths
 * must not change.
 */
struct elf64_hdr {
	uint8_t e_ident[16];	/* 0x7f 'E' 'L' 'F', then class, data encoding, version, OS ABI; rest zero */
	uint16_t e_type;	/* object file type; make_exe() uses 2 (ET_EXEC in the ELF spec) */
	uint16_t e_machine;	/* target machine; make_exe() uses 0x3e (EM_X86_64 in the ELF spec) */
	uint32_t e_version;	/* object file version; make_exe() uses 1 */
	uint64_t e_entry;	/* virtual address where execution starts */
	uint64_t e_phoff;	/* file offset of the program header table */
	uint64_t e_shoff;	/* file offset of the section header table; 0 here (no sections) */
	uint32_t e_flags;	/* processor-specific flags; 0 here */
	uint16_t e_ehsize;	/* size of this header in bytes */
	uint16_t e_phentsize;	/* size of one program header entry */
	uint16_t e_phnum;	/* number of program header entries */
	uint16_t e_shentsize;	/* size of one section header entry; 0 here */
	uint16_t e_shnum;	/* number of section header entries; 0 here */
	uint16_t e_shstrndx;	/* section name string table index; 0 here */
};
94 
/*
 * ELF64 program header.  make_exe() writes exactly one of these directly
 * after the file header, describing the single segment of the generated
 * executable.
 */
struct elf64_phdr {
	uint32_t p_type;	/* segment type; make_exe() uses 1 (PT_LOAD in the ELF spec) */
	uint32_t p_flags;	/* permission bits; make_exe() uses (1<<2)|1, i.e. read + execute */
	uint64_t p_offset;	/* file offset of the segment's first byte */
	uint64_t p_vaddr;	/* virtual address the segment is mapped at */
	uint64_t p_paddr;	/* physical address; left 0 by make_exe() */
	uint64_t p_filesz;	/* number of bytes of the segment present in the file */
	uint64_t p_memsz;	/* number of bytes the segment occupies in memory */
	uint64_t p_align;	/* segment alignment; make_exe() uses 4096 */
};
105 
106 #ifdef __x86_64__
/* Page size assumed when computing the end address of the expected mapping. */
#define PAGE_SIZE 4096
/* Virtual address at which the generated executable's only segment is loaded. */
#define VADDR (1UL << 32)
/* Column in the expected maps/smaps_rollup line at which the name field is written. */
#define MAPS_OFFSET 73

/*
 * Machine-code bytes of the x86-64 "syscall" instruction, for use inside
 * payload[].  NOTE: from this point on the identifier `syscall` expands to
 * these two bytes, so the libc syscall() function cannot be called by name
 * in code below this line.
 */
#define syscall	0x0f, 0x05
/* Bytes of "mov rdi, imm64": REX.W prefix, opcode, then x as 8 little-endian bytes. */
#define mov_rdi(x)	\
	0x48, 0xbf,	\
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff,	\
	((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff

/* Bytes of "mov rsi, imm64": REX.W prefix, opcode, then x as 8 little-endian bytes. */
#define mov_rsi(x)	\
	0x48, 0xbe,	\
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff,	\
	((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff

/* Bytes of "mov eax, imm32": opcode, then x as 4 little-endian bytes. */
#define mov_eax(x)	\
	0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
124 
/*
 * Raw x86-64 machine code executed by the test child.  It is placed in the
 * generated executable right after the ELF and program headers, which is
 * where make_exe() points e_entry.
 *
 * The code unmaps everything above its own page, writes one byte to fd 0 to
 * tell the parent it is ready, and then sits in pause() forever.
 */
static const uint8_t payload[] = {
	/* Casually unmap stack, vDSO and everything else. */
	/*
	 * munmap (syscall number 11): the range starts at the page after
	 * VADDR and ends at (1ULL << 47) - 4096.
	 */
	mov_rdi(VADDR + 4096),
	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
	mov_eax(11),
	syscall,

	/* Ping parent. */
	/*
	 * write(0, &c, 1); (syscall number 1).  rsi is loaded with the
	 * address of the next instruction, so the byte sent is simply the
	 * code byte found there; its value is not meaningful.
	 */
	0x31, 0xff,					/* xor edi, edi */
	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
	mov_eax(1),
	syscall,

	/* 1: pause(); (syscall number 34) */
	mov_eax(34),
	syscall,

	/* Displacement 0xf7 is -9: back over the 5 + 2 + 2 bytes of the pause loop. */
	0xeb, 0xf7,	/* jmp 1b */
};
147 
make_exe(const uint8_t * payload,size_t len)148 static int make_exe(const uint8_t *payload, size_t len)
149 {
150 	struct elf64_hdr h;
151 	struct elf64_phdr ph;
152 
153 	struct iovec iov[3] = {
154 		{&h, sizeof(struct elf64_hdr)},
155 		{&ph, sizeof(struct elf64_phdr)},
156 		{(void *)payload, len},
157 	};
158 	int fd, fd1;
159 	char buf[64];
160 
161 	memset(&h, 0, sizeof(h));
162 	h.e_ident[0] = 0x7f;
163 	h.e_ident[1] = 'E';
164 	h.e_ident[2] = 'L';
165 	h.e_ident[3] = 'F';
166 	h.e_ident[4] = 2;
167 	h.e_ident[5] = 1;
168 	h.e_ident[6] = 1;
169 	h.e_ident[7] = 0;
170 	h.e_type = 2;
171 	h.e_machine = 0x3e;
172 	h.e_version = 1;
173 	h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
174 	h.e_phoff = sizeof(struct elf64_hdr);
175 	h.e_shoff = 0;
176 	h.e_flags = 0;
177 	h.e_ehsize = sizeof(struct elf64_hdr);
178 	h.e_phentsize = sizeof(struct elf64_phdr);
179 	h.e_phnum = 1;
180 	h.e_shentsize = 0;
181 	h.e_shnum = 0;
182 	h.e_shstrndx = 0;
183 
184 	memset(&ph, 0, sizeof(ph));
185 	ph.p_type = 1;
186 	ph.p_flags = (1<<2)|1;
187 	ph.p_offset = 0;
188 	ph.p_vaddr = VADDR;
189 	ph.p_paddr = 0;
190 	ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
191 	ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
192 	ph.p_align = 4096;
193 
194 	fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
195 	if (fd == -1) {
196 		exit(1);
197 	}
198 
199 	if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
200 		exit(1);
201 	}
202 
203 	/* Avoid ETXTBSY on exec. */
204 	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
205 	fd1 = open(buf, O_RDONLY|O_CLOEXEC);
206 	close(fd);
207 
208 	return fd1;
209 }
210 #endif
211 
/* Set to true by vsyscall() when its probe child exits with status 0. */
static bool g_vsyscall = false;

/*
 * Exact text of the vsyscall line that main() expects in /proc/$PID/maps
 * and /proc/$PID/smaps when g_vsyscall is true.
 */
static const char str_vsyscall[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
216 
217 #ifdef __x86_64__
/*
 * SA_SIGINFO-style signal handler installed for SIGSEGV by the vsyscall
 * probe child.  All three arguments are ignored; the process terminates
 * immediately with exit status 1.
 */
static void sigaction_SIGSEGV(int signo, siginfo_t *info, void *ucontext)
{
	(void)signo;
	(void)info;
	(void)ucontext;

	_exit(1);
}
222 
223 /*
224  * vsyscall page can't be unmapped, probe it with memory load.
225  */
vsyscall(void)226 static void vsyscall(void)
227 {
228 	pid_t pid;
229 	int wstatus;
230 
231 	pid = fork();
232 	if (pid < 0) {
233 		fprintf(stderr, "fork, errno %d\n", errno);
234 		exit(1);
235 	}
236 	if (pid == 0) {
237 		struct rlimit rlim = {0, 0};
238 		(void)setrlimit(RLIMIT_CORE, &rlim);
239 
240 		/* Hide "segfault at ffffffffff600000" messages. */
241 		struct sigaction act;
242 		memset(&act, 0, sizeof(struct sigaction));
243 		act.sa_flags = SA_SIGINFO;
244 		act.sa_sigaction = sigaction_SIGSEGV;
245 		(void)sigaction(SIGSEGV, &act, NULL);
246 
247 		*(volatile int *)0xffffffffff600000UL;
248 		exit(0);
249 	}
250 	waitpid(pid, &wstatus, 0);
251 	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
252 		g_vsyscall = true;
253 	}
254 }
255 
main(void)256 int main(void)
257 {
258 	int pipefd[2];
259 	int exec_fd;
260 
261 	vsyscall();
262 
263 	atexit(ate);
264 
265 	make_private_tmp();
266 
267 	/* Reserve fd 0 for 1-byte pipe ping from child. */
268 	close(0);
269 	if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
270 		return 1;
271 	}
272 
273 	exec_fd = make_exe(payload, sizeof(payload));
274 
275 	if (pipe(pipefd) == -1) {
276 		return 1;
277 	}
278 	if (dup2(pipefd[1], 0) != 0) {
279 		return 1;
280 	}
281 
282 	pid = fork();
283 	if (pid == -1) {
284 		return 1;
285 	}
286 	if (pid == 0) {
287 		sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
288 		return 1;
289 	}
290 
291 	char _;
292 	if (read(pipefd[0], &_, 1) != 1) {
293 		return 1;
294 	}
295 
296 	struct stat st;
297 	if (fstat(exec_fd, &st) == -1) {
298 		return 1;
299 	}
300 
301 	/* Generate "head -n1 /proc/$PID/maps" */
302 	char buf0[256];
303 	memset(buf0, ' ', sizeof(buf0));
304 	int len = snprintf(buf0, sizeof(buf0),
305 			"%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
306 			VADDR, VADDR + PAGE_SIZE,
307 			MAJOR(st.st_dev), MINOR(st.st_dev),
308 			(unsigned long long)st.st_ino);
309 	buf0[len] = ' ';
310 	snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
311 		 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
312 
313 	/* Test /proc/$PID/maps */
314 	{
315 		const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0);
316 		char buf[256];
317 		ssize_t rv;
318 		int fd;
319 
320 		snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
321 		fd = open(buf, O_RDONLY);
322 		if (fd == -1) {
323 			return 1;
324 		}
325 		rv = read(fd, buf, sizeof(buf));
326 		assert(rv == len);
327 		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
328 		if (g_vsyscall) {
329 			assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
330 		}
331 	}
332 
333 	/* Test /proc/$PID/smaps */
334 	{
335 		char buf[4096];
336 		ssize_t rv;
337 		int fd;
338 
339 		snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
340 		fd = open(buf, O_RDONLY);
341 		if (fd == -1) {
342 			return 1;
343 		}
344 		rv = read(fd, buf, sizeof(buf));
345 		assert(0 <= rv && rv <= sizeof(buf));
346 
347 		assert(rv >= strlen(buf0));
348 		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
349 
350 #define RSS1 "Rss:                   4 kB\n"
351 #define RSS2 "Rss:                   0 kB\n"
352 #define PSS1 "Pss:                   4 kB\n"
353 #define PSS2 "Pss:                   0 kB\n"
354 		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
355 		       memmem(buf, rv, RSS2, strlen(RSS2)));
356 		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
357 		       memmem(buf, rv, PSS2, strlen(PSS2)));
358 
359 		static const char *S[] = {
360 			"Size:                  4 kB\n",
361 			"KernelPageSize:        4 kB\n",
362 			"MMUPageSize:           4 kB\n",
363 			"Anonymous:             0 kB\n",
364 			"AnonHugePages:         0 kB\n",
365 			"Shared_Hugetlb:        0 kB\n",
366 			"Private_Hugetlb:       0 kB\n",
367 			"Locked:                0 kB\n",
368 		};
369 		int i;
370 
371 		for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
372 			assert(memmem(buf, rv, S[i], strlen(S[i])));
373 		}
374 
375 		if (g_vsyscall) {
376 			assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
377 		}
378 	}
379 
380 	/* Test /proc/$PID/smaps_rollup */
381 	{
382 		char bufr[256];
383 		memset(bufr, ' ', sizeof(bufr));
384 		len = snprintf(bufr, sizeof(bufr),
385 				"%08lx-%08lx ---p 00000000 00:00 0",
386 				VADDR, VADDR + PAGE_SIZE);
387 		bufr[len] = ' ';
388 		snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
389 			 "[rollup]\n");
390 
391 		char buf[1024];
392 		ssize_t rv;
393 		int fd;
394 
395 		snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
396 		fd = open(buf, O_RDONLY);
397 		if (fd == -1) {
398 			return 1;
399 		}
400 		rv = read(fd, buf, sizeof(buf));
401 		assert(0 <= rv && rv <= sizeof(buf));
402 
403 		assert(rv >= strlen(bufr));
404 		assert(memcmp(buf, bufr, strlen(bufr)) == 0);
405 
406 		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
407 		       memmem(buf, rv, RSS2, strlen(RSS2)));
408 		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
409 		       memmem(buf, rv, PSS2, strlen(PSS2)));
410 
411 		static const char *S[] = {
412 			"Anonymous:             0 kB\n",
413 			"AnonHugePages:         0 kB\n",
414 			"Shared_Hugetlb:        0 kB\n",
415 			"Private_Hugetlb:       0 kB\n",
416 			"Locked:                0 kB\n",
417 		};
418 		int i;
419 
420 		for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
421 			assert(memmem(buf, rv, S[i], strlen(S[i])));
422 		}
423 	}
424 
425 	/* Test /proc/$PID/statm */
426 	{
427 		char buf[64];
428 		ssize_t rv;
429 		int fd;
430 
431 		snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
432 		fd = open(buf, O_RDONLY);
433 		if (fd == -1) {
434 			return 1;
435 		}
436 		rv = read(fd, buf, sizeof(buf));
437 		assert(rv == 7 * 2);
438 
439 		assert(buf[0] == '1');	/* ->total_vm */
440 		assert(buf[1] == ' ');
441 		assert(buf[2] == '0' || buf[2] == '1');	/* rss */
442 		assert(buf[3] == ' ');
443 		assert(buf[4] == '0' || buf[2] == '1');	/* file rss */
444 		assert(buf[5] == ' ');
445 		assert(buf[6] == '1');	/* ELF executable segments */
446 		assert(buf[7] == ' ');
447 		assert(buf[8] == '0');
448 		assert(buf[9] == ' ');
449 		assert(buf[10] == '0');	/* ->data_vm + ->stack_vm */
450 		assert(buf[11] == ' ');
451 		assert(buf[12] == '0');
452 		assert(buf[13] == '\n');
453 	}
454 
455 	return 0;
456 }
457 #else
/*
 * Fallback entry point, compiled when __x86_64__ is not defined.  No test
 * is run; the program just returns 4, the same status make_private_tmp()
 * exits with when unshare() is unavailable.
 * NOTE(review): 4 is presumably the test harness's "skipped" code; confirm.
 */
int main(void)
{
	return 4;
}
462 #endif
463