1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
4 *
5 * This test starts a transaction and triggers a signal, forcing a pagefault to
6 * happen when the kernel signal handling code touches the user signal stack.
7 *
8 * In order to avoid pre-faulting the signal stack memory and to force the
9 * pagefault to happen precisely in the kernel signal handling code, the
10 * pagefault handling is done in userspace using the userfaultfd facility.
11 *
12 * Further pagefaults are triggered by crafting the signal handler's ucontext
13 * to point to additional memory regions managed by the userfaultfd, so using
14 * the same mechanism used to avoid pre-faulting the signal stack memory.
15 *
 * On failure (bug is present) the kernel crashes or never returns control back
 * to userspace. If the bug is not present, the test completes almost immediately.
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <linux/userfaultfd.h>
24 #include <poll.h>
25 #include <unistd.h>
26 #include <sys/ioctl.h>
27 #include <sys/syscall.h>
28 #include <fcntl.h>
29 #include <sys/mman.h>
30 #include <pthread.h>
31 #include <signal.h>
32 #include <errno.h>
33
34 #include "tm.h"
35
36
/* Size in bytes of the userfaultfd-managed region: ten 64 KiB pages. */
#define UF_MEM_SIZE (10 * 64 * 1024)

/* Base address of the memory region whose faults are handled by userfaultfd */
static char *uf_mem;

/* Offset into uf_mem of the next chunk that get_uf_mem() will hand out */
static size_t uf_mem_offset;

/*
 * Shadow of uf_mem holding the bytes that get copied into a page when it
 * faults (rather than the page being zero-filled). The signal handler's
 * ucontext is redirected to freshly reserved uf_mem, so that memory has to
 * show the expected content once it is paged in; otherwise returning from
 * the signal handler could segfault.
 */
static char backing_mem[UF_MEM_SIZE];

/* System page size, set by setup_uf_mem() */
static size_t pagesize;
53
54 /*
55 * Return a chunk of at least 'size' bytes of memory that will be handled by
56 * userfaultfd. If 'backing_data' is not NULL, its content will be save to
57 * 'backing_mem' and then copied into the faulting pages when the page fault
58 * is handled.
59 */
get_uf_mem(size_t size,void * backing_data)60 void *get_uf_mem(size_t size, void *backing_data)
61 {
62 void *ret;
63
64 if (uf_mem_offset + size > UF_MEM_SIZE) {
65 fprintf(stderr, "Requesting more uf_mem than expected!\n");
66 exit(EXIT_FAILURE);
67 }
68
69 ret = &uf_mem[uf_mem_offset];
70
71 /* Save the data that will be copied into the faulting page */
72 if (backing_data != NULL)
73 memcpy(&backing_mem[uf_mem_offset], backing_data, size);
74
75 /* Reserve the requested amount of uf_mem */
76 uf_mem_offset += size;
77 /* Keep uf_mem_offset aligned to the page size (round up) */
78 uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
79
80 return ret;
81 }
82
fault_handler_thread(void * arg)83 void *fault_handler_thread(void *arg)
84 {
85 struct uffd_msg msg; /* Data read from userfaultfd */
86 long uffd; /* userfaultfd file descriptor */
87 struct uffdio_copy uffdio_copy;
88 struct pollfd pollfd;
89 ssize_t nread, offset;
90
91 uffd = (long) arg;
92
93 for (;;) {
94 pollfd.fd = uffd;
95 pollfd.events = POLLIN;
96 if (poll(&pollfd, 1, -1) == -1) {
97 perror("poll() failed");
98 exit(EXIT_FAILURE);
99 }
100
101 nread = read(uffd, &msg, sizeof(msg));
102 if (nread == 0) {
103 fprintf(stderr, "read(): EOF on userfaultfd\n");
104 exit(EXIT_FAILURE);
105 }
106
107 if (nread == -1) {
108 perror("read() failed");
109 exit(EXIT_FAILURE);
110 }
111
112 /* We expect only one kind of event */
113 if (msg.event != UFFD_EVENT_PAGEFAULT) {
114 fprintf(stderr, "Unexpected event on userfaultfd\n");
115 exit(EXIT_FAILURE);
116 }
117
118 /*
119 * We need to handle page faults in units of pages(!).
120 * So, round faulting address down to page boundary.
121 */
122 uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
123
124 offset = (char *) uffdio_copy.dst - uf_mem;
125 uffdio_copy.src = (unsigned long) &backing_mem[offset];
126
127 uffdio_copy.len = pagesize;
128 uffdio_copy.mode = 0;
129 uffdio_copy.copy = 0;
130 if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
131 perror("ioctl-UFFDIO_COPY failed");
132 exit(EXIT_FAILURE);
133 }
134 }
135 }
136
setup_uf_mem(void)137 void setup_uf_mem(void)
138 {
139 long uffd; /* userfaultfd file descriptor */
140 pthread_t thr;
141 struct uffdio_api uffdio_api;
142 struct uffdio_register uffdio_register;
143 int ret;
144
145 pagesize = sysconf(_SC_PAGE_SIZE);
146
147 /* Create and enable userfaultfd object */
148 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
149 if (uffd == -1) {
150 perror("userfaultfd() failed");
151 exit(EXIT_FAILURE);
152 }
153 uffdio_api.api = UFFD_API;
154 uffdio_api.features = 0;
155 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
156 perror("ioctl-UFFDIO_API failed");
157 exit(EXIT_FAILURE);
158 }
159
160 /*
161 * Create a private anonymous mapping. The memory will be demand-zero
162 * paged, that is, not yet allocated. When we actually touch the memory
163 * the related page will be allocated via the userfaultfd mechanism.
164 */
165 uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
166 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
167 if (uf_mem == MAP_FAILED) {
168 perror("mmap() failed");
169 exit(EXIT_FAILURE);
170 }
171
172 /*
173 * Register the memory range of the mapping we've just mapped to be
174 * handled by the userfaultfd object. In 'mode' we request to track
175 * missing pages (i.e. pages that have not yet been faulted-in).
176 */
177 uffdio_register.range.start = (unsigned long) uf_mem;
178 uffdio_register.range.len = UF_MEM_SIZE;
179 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
180 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
181 perror("ioctl-UFFDIO_REGISTER");
182 exit(EXIT_FAILURE);
183 }
184
185 /* Create a thread that will process the userfaultfd events */
186 ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
187 if (ret != 0) {
188 fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
189 exit(EXIT_FAILURE);
190 }
191 }
192
193 /*
194 * Assumption: the signal was delivered while userspace was in transactional or
195 * suspended state, i.e. uc->uc_link != NULL.
196 */
signal_handler(int signo,siginfo_t * si,void * uc)197 void signal_handler(int signo, siginfo_t *si, void *uc)
198 {
199 ucontext_t *ucp = uc;
200
201 /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
202 ucp->uc_link->uc_mcontext.regs->nip += 4;
203
204 ucp->uc_mcontext.v_regs =
205 get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
206
207 ucp->uc_link->uc_mcontext.v_regs =
208 get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
209
210 ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
211 }
212
/*
 * Probe for the userfaultfd syscall by invoking it with a flags argument
 * of -1.
 *
 * Returns true if the call returns 0, or if it leaves errno at anything other
 * than ENOSYS; returns false only when errno is ENOSYS.
 */
bool have_userfaultfd(void)
{
	/* Start from a clean errno so only this call can set ENOSYS */
	errno = 0;

	if (syscall(__NR_userfaultfd, -1) == 0)
		return true;

	return errno != ENOSYS;
}
222
tm_signal_pagefault(void)223 int tm_signal_pagefault(void)
224 {
225 struct sigaction sa;
226 stack_t ss;
227
228 SKIP_IF(!have_htm());
229 SKIP_IF(!have_userfaultfd());
230
231 setup_uf_mem();
232
233 /*
234 * Set an alternative stack that will generate a page fault when the
235 * signal is raised. The page fault will be treated via userfaultfd,
236 * i.e. via fault_handler_thread.
237 */
238 ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
239 ss.ss_size = SIGSTKSZ;
240 ss.ss_flags = 0;
241 if (sigaltstack(&ss, NULL) == -1) {
242 perror("sigaltstack() failed");
243 exit(EXIT_FAILURE);
244 }
245
246 sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
247 sa.sa_sigaction = signal_handler;
248 if (sigaction(SIGTRAP, &sa, NULL) == -1) {
249 perror("sigaction() failed");
250 exit(EXIT_FAILURE);
251 }
252
253 /* Trigger a SIGTRAP in transactional state */
254 asm __volatile__(
255 "tbegin.;"
256 "beq 1f;"
257 "trap;"
258 "1: ;"
259 : : : "memory");
260
261 /* Trigger a SIGTRAP in suspended state */
262 asm __volatile__(
263 "tbegin.;"
264 "beq 1f;"
265 "tsuspend.;"
266 "trap;"
267 "tresume.;"
268 "1: ;"
269 : : : "memory");
270
271 return EXIT_SUCCESS;
272 }
273
main(int argc,char ** argv)274 int main(int argc, char **argv)
275 {
276 /*
277 * Depending on kernel config, the TM Bad Thing might not result in a
278 * crash, instead the kernel never returns control back to userspace, so
279 * set a tight timeout. If the test passes it completes almost
280 * immediately.
281 */
282 test_harness_set_timeout(2);
283 return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
284 }
285