1 /*
2  * Copyright (c) 2020 Tobias Svehagen
3  * Copyright (c) 2023, Meta
4  * Copyright (c) 2024, Tenstorrent AI ULC
5  *
6  * SPDX-License-Identifier: Apache-2.0
7  */
8 
9 #include <zephyr/kernel.h>
10 #include <zephyr/net/socket.h>
11 #include <zephyr/posix/fcntl.h>
12 #include <zephyr/zvfs/eventfd.h>
13 #include <zephyr/sys/bitarray.h>
14 #include <zephyr/sys/fdtable.h>
15 #include <zephyr/sys/math_extras.h>
16 
/* Internal flag marking a pool slot as allocated; never exposed to callers. */
#define ZVFS_EFD_IN_USE    0x1
/* Mask of the flags a caller may pass to zvfs_eventfd() / F_SETFL. */
#define ZVFS_EFD_FLAGS_SET (ZVFS_EFD_SEMAPHORE | ZVFS_EFD_NONBLOCK)

struct zvfs_eventfd {
	struct k_poll_signal read_sig;  /* raised when cnt becomes nonzero (readable) */
	struct k_poll_signal write_sig; /* raised when cnt has room for a write */
	struct k_spinlock lock;         /* guards cnt and flags */
	zvfs_eventfd_t cnt;             /* the 64-bit eventfd counter */
	int flags;                      /* ZVFS_EFD_* bits plus ZVFS_EFD_IN_USE */
};

/* Forward declaration: common path shared by the read and write vtable ops. */
static ssize_t zvfs_eventfd_rw_op(void *obj, void *buf, size_t sz,
			     int (*op)(struct zvfs_eventfd *efd, zvfs_eventfd_t *value));

/* Static pool of eventfd objects; the bitarray tracks which slots are free. */
SYS_BITARRAY_DEFINE_STATIC(efds_bitarray, CONFIG_ZVFS_EVENTFD_MAX);
static struct zvfs_eventfd efds[CONFIG_ZVFS_EVENTFD_MAX];
static const struct fd_op_vtable zvfs_eventfd_fd_vtable;
34 
zvfs_eventfd_is_in_use(struct zvfs_eventfd * efd)35 static inline bool zvfs_eventfd_is_in_use(struct zvfs_eventfd *efd)
36 {
37 	return (efd->flags & ZVFS_EFD_IN_USE) != 0;
38 }
39 
zvfs_eventfd_is_semaphore(struct zvfs_eventfd * efd)40 static inline bool zvfs_eventfd_is_semaphore(struct zvfs_eventfd *efd)
41 {
42 	return (efd->flags & ZVFS_EFD_SEMAPHORE) != 0;
43 }
44 
zvfs_eventfd_is_blocking(struct zvfs_eventfd * efd)45 static inline bool zvfs_eventfd_is_blocking(struct zvfs_eventfd *efd)
46 {
47 	return (efd->flags & ZVFS_EFD_NONBLOCK) == 0;
48 }
49 
zvfs_eventfd_poll_prepare(struct zvfs_eventfd * efd,struct zsock_pollfd * pfd,struct k_poll_event ** pev,struct k_poll_event * pev_end)50 static int zvfs_eventfd_poll_prepare(struct zvfs_eventfd *efd,
51 				struct zsock_pollfd *pfd,
52 				struct k_poll_event **pev,
53 				struct k_poll_event *pev_end)
54 {
55 	if (pfd->events & ZSOCK_POLLIN) {
56 		if (*pev == pev_end) {
57 			errno = ENOMEM;
58 			return -1;
59 		}
60 
61 		(*pev)->obj = &efd->read_sig;
62 		(*pev)->type = K_POLL_TYPE_SIGNAL;
63 		(*pev)->mode = K_POLL_MODE_NOTIFY_ONLY;
64 		(*pev)->state = K_POLL_STATE_NOT_READY;
65 		(*pev)++;
66 	}
67 
68 	if (pfd->events & ZSOCK_POLLOUT) {
69 		if (*pev == pev_end) {
70 			errno = ENOMEM;
71 			return -1;
72 		}
73 
74 		(*pev)->obj = &efd->write_sig;
75 		(*pev)->type = K_POLL_TYPE_SIGNAL;
76 		(*pev)->mode = K_POLL_MODE_NOTIFY_ONLY;
77 		(*pev)->state = K_POLL_STATE_NOT_READY;
78 		(*pev)++;
79 	}
80 
81 	return 0;
82 }
83 
zvfs_eventfd_poll_update(struct zvfs_eventfd * efd,struct zsock_pollfd * pfd,struct k_poll_event ** pev)84 static int zvfs_eventfd_poll_update(struct zvfs_eventfd *efd,
85 			       struct zsock_pollfd *pfd,
86 			       struct k_poll_event **pev)
87 {
88 	if (pfd->events & ZSOCK_POLLIN) {
89 		pfd->revents |= ZSOCK_POLLIN * (efd->cnt > 0);
90 		(*pev)++;
91 	}
92 
93 	if (pfd->events & ZSOCK_POLLOUT) {
94 		pfd->revents |= ZSOCK_POLLOUT * (efd->cnt < UINT64_MAX - 1);
95 		(*pev)++;
96 	}
97 
98 	return 0;
99 }
100 
/*
 * Consume the counter. Caller must hold efd->lock.
 *
 * Returns 0 with *value filled in on success, -EBADF if the eventfd has
 * been closed, or -EAGAIN when the counter is zero (the caller decides
 * whether that means "block and retry" or "fail").
 */
static int zvfs_eventfd_read_locked(struct zvfs_eventfd *efd, zvfs_eventfd_t *value)
{
	if (!zvfs_eventfd_is_in_use(efd)) {
		return -EBADF;
	}

	if (efd->cnt == 0) {
		return -EAGAIN;
	}

	if (zvfs_eventfd_is_semaphore(efd)) {
		/* semaphore mode: hand out a single count per read */
		efd->cnt -= 1;
		*value = 1;
	} else {
		/* normal mode: drain the entire counter */
		*value = efd->cnt;
		efd->cnt = 0;
	}

	/* no longer readable once the counter reaches zero */
	if (efd->cnt == 0) {
		k_poll_signal_reset(&efd->read_sig);
	}

	/* a successful read always makes room for writers */
	k_poll_signal_raise(&efd->write_sig, 0);

	return 0;
}
130 
/*
 * Add *value to the counter. Caller must hold efd->lock.
 *
 * Returns 0 on success, -EBADF if the eventfd has been closed, -EINVAL
 * for the forbidden value UINT64_MAX, or -EAGAIN when the addition would
 * push the counter to UINT64_MAX or beyond (the caller decides whether
 * that means "block and retry" or "fail").
 */
static int zvfs_eventfd_write_locked(struct zvfs_eventfd *efd, zvfs_eventfd_t *value)
{
	zvfs_eventfd_t sum;

	if (!zvfs_eventfd_is_in_use(efd)) {
		return -EBADF;
	}

	/* writing 0xffffffffffffffff is never permitted */
	if (*value == UINT64_MAX) {
		return -EINVAL;
	}

	if (u64_add_overflow(efd->cnt, *value, &sum) || sum == UINT64_MAX) {
		return -EAGAIN;
	}

	efd->cnt = sum;

	/* counter saturated: no room for writers until someone reads */
	if (efd->cnt == (UINT64_MAX - 1)) {
		k_poll_signal_reset(&efd->write_sig);
	}

	/* let pollers and blocked readers re-examine the counter */
	k_poll_signal_raise(&efd->read_sig, 0);

	return 0;
}
161 
zvfs_eventfd_read_op(void * obj,void * buf,size_t sz)162 static ssize_t zvfs_eventfd_read_op(void *obj, void *buf, size_t sz)
163 {
164 	return zvfs_eventfd_rw_op(obj, buf, sz, zvfs_eventfd_read_locked);
165 }
166 
zvfs_eventfd_write_op(void * obj,const void * buf,size_t sz)167 static ssize_t zvfs_eventfd_write_op(void *obj, const void *buf, size_t sz)
168 {
169 	return zvfs_eventfd_rw_op(obj, (zvfs_eventfd_t *)buf, sz, zvfs_eventfd_write_locked);
170 }
171 
/*
 * vtable .close: release the eventfd slot back to the static pool.
 *
 * Lock order matches zvfs_eventfd_rw_op(): the fdtable mutex is taken
 * first, then the per-object spinlock. After freeing the slot, every
 * thread blocked in a read/write is woken via the condvar broadcast so
 * it can observe -EBADF from the *_locked op and return.
 *
 * Returns 0 on success, or -1 with errno set (EWOULDBLOCK from ISR
 * context, EBADF if the object was already closed).
 */
static int zvfs_eventfd_close_op(void *obj)
{
	int ret;
	int err;
	k_spinlock_key_t key;
	struct k_mutex *lock = NULL;
	struct k_condvar *cond = NULL;
	struct zvfs_eventfd *efd = (struct zvfs_eventfd *)obj;

	if (k_is_in_isr()) {
		/* not covered by the man page, but necessary in Zephyr */
		errno = EWOULDBLOCK;
		return -1;
	}

	/* fetch the fdtable-provided mutex/condvar pair for this object */
	err = (int)zvfs_get_obj_lock_and_cond(obj, &zvfs_eventfd_fd_vtable, &lock, &cond);
	__ASSERT((bool)err, "zvfs_get_obj_lock_and_cond() failed");
	__ASSERT_NO_MSG(lock != NULL);
	__ASSERT_NO_MSG(cond != NULL);

	err = k_mutex_lock(lock, K_FOREVER);
	__ASSERT(err == 0, "k_mutex_lock() failed: %d", err);

	key = k_spin_lock(&efd->lock);

	if (!zvfs_eventfd_is_in_use(efd)) {
		/* already closed (e.g. concurrently) */
		errno = EBADF;
		ret = -1;
		goto unlock;
	}

	/* return this slot to the pool; the index is the pool offset */
	err = sys_bitarray_free(&efds_bitarray, 1, (struct zvfs_eventfd *)obj - efds);
	__ASSERT(err == 0, "sys_bitarray_free() failed: %d", err);

	/* clearing ZVFS_EFD_IN_USE makes blocked ops fail with -EBADF */
	efd->flags = 0;
	efd->cnt = 0;

	ret = 0;

unlock:
	k_spin_unlock(&efd->lock, key);
	/* when closing an zvfs_eventfd, broadcast to all waiters */
	err = k_condvar_broadcast(cond);
	__ASSERT(err == 0, "k_condvar_broadcast() failed: %d", err);
	err = k_mutex_unlock(lock);
	__ASSERT(err == 0, "k_mutex_unlock() failed: %d", err);

	return ret;
}
221 
/*
 * vtable .ioctl: handle F_GETFL/F_SETFL and the internal poll hooks.
 *
 * The per-object spinlock is held for the whole dispatch, so the poll
 * prepare/update helpers run with a consistent view of cnt and flags.
 *
 * Returns a request-specific non-negative value on success, or -1 with
 * errno set (EBADF when closed, EINVAL for bad F_SETFL flags,
 * EOPNOTSUPP for unknown requests).
 */
static int zvfs_eventfd_ioctl_op(void *obj, unsigned int request, va_list args)
{
	int ret;
	k_spinlock_key_t key;
	struct zvfs_eventfd *efd = (struct zvfs_eventfd *)obj;

	/* note: zsock_poll_internal() has already taken the mutex */
	key = k_spin_lock(&efd->lock);

	if (!zvfs_eventfd_is_in_use(efd)) {
		errno = EBADF;
		ret = -1;
		goto unlock;
	}

	switch (request) {
	case F_GETFL:
		/* report only the caller-visible flag bits */
		ret = efd->flags & ZVFS_EFD_FLAGS_SET;
		break;

	case F_SETFL: {
		int flags;

		flags = va_arg(args, int);

		if (flags & ~ZVFS_EFD_FLAGS_SET) {
			errno = EINVAL;
			ret = -1;
		} else {
			/* replace the settable bits, keep internal ones (e.g. IN_USE) */
			int prev_flags = efd->flags & ~ZVFS_EFD_FLAGS_SET;

			efd->flags = flags | prev_flags;
			ret = 0;
		}
	} break;

	case ZFD_IOCTL_POLL_PREPARE: {
		struct zsock_pollfd *pfd;
		struct k_poll_event **pev;
		struct k_poll_event *pev_end;

		pfd = va_arg(args, struct zsock_pollfd *);
		pev = va_arg(args, struct k_poll_event **);
		pev_end = va_arg(args, struct k_poll_event *);

		ret = zvfs_eventfd_poll_prepare(obj, pfd, pev, pev_end);
	} break;

	case ZFD_IOCTL_POLL_UPDATE: {
		struct zsock_pollfd *pfd;
		struct k_poll_event **pev;

		pfd = va_arg(args, struct zsock_pollfd *);
		pev = va_arg(args, struct k_poll_event **);

		ret = zvfs_eventfd_poll_update(obj, pfd, pev);
	} break;

	default:
		errno = EOPNOTSUPP;
		ret = -1;
		break;
	}

unlock:
	k_spin_unlock(&efd->lock, key);

	return ret;
}
291 
/* File-descriptor operations backing every eventfd object. */
static const struct fd_op_vtable zvfs_eventfd_fd_vtable = {
	.read = zvfs_eventfd_read_op,
	.write = zvfs_eventfd_write_op,
	.close = zvfs_eventfd_close_op,
	.ioctl = zvfs_eventfd_ioctl_op,
};
298 
/* common to both zvfs_eventfd_read_op() and zvfs_eventfd_write_op() */
/*
 * Shared read/write engine. @p op is one of the *_locked helpers and is
 * always invoked with efd->lock held.
 *
 * Non-blocking mode is handled entirely under the spinlock. Blocking mode
 * drops the spinlock, takes the fdtable mutex/condvar pair, and loops:
 * retake the spinlock, try op(); on -EAGAIN release the spinlock and wait
 * on the condvar (signalled by the opposite operation or by close). On
 * the way out a single other waiter is signalled.
 *
 * Returns sizeof(zvfs_eventfd_t) on success, or -1 with errno set.
 */
static ssize_t zvfs_eventfd_rw_op(void *obj, void *buf, size_t sz,
			     int (*op)(struct zvfs_eventfd *efd, zvfs_eventfd_t *value))
{
	int err;
	ssize_t ret;
	k_spinlock_key_t key;
	struct zvfs_eventfd *efd = obj;
	struct k_mutex *lock = NULL;
	struct k_condvar *cond = NULL;

	/* the caller's buffer must hold a full 64-bit value */
	if (sz < sizeof(zvfs_eventfd_t)) {
		errno = EINVAL;
		return -1;
	}

	if (buf == NULL) {
		errno = EFAULT;
		return -1;
	}

	key = k_spin_lock(&efd->lock);

	if (!zvfs_eventfd_is_blocking(efd)) {
		/*
		 * Handle the non-blocking case entirely within this scope
		 */
		ret = op(efd, buf);
		if (ret < 0) {
			/* -EAGAIN/-EBADF/-EINVAL map straight to errno */
			errno = -ret;
			ret = -1;
		} else {
			ret = sizeof(zvfs_eventfd_t);
		}

		goto unlock_spin;
	}

	/*
	 * Handle the blocking case below
	 */
	__ASSERT_NO_MSG(zvfs_eventfd_is_blocking(efd));

	if (k_is_in_isr()) {
		/* not covered by the man page, but necessary in Zephyr */
		errno = EWOULDBLOCK;
		ret = -1;
		goto unlock_spin;
	}

	/* fetch the fdtable-provided mutex/condvar pair for this object */
	err = (int)zvfs_get_obj_lock_and_cond(obj, &zvfs_eventfd_fd_vtable, &lock, &cond);
	__ASSERT((bool)err, "zvfs_get_obj_lock_and_cond() failed");
	__ASSERT_NO_MSG(lock != NULL);
	__ASSERT_NO_MSG(cond != NULL);

	/* do not hold a spinlock when taking a mutex */
	k_spin_unlock(&efd->lock, key);
	err = k_mutex_lock(lock, K_FOREVER);
	__ASSERT(err == 0, "k_mutex_lock() failed: %d", err);

	while (true) {
		/* retake the spinlock */
		key = k_spin_lock(&efd->lock);

		ret = op(efd, buf);
		switch (ret) {
		case -EAGAIN:
			/* not an error in blocking mode. break and try again */
			break;
		case 0:
			/* success! */
			ret = sizeof(zvfs_eventfd_t);
			goto unlock_mutex;
		default:
			/* some other error */
			__ASSERT_NO_MSG(ret < 0);
			errno = -ret;
			ret = -1;
			goto unlock_mutex;
		}

		/* do not hold a spinlock when taking a mutex */
		k_spin_unlock(&efd->lock, key);

		/* wait for a write or close */
		err = k_condvar_wait(cond, lock, K_FOREVER);
		__ASSERT(err == 0, "k_condvar_wait() failed: %d", err);
	}

unlock_mutex:
	k_spin_unlock(&efd->lock, key);
	/* only wake a single waiter */
	err = k_condvar_signal(cond);
	__ASSERT(err == 0, "k_condvar_signal() failed: %d", err);
	err = k_mutex_unlock(lock);
	__ASSERT(err == 0, "k_mutex_unlock() failed: %d", err);
	goto out;

unlock_spin:
	k_spin_unlock(&efd->lock, key);

out:
	return ret;
}
403 
404 /*
405  * Public-facing API
406  */
407 
zvfs_eventfd(unsigned int initval,int flags)408 int zvfs_eventfd(unsigned int initval, int flags)
409 {
410 	int fd = 1;
411 	size_t offset;
412 	struct zvfs_eventfd *efd = NULL;
413 
414 	if (flags & ~ZVFS_EFD_FLAGS_SET) {
415 		errno = EINVAL;
416 		return -1;
417 	}
418 
419 	if (sys_bitarray_alloc(&efds_bitarray, 1, &offset) < 0) {
420 		errno = ENOMEM;
421 		return -1;
422 	}
423 
424 	efd = &efds[offset];
425 
426 	fd = zvfs_reserve_fd();
427 	if (fd < 0) {
428 		sys_bitarray_free(&efds_bitarray, 1, offset);
429 		return -1;
430 	}
431 
432 	efd->flags = ZVFS_EFD_IN_USE | flags;
433 	efd->cnt = initval;
434 
435 	k_poll_signal_init(&efd->write_sig);
436 	k_poll_signal_init(&efd->read_sig);
437 
438 	if (initval != 0) {
439 		k_poll_signal_raise(&efd->read_sig, 0);
440 	}
441 
442 	k_poll_signal_raise(&efd->write_sig, 0);
443 
444 	zvfs_finalize_fd(fd, efd, &zvfs_eventfd_fd_vtable);
445 
446 	return fd;
447 }
448 
/*
 * Public API: read the eventfd counter into *value.
 * Returns 0 on success, -1 with errno set on failure.
 */
int zvfs_eventfd_read(int fd, zvfs_eventfd_t *value)
{
	void *obj;
	ssize_t ret;

	obj = zvfs_get_fd_obj(fd, &zvfs_eventfd_fd_vtable, EBADF);
	if (obj == NULL) {
		return -1;
	}

	ret = zvfs_eventfd_rw_op(obj, value, sizeof(zvfs_eventfd_t), zvfs_eventfd_read_locked);
	__ASSERT_NO_MSG(ret == -1 || ret == sizeof(zvfs_eventfd_t));

	return (ret < 0) ? -1 : 0;
}
467 
/*
 * Public API: add value to the eventfd counter.
 * Returns 0 on success, -1 with errno set on failure.
 */
int zvfs_eventfd_write(int fd, zvfs_eventfd_t value)
{
	void *obj;
	ssize_t ret;

	obj = zvfs_get_fd_obj(fd, &zvfs_eventfd_fd_vtable, EBADF);
	if (obj == NULL) {
		return -1;
	}

	ret = zvfs_eventfd_rw_op(obj, &value, sizeof(zvfs_eventfd_t), zvfs_eventfd_write_locked);
	__ASSERT_NO_MSG(ret == -1 || ret == sizeof(zvfs_eventfd_t));

	return (ret < 0) ? -1 : 0;
}
486