1 /*
2 * Copyright (c) 2020 Tobias Svehagen
3 * Copyright (c) 2023, Meta
4 * Copyright (c) 2024, Tenstorrent AI ULC
5 *
6 * SPDX-License-Identifier: Apache-2.0
7 */
8
9 #include <zephyr/kernel.h>
10 #include <zephyr/net/socket.h>
11 #include <zephyr/posix/fcntl.h>
12 #include <zephyr/zvfs/eventfd.h>
13 #include <zephyr/sys/bitarray.h>
14 #include <zephyr/sys/fdtable.h>
15 #include <zephyr/sys/math_extras.h>
16
17 #define ZVFS_EFD_IN_USE 0x1
18 #define ZVFS_EFD_FLAGS_SET (ZVFS_EFD_SEMAPHORE | ZVFS_EFD_NONBLOCK)
19
/* Per-instance state of one eventfd object. Instances live in the static
 * efds[] pool below and are handed out by zvfs_eventfd().
 */
struct zvfs_eventfd {
	struct k_poll_signal read_sig;	/* raised when the counter becomes readable */
	struct k_poll_signal write_sig;	/* raised when a write could succeed */
	struct k_spinlock lock;		/* protects cnt and flags */
	zvfs_eventfd_t cnt;		/* the 64-bit eventfd counter */
	int flags;			/* ZVFS_EFD_* bits, including ZVFS_EFD_IN_USE */
};
27
/* Forward declaration: common implementation shared by the vtable read/write
 * ops and the public zvfs_eventfd_read()/zvfs_eventfd_write() API.
 */
static ssize_t zvfs_eventfd_rw_op(void *obj, void *buf, size_t sz,
				  int (*op)(struct zvfs_eventfd *efd, zvfs_eventfd_t *value));

/* Allocation bitmap tracking which slots of the static efds[] pool are used */
SYS_BITARRAY_DEFINE_STATIC(efds_bitarray, CONFIG_ZVFS_EVENTFD_MAX);
static struct zvfs_eventfd efds[CONFIG_ZVFS_EVENTFD_MAX];
/* defined below; also serves as the identity token in the fd table */
static const struct fd_op_vtable zvfs_eventfd_fd_vtable;
34
zvfs_eventfd_is_in_use(struct zvfs_eventfd * efd)35 static inline bool zvfs_eventfd_is_in_use(struct zvfs_eventfd *efd)
36 {
37 return (efd->flags & ZVFS_EFD_IN_USE) != 0;
38 }
39
zvfs_eventfd_is_semaphore(struct zvfs_eventfd * efd)40 static inline bool zvfs_eventfd_is_semaphore(struct zvfs_eventfd *efd)
41 {
42 return (efd->flags & ZVFS_EFD_SEMAPHORE) != 0;
43 }
44
zvfs_eventfd_is_blocking(struct zvfs_eventfd * efd)45 static inline bool zvfs_eventfd_is_blocking(struct zvfs_eventfd *efd)
46 {
47 return (efd->flags & ZVFS_EFD_NONBLOCK) == 0;
48 }
49
zvfs_eventfd_poll_prepare(struct zvfs_eventfd * efd,struct zsock_pollfd * pfd,struct k_poll_event ** pev,struct k_poll_event * pev_end)50 static int zvfs_eventfd_poll_prepare(struct zvfs_eventfd *efd,
51 struct zsock_pollfd *pfd,
52 struct k_poll_event **pev,
53 struct k_poll_event *pev_end)
54 {
55 if (pfd->events & ZSOCK_POLLIN) {
56 if (*pev == pev_end) {
57 errno = ENOMEM;
58 return -1;
59 }
60
61 (*pev)->obj = &efd->read_sig;
62 (*pev)->type = K_POLL_TYPE_SIGNAL;
63 (*pev)->mode = K_POLL_MODE_NOTIFY_ONLY;
64 (*pev)->state = K_POLL_STATE_NOT_READY;
65 (*pev)++;
66 }
67
68 if (pfd->events & ZSOCK_POLLOUT) {
69 if (*pev == pev_end) {
70 errno = ENOMEM;
71 return -1;
72 }
73
74 (*pev)->obj = &efd->write_sig;
75 (*pev)->type = K_POLL_TYPE_SIGNAL;
76 (*pev)->mode = K_POLL_MODE_NOTIFY_ONLY;
77 (*pev)->state = K_POLL_STATE_NOT_READY;
78 (*pev)++;
79 }
80
81 return 0;
82 }
83
zvfs_eventfd_poll_update(struct zvfs_eventfd * efd,struct zsock_pollfd * pfd,struct k_poll_event ** pev)84 static int zvfs_eventfd_poll_update(struct zvfs_eventfd *efd,
85 struct zsock_pollfd *pfd,
86 struct k_poll_event **pev)
87 {
88 if (pfd->events & ZSOCK_POLLIN) {
89 pfd->revents |= ZSOCK_POLLIN * (efd->cnt > 0);
90 (*pev)++;
91 }
92
93 if (pfd->events & ZSOCK_POLLOUT) {
94 pfd->revents |= ZSOCK_POLLOUT * (efd->cnt < UINT64_MAX - 1);
95 (*pev)++;
96 }
97
98 return 0;
99 }
100
/* Consume from the counter; the caller must hold efd->lock.
 * Semaphore mode yields 1 and decrements; normal mode drains the counter.
 * Returns 0 on success, -EBADF if the fd was closed, -EAGAIN if empty.
 */
static int zvfs_eventfd_read_locked(struct zvfs_eventfd *efd, zvfs_eventfd_t *value)
{
	if (!zvfs_eventfd_is_in_use(efd)) {
		/* raced with close() */
		return -EBADF;
	}

	if (efd->cnt == 0) {
		/* nothing to consume yet */
		return -EAGAIN;
	}

	if (zvfs_eventfd_is_semaphore(efd)) {
		/* hand out exactly one unit */
		*value = 1;
		efd->cnt--;
	} else {
		/* hand out everything at once */
		*value = efd->cnt;
		efd->cnt = 0;
	}

	if (efd->cnt == 0) {
		/* no longer readable until the next write */
		k_poll_signal_reset(&efd->read_sig);
	}

	/* the read freed space, so writing is now possible */
	k_poll_signal_raise(&efd->write_sig, 0);

	return 0;
}
130
/* Add *value to the counter; the caller must hold efd->lock.
 * Returns 0 on success, -EBADF if the fd was closed, -EINVAL for the
 * forbidden value UINT64_MAX, -EAGAIN if the addition would overflow or
 * reach UINT64_MAX (the caller may block and retry).
 */
static int zvfs_eventfd_write_locked(struct zvfs_eventfd *efd, zvfs_eventfd_t *value)
{
	zvfs_eventfd_t sum;

	if (!zvfs_eventfd_is_in_use(efd)) {
		/* raced with close() */
		return -EBADF;
	}

	if (*value == UINT64_MAX) {
		/* UINT64_MAX may never be written to an eventfd */
		return -EINVAL;
	}

	if (u64_add_overflow(efd->cnt, *value, &sum) || sum == UINT64_MAX) {
		/* counter full: would block / try again */
		return -EAGAIN;
	}

	efd->cnt = sum;

	if (efd->cnt == UINT64_MAX - 1) {
		/* counter is now at its ceiling; no further writes fit */
		k_poll_signal_reset(&efd->write_sig);
	}

	/* counter is non-zero, so the fd is readable */
	k_poll_signal_raise(&efd->read_sig, 0);

	return 0;
}
161
zvfs_eventfd_read_op(void * obj,void * buf,size_t sz)162 static ssize_t zvfs_eventfd_read_op(void *obj, void *buf, size_t sz)
163 {
164 return zvfs_eventfd_rw_op(obj, buf, sz, zvfs_eventfd_read_locked);
165 }
166
zvfs_eventfd_write_op(void * obj,const void * buf,size_t sz)167 static ssize_t zvfs_eventfd_write_op(void *obj, const void *buf, size_t sz)
168 {
169 return zvfs_eventfd_rw_op(obj, (zvfs_eventfd_t *)buf, sz, zvfs_eventfd_write_locked);
170 }
171
/* fd_op_vtable .close handler: mark the instance unused and return its slot
 * to the pool.
 *
 * Lock order: the per-fd mutex is taken before the instance spinlock, so the
 * condvar broadcast below is well ordered with respect to threads blocked in
 * zvfs_eventfd_rw_op(), which use the same mutex/condvar pair.
 *
 * Returns 0 on success, or -1 with errno set to EWOULDBLOCK when called from
 * an ISR, or EBADF when the object was already closed.
 */
static int zvfs_eventfd_close_op(void *obj)
{
	int ret;
	int err;
	k_spinlock_key_t key;
	struct k_mutex *lock = NULL;
	struct k_condvar *cond = NULL;
	struct zvfs_eventfd *efd = (struct zvfs_eventfd *)obj;

	if (k_is_in_isr()) {
		/* not covered by the man page, but necessary in Zephyr */
		errno = EWOULDBLOCK;
		return -1;
	}

	err = (int)zvfs_get_obj_lock_and_cond(obj, &zvfs_eventfd_fd_vtable, &lock, &cond);
	__ASSERT((bool)err, "zvfs_get_obj_lock_and_cond() failed");
	__ASSERT_NO_MSG(lock != NULL);
	__ASSERT_NO_MSG(cond != NULL);

	err = k_mutex_lock(lock, K_FOREVER);
	__ASSERT(err == 0, "k_mutex_lock() failed: %d", err);

	key = k_spin_lock(&efd->lock);

	if (!zvfs_eventfd_is_in_use(efd)) {
		errno = EBADF;
		ret = -1;
		goto unlock;
	}

	/* pointer arithmetic yields this instance's index in efds[] */
	err = sys_bitarray_free(&efds_bitarray, 1, (struct zvfs_eventfd *)obj - efds);
	__ASSERT(err == 0, "sys_bitarray_free() failed: %d", err);

	efd->flags = 0;
	efd->cnt = 0;

	ret = 0;

unlock:
	k_spin_unlock(&efd->lock, key);
	/* when closing an zvfs_eventfd, broadcast to all waiters */
	err = k_condvar_broadcast(cond);
	__ASSERT(err == 0, "k_condvar_broadcast() failed: %d", err);
	err = k_mutex_unlock(lock);
	__ASSERT(err == 0, "k_mutex_unlock() failed: %d", err);

	return ret;
}
221
/* fd_op_vtable .ioctl handler: fcntl-style flag access plus the zsock poll
 * integration hooks.
 *
 * Supported requests:
 *  - F_GETFL / F_SETFL: read or replace the public ZVFS_EFD_* flag bits
 *  - ZFD_IOCTL_POLL_PREPARE / ZFD_IOCTL_POLL_UPDATE: delegate to the poll
 *    helpers above; the spinlock held here protects the counter they read
 *
 * Returns a request-specific non-negative value on success, or -1 with
 * errno set (EBADF if closed, EINVAL for bad flags, EOPNOTSUPP otherwise).
 */
static int zvfs_eventfd_ioctl_op(void *obj, unsigned int request, va_list args)
{
	int ret;
	k_spinlock_key_t key;
	struct zvfs_eventfd *efd = (struct zvfs_eventfd *)obj;

	/* note: zsock_poll_internal() has already taken the mutex */
	key = k_spin_lock(&efd->lock);

	if (!zvfs_eventfd_is_in_use(efd)) {
		errno = EBADF;
		ret = -1;
		goto unlock;
	}

	switch (request) {
	case F_GETFL:
		/* report only the publicly settable bits */
		ret = efd->flags & ZVFS_EFD_FLAGS_SET;
		break;

	case F_SETFL: {
		int flags;

		flags = va_arg(args, int);

		if (flags & ~ZVFS_EFD_FLAGS_SET) {
			errno = EINVAL;
			ret = -1;
		} else {
			/* preserve internal bits such as ZVFS_EFD_IN_USE */
			int prev_flags = efd->flags & ~ZVFS_EFD_FLAGS_SET;

			efd->flags = flags | prev_flags;
			ret = 0;
		}
	} break;

	case ZFD_IOCTL_POLL_PREPARE: {
		struct zsock_pollfd *pfd;
		struct k_poll_event **pev;
		struct k_poll_event *pev_end;

		pfd = va_arg(args, struct zsock_pollfd *);
		pev = va_arg(args, struct k_poll_event **);
		pev_end = va_arg(args, struct k_poll_event *);

		ret = zvfs_eventfd_poll_prepare(obj, pfd, pev, pev_end);
	} break;

	case ZFD_IOCTL_POLL_UPDATE: {
		struct zsock_pollfd *pfd;
		struct k_poll_event **pev;

		pfd = va_arg(args, struct zsock_pollfd *);
		pev = va_arg(args, struct k_poll_event **);

		ret = zvfs_eventfd_poll_update(obj, pfd, pev);
	} break;

	default:
		errno = EOPNOTSUPP;
		ret = -1;
		break;
	}

unlock:
	k_spin_unlock(&efd->lock, key);

	return ret;
}
291
/* Operations table installed in the fd table for every eventfd; its address
 * also identifies eventfd objects in zvfs_get_fd_obj() lookups.
 */
static const struct fd_op_vtable zvfs_eventfd_fd_vtable = {
	.read = zvfs_eventfd_read_op,
	.write = zvfs_eventfd_write_op,
	.close = zvfs_eventfd_close_op,
	.ioctl = zvfs_eventfd_ioctl_op,
};
298
/* common to both zvfs_eventfd_read_op() and zvfs_eventfd_write_op()
 *
 * Transfers one zvfs_eventfd_t between @buf and the counter by calling @op
 * (zvfs_eventfd_read_locked or zvfs_eventfd_write_locked) under efd->lock.
 *
 * Non-blocking mode is handled entirely under the spinlock. Blocking mode
 * drops the spinlock, takes the per-fd mutex, then loops: retake spinlock,
 * attempt @op, and on -EAGAIN wait on the fd's condvar — which is signalled
 * by a successful blocking op on the same fd and broadcast by close.
 *
 * Returns sizeof(zvfs_eventfd_t) on success, or -1 with errno set.
 */
static ssize_t zvfs_eventfd_rw_op(void *obj, void *buf, size_t sz,
				  int (*op)(struct zvfs_eventfd *efd, zvfs_eventfd_t *value))
{
	int err;
	ssize_t ret;
	k_spinlock_key_t key;
	struct zvfs_eventfd *efd = obj;
	struct k_mutex *lock = NULL;
	struct k_condvar *cond = NULL;

	/* the caller's buffer must hold a full 64-bit value */
	if (sz < sizeof(zvfs_eventfd_t)) {
		errno = EINVAL;
		return -1;
	}

	if (buf == NULL) {
		errno = EFAULT;
		return -1;
	}

	key = k_spin_lock(&efd->lock);

	if (!zvfs_eventfd_is_blocking(efd)) {
		/*
		 * Handle the non-blocking case entirely within this scope
		 */
		ret = op(efd, buf);
		if (ret < 0) {
			errno = -ret;
			ret = -1;
		} else {
			ret = sizeof(zvfs_eventfd_t);
		}

		goto unlock_spin;
	}

	/*
	 * Handle the blocking case below
	 */
	__ASSERT_NO_MSG(zvfs_eventfd_is_blocking(efd));

	if (k_is_in_isr()) {
		/* not covered by the man page, but necessary in Zephyr */
		errno = EWOULDBLOCK;
		ret = -1;
		goto unlock_spin;
	}

	err = (int)zvfs_get_obj_lock_and_cond(obj, &zvfs_eventfd_fd_vtable, &lock, &cond);
	__ASSERT((bool)err, "zvfs_get_obj_lock_and_cond() failed");
	__ASSERT_NO_MSG(lock != NULL);
	__ASSERT_NO_MSG(cond != NULL);

	/* do not hold a spinlock when taking a mutex */
	k_spin_unlock(&efd->lock, key);
	err = k_mutex_lock(lock, K_FOREVER);
	__ASSERT(err == 0, "k_mutex_lock() failed: %d", err);

	while (true) {
		/* retake the spinlock */
		key = k_spin_lock(&efd->lock);

		ret = op(efd, buf);
		switch (ret) {
		case -EAGAIN:
			/* not an error in blocking mode. break and try again */
			break;
		case 0:
			/* success! */
			ret = sizeof(zvfs_eventfd_t);
			goto unlock_mutex;
		default:
			/* some other error */
			__ASSERT_NO_MSG(ret < 0);
			errno = -ret;
			ret = -1;
			goto unlock_mutex;
		}

		/* do not hold a spinlock when taking a mutex */
		k_spin_unlock(&efd->lock, key);

		/* wait for a write or close */
		err = k_condvar_wait(cond, lock, K_FOREVER);
		__ASSERT(err == 0, "k_condvar_wait() failed: %d", err);
	}

unlock_mutex:
	k_spin_unlock(&efd->lock, key);
	/* only wake a single waiter */
	err = k_condvar_signal(cond);
	__ASSERT(err == 0, "k_condvar_signal() failed: %d", err);
	err = k_mutex_unlock(lock);
	__ASSERT(err == 0, "k_mutex_unlock() failed: %d", err);
	goto out;

unlock_spin:
	k_spin_unlock(&efd->lock, key);

out:
	return ret;
}
403
404 /*
405 * Public-facing API
406 */
407
zvfs_eventfd(unsigned int initval,int flags)408 int zvfs_eventfd(unsigned int initval, int flags)
409 {
410 int fd = 1;
411 size_t offset;
412 struct zvfs_eventfd *efd = NULL;
413
414 if (flags & ~ZVFS_EFD_FLAGS_SET) {
415 errno = EINVAL;
416 return -1;
417 }
418
419 if (sys_bitarray_alloc(&efds_bitarray, 1, &offset) < 0) {
420 errno = ENOMEM;
421 return -1;
422 }
423
424 efd = &efds[offset];
425
426 fd = zvfs_reserve_fd();
427 if (fd < 0) {
428 sys_bitarray_free(&efds_bitarray, 1, offset);
429 return -1;
430 }
431
432 efd->flags = ZVFS_EFD_IN_USE | flags;
433 efd->cnt = initval;
434
435 k_poll_signal_init(&efd->write_sig);
436 k_poll_signal_init(&efd->read_sig);
437
438 if (initval != 0) {
439 k_poll_signal_raise(&efd->read_sig, 0);
440 }
441
442 k_poll_signal_raise(&efd->write_sig, 0);
443
444 zvfs_finalize_fd(fd, efd, &zvfs_eventfd_fd_vtable);
445
446 return fd;
447 }
448
/* Public API: read the counter of @fd into *value.
 * Returns 0 on success, or -1 with errno set (EBADF for a bad fd, plus any
 * error from the underlying read path).
 */
int zvfs_eventfd_read(int fd, zvfs_eventfd_t *value)
{
	void *obj = zvfs_get_fd_obj(fd, &zvfs_eventfd_fd_vtable, EBADF);

	if (obj == NULL) {
		return -1;
	}

	ssize_t rc = zvfs_eventfd_rw_op(obj, value, sizeof(zvfs_eventfd_t),
					zvfs_eventfd_read_locked);
	__ASSERT_NO_MSG(rc == -1 || rc == sizeof(zvfs_eventfd_t));

	return (rc < 0) ? -1 : 0;
}
467
/* Public API: add @value to the counter of @fd.
 * Returns 0 on success, or -1 with errno set (EBADF for a bad fd, plus any
 * error from the underlying write path).
 */
int zvfs_eventfd_write(int fd, zvfs_eventfd_t value)
{
	void *obj = zvfs_get_fd_obj(fd, &zvfs_eventfd_fd_vtable, EBADF);

	if (obj == NULL) {
		return -1;
	}

	ssize_t rc = zvfs_eventfd_rw_op(obj, &value, sizeof(zvfs_eventfd_t),
					zvfs_eventfd_write_locked);
	__ASSERT_NO_MSG(rc == -1 || rc == sizeof(zvfs_eventfd_t));

	return (rc < 0) ? -1 : 0;
}
486