Lines Matching +full:poll +full:- +full:retry +full:- +full:count

1 // SPDX-License-Identifier: GPL-2.0
14 * through a control-dependency in io_get_cqring (smp_store_release to
25 * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
39 * Copyright (C) 2018-2019 Jens Axboe
40 * Copyright (c) 2018-2019 Christoph Hellwig
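The header comment above notes that with IORING_SETUP_SQPOLL a kernel-side thread polls the SQ ring, so the application only rarely needs to enter the kernel itself. Below is a minimal user-space sketch of requesting that mode; it assumes liburing is installed (io_uring_queue_init_params() and the other io_uring_* helpers come from liburing, not from this file), and the idle timeout is an arbitrary choice.

/*
 * sqpoll_demo.c - user-space sketch of IORING_SETUP_SQPOLL (assumes liburing).
 * Build guess: gcc sqpoll_demo.c -luring
 */
#include <liburing.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    struct io_uring_params p;
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    int ret;

    memset(&p, 0, sizeof(p));
    p.flags = IORING_SETUP_SQPOLL;  /* kernel thread polls the SQ ring */
    p.sq_thread_idle = 2000;        /* ms of idle before the thread sleeps */

    ret = io_uring_queue_init_params(8, &ring, &p);
    if (ret < 0) {
        /* older kernels require extra privileges for SQPOLL */
        fprintf(stderr, "queue_init: %s\n", strerror(-ret));
        return 1;
    }

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_nop(sqe);
    /* With SQPOLL, submit() only wakes the poller if it has gone idle. */
    io_uring_submit(&ring);

    if (io_uring_wait_cqe(&ring, &cqe) == 0) {
        printf("nop completed, res=%d\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
    }
    io_uring_queue_exit(&ring);
    return 0;
}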
83 #include <linux/blk-cgroup.h>
92 #include "io-wq.h"
98 * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
102 #define IORING_FILE_TABLE_MASK (IORING_MAX_FILES_TABLE - 1)
131 * ring_entries - 1)
298 * For SQPOLL usage - we hold a reference to the parent task, so we
299 * have access to the ->files
317 * If used, fixed file set. Writers must ensure that ->refs is dead,
318 * readers must ensure that ->refs is alive as long as the file* is
371 * ->iopoll_list is protected by the ctx->uring_lock for
626 /* already went through poll handler */
639 struct io_poll_iocb poll; member
646 * access the file pointer through any of the sub-structs,
653 struct io_poll_iocb poll; member
670 /* use only after cleaning per-op data, see io_clean_op() */
692 * 1. used with ctx->iopoll_list with reads/writes
693 * 2. to track reqs with ->files (see io_op_def::file_table)
699 /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
743 /* needs req->file assigned */
749 /* unbound wq insertion if file is a non-regular file */
976 if (file->f_op == &io_uring_fops) { in io_uring_get_socket()
977 struct io_ring_ctx *ctx = file->private_data; in io_uring_get_socket()
979 return ctx->ring_sock->sk; in io_uring_get_socket()
988 if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED | in io_clean_op()
995 struct mm_struct *mm = current->mm; in io_sq_thread_drop_mm()
1000 current->mm = NULL; in io_sq_thread_drop_mm()
1008 if (current->mm) in __io_sq_thread_acquire_mm()
1012 if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL))) in __io_sq_thread_acquire_mm()
1013 return -EFAULT; in __io_sq_thread_acquire_mm()
1015 task_lock(ctx->sqo_task); in __io_sq_thread_acquire_mm()
1016 mm = ctx->sqo_task->mm; in __io_sq_thread_acquire_mm()
1019 task_unlock(ctx->sqo_task); in __io_sq_thread_acquire_mm()
1026 return -EFAULT; in __io_sq_thread_acquire_mm()
1032 if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM)) in io_sq_thread_acquire_mm()
1043 if (*cur_css != ctx->sqo_blkcg_css) { in io_sq_thread_associate_blkcg()
1044 kthread_associate_blkcg(ctx->sqo_blkcg_css); in io_sq_thread_associate_blkcg()
1045 *cur_css = ctx->sqo_blkcg_css; in io_sq_thread_associate_blkcg()
1059 if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK) in req_set_fail_links()
1060 req->flags |= REQ_F_FAIL_LINK; in req_set_fail_links()
1066 * same, we're fine to grab references to them for actual out-of-line use.
1070 id->files = current->files; in io_init_identity()
1071 id->mm = current->mm; in io_init_identity()
1074 id->blkcg_css = blkcg_css(); in io_init_identity()
1077 id->creds = current_cred(); in io_init_identity()
1078 id->nsproxy = current->nsproxy; in io_init_identity()
1079 id->fs = current->fs; in io_init_identity()
1080 id->fsize = rlimit(RLIMIT_FSIZE); in io_init_identity()
1082 id->loginuid = current->loginuid; in io_init_identity()
1083 id->sessionid = current->sessionid; in io_init_identity()
1085 refcount_set(&id->count, 1); in io_init_identity()
1090 memset(&req->work, 0, sizeof(req->work)); in __io_req_init_async()
1091 req->flags |= REQ_F_WORK_INITIALIZED; in __io_req_init_async()
1100 struct io_uring_task *tctx = current->io_uring; in io_req_init_async()
1102 if (req->flags & REQ_F_WORK_INITIALIZED) in io_req_init_async()
1108 req->work.identity = tctx->identity; in io_req_init_async()
1109 if (tctx->identity != &tctx->__identity) in io_req_init_async()
1110 refcount_inc(&req->work.identity->count); in io_req_init_async()
1115 return ctx->flags & IORING_SETUP_SQPOLL; in io_async_submit()
1122 complete(&ctx->ref_comp); in io_ring_ctx_ref_free()
1127 return !req->timeout.off; in io_is_timeout_noseq()
1139 ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL); in io_ring_ctx_alloc()
1140 if (!ctx->fallback_req) in io_ring_ctx_alloc()
1147 hash_bits = ilog2(p->cq_entries); in io_ring_ctx_alloc()
1148 hash_bits -= 5; in io_ring_ctx_alloc()
1151 ctx->cancel_hash_bits = hash_bits; in io_ring_ctx_alloc()
1152 ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), in io_ring_ctx_alloc()
1154 if (!ctx->cancel_hash) in io_ring_ctx_alloc()
1156 __hash_init(ctx->cancel_hash, 1U << hash_bits); in io_ring_ctx_alloc()
1158 if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, in io_ring_ctx_alloc()
1162 ctx->flags = p->flags; in io_ring_ctx_alloc()
1163 init_waitqueue_head(&ctx->sqo_sq_wait); in io_ring_ctx_alloc()
1164 INIT_LIST_HEAD(&ctx->sqd_list); in io_ring_ctx_alloc()
1165 init_waitqueue_head(&ctx->cq_wait); in io_ring_ctx_alloc()
1166 INIT_LIST_HEAD(&ctx->cq_overflow_list); in io_ring_ctx_alloc()
1167 init_completion(&ctx->ref_comp); in io_ring_ctx_alloc()
1168 init_completion(&ctx->sq_thread_comp); in io_ring_ctx_alloc()
1169 idr_init(&ctx->io_buffer_idr); in io_ring_ctx_alloc()
1170 idr_init(&ctx->personality_idr); in io_ring_ctx_alloc()
1171 mutex_init(&ctx->uring_lock); in io_ring_ctx_alloc()
1172 init_waitqueue_head(&ctx->wait); in io_ring_ctx_alloc()
1173 spin_lock_init(&ctx->completion_lock); in io_ring_ctx_alloc()
1174 INIT_LIST_HEAD(&ctx->iopoll_list); in io_ring_ctx_alloc()
1175 INIT_LIST_HEAD(&ctx->defer_list); in io_ring_ctx_alloc()
1176 INIT_LIST_HEAD(&ctx->timeout_list); in io_ring_ctx_alloc()
1177 init_waitqueue_head(&ctx->inflight_wait); in io_ring_ctx_alloc()
1178 spin_lock_init(&ctx->inflight_lock); in io_ring_ctx_alloc()
1179 INIT_LIST_HEAD(&ctx->inflight_list); in io_ring_ctx_alloc()
1180 INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work); in io_ring_ctx_alloc()
1181 init_llist_head(&ctx->file_put_llist); in io_ring_ctx_alloc()
1184 if (ctx->fallback_req) in io_ring_ctx_alloc()
1185 kmem_cache_free(req_cachep, ctx->fallback_req); in io_ring_ctx_alloc()
1186 kfree(ctx->cancel_hash); in io_ring_ctx_alloc()
1193 if (unlikely(req->flags & REQ_F_IO_DRAIN)) { in req_need_defer()
1194 struct io_ring_ctx *ctx = req->ctx; in req_need_defer()
1196 return seq != ctx->cached_cq_tail in req_need_defer()
1197 + READ_ONCE(ctx->cached_cq_overflow); in req_need_defer()
1205 struct io_rings *rings = ctx->rings; in __io_commit_cqring()
1208 smp_store_release(&rings->cq.tail, ctx->cached_cq_tail); in __io_commit_cqring()
1210 if (wq_has_sleeper(&ctx->cq_wait)) { in __io_commit_cqring()
1211 wake_up_interruptible(&ctx->cq_wait); in __io_commit_cqring()
1212 kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); in __io_commit_cqring()
1218 if (req->work.identity == &tctx->__identity) in io_put_identity()
1220 if (refcount_dec_and_test(&req->work.identity->count)) in io_put_identity()
1221 kfree(req->work.identity); in io_put_identity()
1226 if (!(req->flags & REQ_F_WORK_INITIALIZED)) in io_req_clean_work()
1229 req->flags &= ~REQ_F_WORK_INITIALIZED; in io_req_clean_work()
1231 if (req->work.flags & IO_WQ_WORK_MM) { in io_req_clean_work()
1232 mmdrop(req->work.identity->mm); in io_req_clean_work()
1233 req->work.flags &= ~IO_WQ_WORK_MM; in io_req_clean_work()
1236 if (req->work.flags & IO_WQ_WORK_BLKCG) { in io_req_clean_work()
1237 css_put(req->work.identity->blkcg_css); in io_req_clean_work()
1238 req->work.flags &= ~IO_WQ_WORK_BLKCG; in io_req_clean_work()
1241 if (req->work.flags & IO_WQ_WORK_CREDS) { in io_req_clean_work()
1242 put_cred(req->work.identity->creds); in io_req_clean_work()
1243 req->work.flags &= ~IO_WQ_WORK_CREDS; in io_req_clean_work()
1245 if (req->work.flags & IO_WQ_WORK_FS) { in io_req_clean_work()
1246 struct fs_struct *fs = req->work.identity->fs; in io_req_clean_work()
1248 spin_lock(&req->work.identity->fs->lock); in io_req_clean_work()
1249 if (--fs->users) in io_req_clean_work()
1251 spin_unlock(&req->work.identity->fs->lock); in io_req_clean_work()
1254 req->work.flags &= ~IO_WQ_WORK_FS; in io_req_clean_work()
1257 io_put_identity(req->task->io_uring, req); in io_req_clean_work()
1266 struct io_uring_task *tctx = current->io_uring; in io_identity_cow()
1270 if (req->work.flags & IO_WQ_WORK_CREDS) in io_identity_cow()
1271 creds = req->work.identity->creds; in io_identity_cow()
1273 id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL); in io_identity_cow()
1275 req->work.flags |= IO_WQ_WORK_CANCEL; in io_identity_cow()
1280 * We can safely just re-init the creds we copied. Either the field in io_identity_cow()
1282 * exception is ->creds, through registered personalities, so handle in io_identity_cow()
1287 id->creds = creds; in io_identity_cow()
1290 refcount_inc(&id->count); in io_identity_cow()
1293 if (tctx->identity != &tctx->__identity && in io_identity_cow()
1294 refcount_dec_and_test(&tctx->identity->count)) in io_identity_cow()
1295 kfree(tctx->identity); in io_identity_cow()
1296 if (req->work.identity != &tctx->__identity && in io_identity_cow()
1297 refcount_dec_and_test(&req->work.identity->count)) in io_identity_cow()
1298 kfree(req->work.identity); in io_identity_cow()
1300 req->work.identity = id; in io_identity_cow()
1301 tctx->identity = id; in io_identity_cow()
1307 const struct io_op_def *def = &io_op_defs[req->opcode]; in io_grab_identity()
1308 struct io_identity *id = req->work.identity; in io_grab_identity()
1309 struct io_ring_ctx *ctx = req->ctx; in io_grab_identity()
1311 if (def->work_flags & IO_WQ_WORK_FSIZE) { in io_grab_identity()
1312 if (id->fsize != rlimit(RLIMIT_FSIZE)) in io_grab_identity()
1314 req->work.flags |= IO_WQ_WORK_FSIZE; in io_grab_identity()
1317 if (!(req->work.flags & IO_WQ_WORK_BLKCG) && in io_grab_identity()
1318 (def->work_flags & IO_WQ_WORK_BLKCG)) { in io_grab_identity()
1320 if (id->blkcg_css != blkcg_css()) { in io_grab_identity()
1328 if (css_tryget_online(id->blkcg_css)) in io_grab_identity()
1329 req->work.flags |= IO_WQ_WORK_BLKCG; in io_grab_identity()
1333 if (!(req->work.flags & IO_WQ_WORK_CREDS)) { in io_grab_identity()
1334 if (id->creds != current_cred()) in io_grab_identity()
1336 get_cred(id->creds); in io_grab_identity()
1337 req->work.flags |= IO_WQ_WORK_CREDS; in io_grab_identity()
1340 if (!uid_eq(current->loginuid, id->loginuid) || in io_grab_identity()
1341 current->sessionid != id->sessionid) in io_grab_identity()
1344 if (!(req->work.flags & IO_WQ_WORK_FS) && in io_grab_identity()
1345 (def->work_flags & IO_WQ_WORK_FS)) { in io_grab_identity()
1346 if (current->fs != id->fs) in io_grab_identity()
1348 spin_lock(&id->fs->lock); in io_grab_identity()
1349 if (!id->fs->in_exec) { in io_grab_identity()
1350 id->fs->users++; in io_grab_identity()
1351 req->work.flags |= IO_WQ_WORK_FS; in io_grab_identity()
1353 req->work.flags |= IO_WQ_WORK_CANCEL; in io_grab_identity()
1355 spin_unlock(&current->fs->lock); in io_grab_identity()
1357 if (!(req->work.flags & IO_WQ_WORK_FILES) && in io_grab_identity()
1358 (def->work_flags & IO_WQ_WORK_FILES) && in io_grab_identity()
1359 !(req->flags & REQ_F_NO_FILE_TABLE)) { in io_grab_identity()
1360 if (id->files != current->files || in io_grab_identity()
1361 id->nsproxy != current->nsproxy) in io_grab_identity()
1363 atomic_inc(&id->files->count); in io_grab_identity()
1364 get_nsproxy(id->nsproxy); in io_grab_identity()
1365 req->flags |= REQ_F_INFLIGHT; in io_grab_identity()
1367 spin_lock_irq(&ctx->inflight_lock); in io_grab_identity()
1368 list_add(&req->inflight_entry, &ctx->inflight_list); in io_grab_identity()
1369 spin_unlock_irq(&ctx->inflight_lock); in io_grab_identity()
1370 req->work.flags |= IO_WQ_WORK_FILES; in io_grab_identity()
1378 const struct io_op_def *def = &io_op_defs[req->opcode]; in io_prep_async_work()
1379 struct io_ring_ctx *ctx = req->ctx; in io_prep_async_work()
1383 id = req->work.identity; in io_prep_async_work()
1385 if (req->flags & REQ_F_FORCE_ASYNC) in io_prep_async_work()
1386 req->work.flags |= IO_WQ_WORK_CONCURRENT; in io_prep_async_work()
1388 if (req->flags & REQ_F_ISREG) { in io_prep_async_work()
1389 if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) in io_prep_async_work()
1390 io_wq_hash_work(&req->work, file_inode(req->file)); in io_prep_async_work()
1392 if (def->unbound_nonreg_file) in io_prep_async_work()
1393 req->work.flags |= IO_WQ_WORK_UNBOUND; in io_prep_async_work()
1396 /* ->mm can never change on us */ in io_prep_async_work()
1397 if (!(req->work.flags & IO_WQ_WORK_MM) && in io_prep_async_work()
1398 (def->work_flags & IO_WQ_WORK_MM)) { in io_prep_async_work()
1399 mmgrab(id->mm); in io_prep_async_work()
1400 req->work.flags |= IO_WQ_WORK_MM; in io_prep_async_work()
1403 /* if we fail grabbing identity, we must COW, regrab, and retry */ in io_prep_async_work()
1420 if (req->flags & REQ_F_LINK_HEAD) in io_prep_async_link()
1421 list_for_each_entry(cur, &req->link_list, link_list) in io_prep_async_link()
1427 struct io_ring_ctx *ctx = req->ctx; in __io_queue_async_work()
1430 trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, in __io_queue_async_work()
1431 &req->work, req->flags); in __io_queue_async_work()
1432 io_wq_enqueue(ctx->io_wq, &req->work); in __io_queue_async_work()
1440 /* init ->work of the whole link before punting */ in io_queue_async_work()
1450 struct io_timeout_data *io = req->async_data; in io_kill_timeout()
1453 ret = hrtimer_try_to_cancel(&io->timer); in io_kill_timeout()
1454 if (ret != -1) { in io_kill_timeout()
1455 atomic_set(&req->ctx->cq_timeouts, in io_kill_timeout()
1456 atomic_read(&req->ctx->cq_timeouts) + 1); in io_kill_timeout()
1457 list_del_init(&req->timeout.list); in io_kill_timeout()
1465 struct io_ring_ctx *ctx = req->ctx; in io_task_match()
1467 if (!tsk || req->task == tsk) in io_task_match()
1469 if (ctx->flags & IORING_SETUP_SQPOLL) { in io_task_match()
1470 if (ctx->sq_data && req->task == ctx->sq_data->thread) in io_task_match()
1484 spin_lock_irq(&ctx->completion_lock); in io_kill_timeouts()
1485 list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { in io_kill_timeouts()
1491 spin_unlock_irq(&ctx->completion_lock); in io_kill_timeouts()
1498 struct io_defer_entry *de = list_first_entry(&ctx->defer_list, in __io_queue_deferred()
1502 if (req_need_defer(de->req, de->seq)) in __io_queue_deferred()
1504 list_del_init(&de->list); in __io_queue_deferred()
1505 /* punt-init is done before queueing for defer */ in __io_queue_deferred()
1506 link = __io_queue_async_work(de->req); in __io_queue_deferred()
1513 } while (!list_empty(&ctx->defer_list)); in __io_queue_deferred()
1518 while (!list_empty(&ctx->timeout_list)) { in io_flush_timeouts()
1519 struct io_kiocb *req = list_first_entry(&ctx->timeout_list, in io_flush_timeouts()
1524 if (req->timeout.target_seq != ctx->cached_cq_tail in io_flush_timeouts()
1525 - atomic_read(&ctx->cq_timeouts)) in io_flush_timeouts()
1528 list_del_init(&req->timeout.list); in io_flush_timeouts()
1538 if (unlikely(!list_empty(&ctx->defer_list))) in io_commit_cqring()
1544 struct io_rings *r = ctx->rings; in io_sqring_full()
1546 return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries; in io_sqring_full()
1551 struct io_rings *rings = ctx->rings; in io_get_cqring()
1554 tail = ctx->cached_cq_tail; in io_get_cqring()
1560 if (tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries) in io_get_cqring()
1563 ctx->cached_cq_tail++; in io_get_cqring()
1564 return &rings->cqes[tail & ctx->cq_mask]; in io_get_cqring()
1569 if (!ctx->cq_ev_fd) in io_should_trigger_evfd()
1571 if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) in io_should_trigger_evfd()
1573 if (!ctx->eventfd_async) in io_should_trigger_evfd()
1580 if (waitqueue_active(&ctx->wait)) in io_cqring_ev_posted()
1581 wake_up(&ctx->wait); in io_cqring_ev_posted()
1582 if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) in io_cqring_ev_posted()
1583 wake_up(&ctx->sq_data->wait); in io_cqring_ev_posted()
1585 eventfd_signal(ctx->cq_ev_fd, 1); in io_cqring_ev_posted()
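io_cqring_ev_posted() above signals a registered eventfd whenever completions become visible, subject to the io_should_trigger_evfd() checks (ctx->cq_ev_fd, IORING_CQ_EVENTFD_DISABLED, eventfd_async). A hedged user-space sketch of wiring that up is below; io_uring_register_eventfd() and the other helpers are from liburing, which is assumed to be available, and error handling is trimmed.

/* Sketch: get CQE readiness notifications through an eventfd (assumes liburing). */
#include <liburing.h>
#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    uint64_t count;
    int efd = eventfd(0, 0);

    if (efd < 0)
        return 1;
    io_uring_queue_init(8, &ring, 0);
    io_uring_register_eventfd(&ring, efd);  /* this is what sets ctx->cq_ev_fd */

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_nop(sqe);
    io_uring_submit(&ring);

    /* Block on the eventfd; the kernel's io_cqring_ev_posted() path signals
     * it once the CQE is posted. An event loop would poll this fd instead
     * of spinning on the CQ ring. */
    if (read(efd, &count, sizeof(count)) < 0)
        return 1;

    if (io_uring_peek_cqe(&ring, &cqe) == 0) {
        printf("got cqe, res=%d\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
    }
    io_uring_queue_exit(&ring);
    close(efd);
    return 0;
}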
1590 if (list_empty(&ctx->cq_overflow_list)) { in io_cqring_mark_overflow()
1591 clear_bit(0, &ctx->sq_check_overflow); in io_cqring_mark_overflow()
1592 clear_bit(0, &ctx->cq_check_overflow); in io_cqring_mark_overflow()
1593 ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; in io_cqring_mark_overflow()
1600 return ((req->flags & REQ_F_WORK_INITIALIZED) && in __io_match_files()
1601 (req->work.flags & IO_WQ_WORK_FILES)) && in __io_match_files()
1602 req->work.identity->files == files; in __io_match_files()
1614 if (req->flags & REQ_F_LINK_HEAD) { in io_match_files()
1615 list_for_each_entry(link, &req->link_list, link_list) { in io_match_files()
1628 struct io_rings *rings = ctx->rings; in io_cqring_overflow_flush()
1635 if (list_empty_careful(&ctx->cq_overflow_list)) in io_cqring_overflow_flush()
1637 if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == in io_cqring_overflow_flush()
1638 rings->cq_ring_entries)) in io_cqring_overflow_flush()
1642 spin_lock_irqsave(&ctx->completion_lock, flags); in io_cqring_overflow_flush()
1646 ctx->cq_overflow_flushed = 1; in io_cqring_overflow_flush()
1649 list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { in io_cqring_overflow_flush()
1650 if (tsk && req->task != tsk) in io_cqring_overflow_flush()
1659 list_move(&req->compl.list, &list); in io_cqring_overflow_flush()
1661 WRITE_ONCE(cqe->user_data, req->user_data); in io_cqring_overflow_flush()
1662 WRITE_ONCE(cqe->res, req->result); in io_cqring_overflow_flush()
1663 WRITE_ONCE(cqe->flags, req->compl.cflags); in io_cqring_overflow_flush()
1665 ctx->cached_cq_overflow++; in io_cqring_overflow_flush()
1666 WRITE_ONCE(ctx->rings->cq_overflow, in io_cqring_overflow_flush()
1667 ctx->cached_cq_overflow); in io_cqring_overflow_flush()
1674 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_cqring_overflow_flush()
1679 list_del(&req->compl.list); in io_cqring_overflow_flush()
1688 struct io_ring_ctx *ctx = req->ctx; in __io_cqring_fill_event()
1691 trace_io_uring_complete(ctx, req->user_data, res); in __io_cqring_fill_event()
1695 * submission (by quite a lot). Increment the overflow count in in __io_cqring_fill_event()
1700 WRITE_ONCE(cqe->user_data, req->user_data); in __io_cqring_fill_event()
1701 WRITE_ONCE(cqe->res, res); in __io_cqring_fill_event()
1702 WRITE_ONCE(cqe->flags, cflags); in __io_cqring_fill_event()
1703 } else if (ctx->cq_overflow_flushed || in __io_cqring_fill_event()
1704 atomic_read(&req->task->io_uring->in_idle)) { in __io_cqring_fill_event()
1710 ctx->cached_cq_overflow++; in __io_cqring_fill_event()
1711 WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow); in __io_cqring_fill_event()
1713 if (list_empty(&ctx->cq_overflow_list)) { in __io_cqring_fill_event()
1714 set_bit(0, &ctx->sq_check_overflow); in __io_cqring_fill_event()
1715 set_bit(0, &ctx->cq_check_overflow); in __io_cqring_fill_event()
1716 ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; in __io_cqring_fill_event()
1719 req->result = res; in __io_cqring_fill_event()
1720 req->compl.cflags = cflags; in __io_cqring_fill_event()
1721 refcount_inc(&req->refs); in __io_cqring_fill_event()
1722 list_add_tail(&req->compl.list, &ctx->cq_overflow_list); in __io_cqring_fill_event()
1733 struct io_ring_ctx *ctx = req->ctx; in io_cqring_add_event()
1736 spin_lock_irqsave(&ctx->completion_lock, flags); in io_cqring_add_event()
1739 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_cqring_add_event()
1746 struct io_ring_ctx *ctx = cs->ctx; in io_submit_flush_completions()
1748 spin_lock_irq(&ctx->completion_lock); in io_submit_flush_completions()
1749 while (!list_empty(&cs->list)) { in io_submit_flush_completions()
1752 req = list_first_entry(&cs->list, struct io_kiocb, compl.list); in io_submit_flush_completions()
1753 list_del(&req->compl.list); in io_submit_flush_completions()
1754 __io_cqring_fill_event(req, req->result, req->compl.cflags); in io_submit_flush_completions()
1759 * because of a potential deadlock with req->work.fs->lock in io_submit_flush_completions()
1761 if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT in io_submit_flush_completions()
1763 spin_unlock_irq(&ctx->completion_lock); in io_submit_flush_completions()
1765 spin_lock_irq(&ctx->completion_lock); in io_submit_flush_completions()
1771 spin_unlock_irq(&ctx->completion_lock); in io_submit_flush_completions()
1774 cs->nr = 0; in io_submit_flush_completions()
1785 req->result = res; in __io_req_complete()
1786 req->compl.cflags = cflags; in __io_req_complete()
1787 list_add_tail(&req->compl.list, &cs->list); in __io_req_complete()
1788 if (++cs->nr >= 32) in __io_req_complete()
1801 ((unsigned long) req->ctx->fallback_req & ~1UL); in io_is_fallback_req()
1808 req = ctx->fallback_req; in io_get_fallback_req()
1809 if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req)) in io_get_fallback_req()
1818 if (!state->free_reqs) { in io_alloc_req()
1823 sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs)); in io_alloc_req()
1824 ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs); in io_alloc_req()
1827 * Bulk alloc is all-or-nothing. If we fail to get a batch, in io_alloc_req()
1828 * retry single alloc to be on the safe side. in io_alloc_req()
1831 state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); in io_alloc_req()
1832 if (!state->reqs[0]) in io_alloc_req()
1836 state->free_reqs = ret; in io_alloc_req()
1839 state->free_reqs--; in io_alloc_req()
1840 return state->reqs[state->free_reqs]; in io_alloc_req()
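io_alloc_req() above batches request allocation with kmem_cache_alloc_bulk() and, because the bulk call is all-or-nothing, falls back to a single kmem_cache_alloc() when the batch fails. The fragment below is only a rough user-space analogy of that fallback pattern; the names (refill, struct req) are invented for illustration and plain malloc()/free() stand in for the kernel's slab APIs.

/* Analogy of the "bulk alloc, fall back to single" pattern in io_alloc_req(). */
#include <stdlib.h>
#include <stddef.h>

struct req { void *data; };

static size_t refill(struct req **reqs, size_t want)
{
    size_t i;

    /* "Bulk" step: try to grab the whole batch up front. */
    for (i = 0; i < want; i++) {
        reqs[i] = malloc(sizeof(**reqs));
        if (!reqs[i])
            break;
    }
    if (i == want)
        return want;

    /* Emulate the all-or-nothing contract: drop the partial batch, then
     * retry a single allocation to stay on the safe side. */
    while (i)
        free(reqs[--i]);
    reqs[0] = malloc(sizeof(**reqs));
    return reqs[0] ? 1 : 0;
}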
1849 percpu_ref_put(req->fixed_file_refs); in io_put_file()
1858 if (req->async_data) in io_dismantle_req()
1859 kfree(req->async_data); in io_dismantle_req()
1860 if (req->file) in io_dismantle_req()
1861 io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); in io_dismantle_req()
1868 struct io_uring_task *tctx = req->task->io_uring; in __io_free_req()
1869 struct io_ring_ctx *ctx = req->ctx; in __io_free_req()
1873 percpu_counter_dec(&tctx->inflight); in __io_free_req()
1874 if (atomic_read(&tctx->in_idle)) in __io_free_req()
1875 wake_up(&tctx->wait); in __io_free_req()
1876 put_task_struct(req->task); in __io_free_req()
1881 clear_bit_unlock(0, (unsigned long *) &ctx->fallback_req); in __io_free_req()
1882 percpu_ref_put(&ctx->refs); in __io_free_req()
1887 struct io_ring_ctx *ctx = req->ctx; in io_kill_linked_timeout()
1892 spin_lock_irqsave(&ctx->completion_lock, flags); in io_kill_linked_timeout()
1893 link = list_first_entry_or_null(&req->link_list, struct io_kiocb, in io_kill_linked_timeout()
1897 * req -> link t-out -> link t-out [-> ...] in io_kill_linked_timeout()
1899 if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) { in io_kill_linked_timeout()
1900 struct io_timeout_data *io = link->async_data; in io_kill_linked_timeout()
1903 list_del_init(&link->link_list); in io_kill_linked_timeout()
1904 ret = hrtimer_try_to_cancel(&io->timer); in io_kill_linked_timeout()
1905 if (ret != -1) { in io_kill_linked_timeout()
1906 io_cqring_fill_event(link, -ECANCELED); in io_kill_linked_timeout()
1911 req->flags &= ~REQ_F_LINK_TIMEOUT; in io_kill_linked_timeout()
1912 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_kill_linked_timeout()
1929 if (unlikely(list_empty(&req->link_list))) in io_req_link_next()
1932 nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list); in io_req_link_next()
1933 list_del_init(&req->link_list); in io_req_link_next()
1934 if (!list_empty(&nxt->link_list)) in io_req_link_next()
1935 nxt->flags |= REQ_F_LINK_HEAD; in io_req_link_next()
1944 struct io_ring_ctx *ctx = req->ctx; in io_fail_links()
1947 spin_lock_irqsave(&ctx->completion_lock, flags); in io_fail_links()
1948 while (!list_empty(&req->link_list)) { in io_fail_links()
1949 struct io_kiocb *link = list_first_entry(&req->link_list, in io_fail_links()
1952 list_del_init(&link->link_list); in io_fail_links()
1955 io_cqring_fill_event(link, -ECANCELED); in io_fail_links()
1960 * work.fs->lock. in io_fail_links()
1962 if (link->flags & REQ_F_WORK_INITIALIZED) in io_fail_links()
1969 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_fail_links()
1976 req->flags &= ~REQ_F_LINK_HEAD; in __io_req_find_next()
1977 if (req->flags & REQ_F_LINK_TIMEOUT) in __io_req_find_next()
1986 if (likely(!(req->flags & REQ_F_FAIL_LINK))) in __io_req_find_next()
1994 if (likely(!(req->flags & REQ_F_LINK_HEAD))) in io_req_find_next()
2001 struct task_struct *tsk = req->task; in io_req_task_work_add()
2002 struct io_ring_ctx *ctx = req->ctx; in io_req_task_work_add()
2006 if (tsk->flags & PF_EXITING) in io_req_task_work_add()
2007 return -ESRCH; in io_req_task_work_add()
2016 if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok) in io_req_task_work_add()
2019 ret = task_work_add(tsk, &req->task_work, notify); in io_req_task_work_add()
2028 struct io_ring_ctx *ctx = req->ctx; in __io_req_task_cancel()
2030 spin_lock_irq(&ctx->completion_lock); in __io_req_task_cancel()
2033 spin_unlock_irq(&ctx->completion_lock); in __io_req_task_cancel()
2043 struct io_ring_ctx *ctx = req->ctx; in io_req_task_cancel()
2045 __io_req_task_cancel(req, -ECANCELED); in io_req_task_cancel()
2046 percpu_ref_put(&ctx->refs); in io_req_task_cancel()
2051 struct io_ring_ctx *ctx = req->ctx; in __io_req_task_submit()
2054 mutex_lock(&ctx->uring_lock); in __io_req_task_submit()
2056 mutex_unlock(&ctx->uring_lock); in __io_req_task_submit()
2058 __io_req_task_cancel(req, -EFAULT); in __io_req_task_submit()
2065 struct io_ring_ctx *ctx = req->ctx; in io_req_task_submit()
2068 percpu_ref_put(&ctx->refs); in io_req_task_submit()
2075 init_task_work(&req->task_work, io_req_task_submit); in io_req_task_queue()
2076 percpu_ref_get(&req->ctx->refs); in io_req_task_queue()
2082 init_task_work(&req->task_work, io_req_task_cancel); in io_req_task_queue()
2083 tsk = io_wq_get_task(req->ctx->io_wq); in io_req_task_queue()
2084 task_work_add(tsk, &req->task_work, TWA_NONE); in io_req_task_queue()
2113 rb->to_free = 0; in io_init_req_batch()
2114 rb->task_refs = 0; in io_init_req_batch()
2115 rb->task = NULL; in io_init_req_batch()
2121 kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs); in __io_req_free_batch_flush()
2122 percpu_ref_put_many(&ctx->refs, rb->to_free); in __io_req_free_batch_flush()
2123 rb->to_free = 0; in __io_req_free_batch_flush()
2129 if (rb->to_free) in io_req_free_batch_finish()
2131 if (rb->task) { in io_req_free_batch_finish()
2132 struct io_uring_task *tctx = rb->task->io_uring; in io_req_free_batch_finish()
2134 percpu_counter_sub(&tctx->inflight, rb->task_refs); in io_req_free_batch_finish()
2135 put_task_struct_many(rb->task, rb->task_refs); in io_req_free_batch_finish()
2136 rb->task = NULL; in io_req_free_batch_finish()
2146 if (req->flags & REQ_F_LINK_HEAD) in io_req_free_batch()
2149 if (req->task != rb->task) { in io_req_free_batch()
2150 if (rb->task) { in io_req_free_batch()
2151 struct io_uring_task *tctx = rb->task->io_uring; in io_req_free_batch()
2153 percpu_counter_sub(&tctx->inflight, rb->task_refs); in io_req_free_batch()
2154 put_task_struct_many(rb->task, rb->task_refs); in io_req_free_batch()
2156 rb->task = req->task; in io_req_free_batch()
2157 rb->task_refs = 0; in io_req_free_batch()
2159 rb->task_refs++; in io_req_free_batch()
2162 rb->reqs[rb->to_free++] = req; in io_req_free_batch()
2163 if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs))) in io_req_free_batch()
2164 __io_req_free_batch_flush(req->ctx, rb); in io_req_free_batch()
2175 if (refcount_dec_and_test(&req->refs)) { in io_put_req_find_next()
2184 if (refcount_dec_and_test(&req->refs)) in io_put_req()
2199 init_task_work(&req->task_work, io_put_req_deferred_cb); in io_free_req_deferred()
2204 tsk = io_wq_get_task(req->ctx->io_wq); in io_free_req_deferred()
2205 task_work_add(tsk, &req->task_work, TWA_NONE); in io_free_req_deferred()
2212 if (refcount_sub_and_test(refs, &req->refs)) in io_put_req_deferred()
2221 * A ref is owned by io-wq, in whose context we are running. So, if that's the in io_steal_work()
2223 * it just will be re-punted async in io_put_work() in io_steal_work()
2225 if (refcount_read(&req->refs) != 1) in io_steal_work()
2229 return nxt ? &nxt->work : NULL; in io_steal_work()
2235 if (refcount_sub_and_test(2, &req->refs)) in io_double_put_req()
2241 struct io_rings *rings = ctx->rings; in io_cqring_events()
2243 if (test_bit(0, &ctx->cq_check_overflow)) { in io_cqring_events()
2249 if (noflush && !list_empty(&ctx->cq_overflow_list)) in io_cqring_events()
2250 return -1U; in io_cqring_events()
2257 return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); in io_cqring_events()
2262 struct io_rings *rings = ctx->rings; in io_sqring_entries()
2265 return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; in io_sqring_entries()
2272 cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT; in io_put_kbuf()
2274 req->flags &= ~REQ_F_BUFFER_SELECTED; in io_put_kbuf()
2283 kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; in io_put_rw_kbuf()
2293 if (unlikely(current->flags & PF_EXITING)) in io_run_task_work()
2295 if (current->task_works) { in io_run_task_work()
2310 list_del(&req->inflight_entry); in io_iopoll_queue()
2311 __io_complete_rw(req, -EAGAIN, 0, NULL); in io_iopoll_queue()
2316 * Find and free completed poll iocbs
2325 /* order with ->result store in io_complete_rw_iopoll() */ in io_iopoll_complete()
2333 if (READ_ONCE(req->result) == -EAGAIN) { in io_iopoll_complete()
2334 req->result = 0; in io_iopoll_complete()
2335 req->iopoll_completed = 0; in io_iopoll_complete()
2336 list_move_tail(&req->inflight_entry, &again); in io_iopoll_complete()
2339 list_del(&req->inflight_entry); in io_iopoll_complete()
2341 if (req->flags & REQ_F_BUFFER_SELECTED) in io_iopoll_complete()
2344 __io_cqring_fill_event(req, req->result, cflags); in io_iopoll_complete()
2347 if (refcount_dec_and_test(&req->refs)) in io_iopoll_complete()
2352 if (ctx->flags & IORING_SETUP_SQPOLL) in io_iopoll_complete()
2372 spin = !ctx->poll_multi_file && *nr_events < min; in io_do_iopoll()
2375 list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { in io_do_iopoll()
2376 struct kiocb *kiocb = &req->rw.kiocb; in io_do_iopoll()
2383 if (READ_ONCE(req->iopoll_completed)) { in io_do_iopoll()
2384 list_move_tail(&req->inflight_entry, &done); in io_do_iopoll()
2390 ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); in io_do_iopoll()
2395 if (READ_ONCE(req->iopoll_completed)) in io_do_iopoll()
2396 list_move_tail(&req->inflight_entry, &done); in io_do_iopoll()
2410 * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
2411 * non-spinning poll check - we'll still enter the driver poll loop, but only
2412 * as a non-spinning completion check.
2417 while (!list_empty(&ctx->iopoll_list) && !need_resched()) { in io_iopoll_getevents()
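io_iopoll_getevents()/io_do_iopoll() above reap completions by polling the driver rather than waiting for interrupts, which is the path taken by rings created with IORING_SETUP_IOPOLL. A minimal user-space sketch follows; it assumes liburing, and the device path, buffer size, and alignment are placeholders (IOPOLL needs a file opened with O_DIRECT on a driver that supports polled I/O, and opening a raw block device normally needs privileges).

/* Sketch: IORING_SETUP_IOPOLL ring doing one O_DIRECT read (assumes liburing). */
#define _GNU_SOURCE
#include <liburing.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    void *buf;
    int fd, ret;

    fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);  /* placeholder device */
    if (fd < 0)
        return 1;
    if (posix_memalign(&buf, 4096, 4096))
        return 1;

    ret = io_uring_queue_init(8, &ring, IORING_SETUP_IOPOLL);
    if (ret < 0)
        return 1;

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_read(sqe, fd, buf, 4096, 0);
    io_uring_submit(&ring);

    /* No completion interrupt with IOPOLL; waiting here makes the kernel
     * spin in its iopoll loop until the read finishes. */
    if (io_uring_wait_cqe(&ring, &cqe) == 0) {
        printf("read res=%d\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
    }
    io_uring_queue_exit(&ring);
    free(buf);
    close(fd);
    return 0;
}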
2436 if (!(ctx->flags & IORING_SETUP_IOPOLL)) in io_iopoll_try_reap_events()
2439 mutex_lock(&ctx->uring_lock); in io_iopoll_try_reap_events()
2440 while (!list_empty(&ctx->iopoll_list)) { in io_iopoll_try_reap_events()
2449 * Ensure we allow local-to-the-cpu processing to take place, in io_iopoll_try_reap_events()
2454 mutex_unlock(&ctx->uring_lock); in io_iopoll_try_reap_events()
2456 mutex_lock(&ctx->uring_lock); in io_iopoll_try_reap_events()
2459 mutex_unlock(&ctx->uring_lock); in io_iopoll_try_reap_events()
2472 mutex_lock(&ctx->uring_lock); in io_iopoll_check()
2475 * Don't enter poll loop if we already have events pending. in io_iopoll_check()
2486 * of the poll right here, so we need to take a breather every in io_iopoll_check()
2488 * the poll to the issued list. Otherwise we can spin here in io_iopoll_check()
2493 mutex_unlock(&ctx->uring_lock); in io_iopoll_check()
2495 mutex_lock(&ctx->uring_lock); in io_iopoll_check()
2504 mutex_unlock(&ctx->uring_lock); in io_iopoll_check()
2514 if (req->flags & REQ_F_ISREG) { in kiocb_end_write()
2515 struct inode *inode = file_inode(req->file); in kiocb_end_write()
2517 __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); in kiocb_end_write()
2519 file_end_write(req->file); in kiocb_end_write()
2528 if (kiocb->ki_flags & IOCB_WRITE) in io_complete_rw_common()
2531 if (res != req->result) in io_complete_rw_common()
2533 if (req->flags & REQ_F_BUFFER_SELECTED) in io_complete_rw_common()
2542 ssize_t ret = -ECANCELED; in io_resubmit_prep()
2551 switch (req->opcode) { in io_resubmit_prep()
2564 req->opcode); in io_resubmit_prep()
2568 if (!req->async_data) { in io_resubmit_prep()
2588 umode_t mode = file_inode(req->file)->i_mode; in io_rw_reissue()
2593 if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) in io_rw_reissue()
2596 ret = io_sq_thread_acquire_mm(req->ctx, req); in io_rw_reissue()
2599 refcount_inc(&req->refs); in io_rw_reissue()
2612 io_complete_rw_common(&req->rw.kiocb, res, cs); in __io_complete_rw()
2626 if (kiocb->ki_flags & IOCB_WRITE) in io_complete_rw_iopoll()
2629 if (res != -EAGAIN && res != req->result) in io_complete_rw_iopoll()
2632 WRITE_ONCE(req->result, res); in io_complete_rw_iopoll()
2633 /* order with io_poll_complete() checking ->result */ in io_complete_rw_iopoll()
2635 WRITE_ONCE(req->iopoll_completed, 1); in io_complete_rw_iopoll()
2639 * After the iocb has been issued, it's safe to be found on the poll list.
2646 struct io_ring_ctx *ctx = req->ctx; in io_iopoll_req_issued()
2653 if (list_empty(&ctx->iopoll_list)) { in io_iopoll_req_issued()
2654 ctx->poll_multi_file = false; in io_iopoll_req_issued()
2655 } else if (!ctx->poll_multi_file) { in io_iopoll_req_issued()
2658 list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, in io_iopoll_req_issued()
2660 if (list_req->file != req->file) in io_iopoll_req_issued()
2661 ctx->poll_multi_file = true; in io_iopoll_req_issued()
2668 if (READ_ONCE(req->iopoll_completed)) in io_iopoll_req_issued()
2669 list_add(&req->inflight_entry, &ctx->iopoll_list); in io_iopoll_req_issued()
2671 list_add_tail(&req->inflight_entry, &ctx->iopoll_list); in io_iopoll_req_issued()
2673 if ((ctx->flags & IORING_SETUP_SQPOLL) && in io_iopoll_req_issued()
2674 wq_has_sleeper(&ctx->sq_data->wait)) in io_iopoll_req_issued()
2675 wake_up(&ctx->sq_data->wait); in io_iopoll_req_issued()
2680 if (state->has_refs) in __io_state_file_put()
2681 fput_many(state->file, state->has_refs); in __io_state_file_put()
2682 state->file = NULL; in __io_state_file_put()
2687 if (state->file) in io_state_file_put()
2701 if (state->file) { in __io_file_get()
2702 if (state->fd == fd) { in __io_file_get()
2703 state->has_refs--; in __io_file_get()
2704 return state->file; in __io_file_get()
2708 state->file = fget_many(fd, state->ios_left); in __io_file_get()
2709 if (!state->file) in __io_file_get()
2712 state->fd = fd; in __io_file_get()
2713 state->has_refs = state->ios_left - 1; in __io_file_get()
2714 return state->file; in __io_file_get()
2733 umode_t mode = file_inode(file)->i_mode; in io_file_supports_async()
2736 if (io_bdev_nowait(file->f_inode->i_bdev)) in io_file_supports_async()
2743 if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) && in io_file_supports_async()
2744 file->f_op != &io_uring_fops) in io_file_supports_async()
2749 /* any ->read/write should understand O_NONBLOCK */ in io_file_supports_async()
2750 if (file->f_flags & O_NONBLOCK) in io_file_supports_async()
2753 if (!(file->f_mode & FMODE_NOWAIT)) in io_file_supports_async()
2757 return file->f_op->read_iter != NULL; in io_file_supports_async()
2759 return file->f_op->write_iter != NULL; in io_file_supports_async()
2764 struct io_ring_ctx *ctx = req->ctx; in io_prep_rw()
2765 struct kiocb *kiocb = &req->rw.kiocb; in io_prep_rw()
2769 if (S_ISREG(file_inode(req->file)->i_mode)) in io_prep_rw()
2770 req->flags |= REQ_F_ISREG; in io_prep_rw()
2772 kiocb->ki_pos = READ_ONCE(sqe->off); in io_prep_rw()
2773 if (kiocb->ki_pos == -1 && !(req->file->f_mode & FMODE_STREAM)) { in io_prep_rw()
2774 req->flags |= REQ_F_CUR_POS; in io_prep_rw()
2775 kiocb->ki_pos = req->file->f_pos; in io_prep_rw()
2777 kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); in io_prep_rw()
2778 kiocb->ki_flags = iocb_flags(kiocb->ki_filp); in io_prep_rw()
2779 ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); in io_prep_rw()
2783 ioprio = READ_ONCE(sqe->ioprio); in io_prep_rw()
2789 kiocb->ki_ioprio = ioprio; in io_prep_rw()
2791 kiocb->ki_ioprio = get_current_ioprio(); in io_prep_rw()
2794 if (kiocb->ki_flags & IOCB_NOWAIT) in io_prep_rw()
2795 req->flags |= REQ_F_NOWAIT; in io_prep_rw()
2797 if (ctx->flags & IORING_SETUP_IOPOLL) { in io_prep_rw()
2798 if (!(kiocb->ki_flags & IOCB_DIRECT) || in io_prep_rw()
2799 !kiocb->ki_filp->f_op->iopoll) in io_prep_rw()
2800 return -EOPNOTSUPP; in io_prep_rw()
2802 kiocb->ki_flags |= IOCB_HIPRI; in io_prep_rw()
2803 kiocb->ki_complete = io_complete_rw_iopoll; in io_prep_rw()
2804 req->iopoll_completed = 0; in io_prep_rw()
2806 if (kiocb->ki_flags & IOCB_HIPRI) in io_prep_rw()
2807 return -EINVAL; in io_prep_rw()
2808 kiocb->ki_complete = io_complete_rw; in io_prep_rw()
2811 req->rw.addr = READ_ONCE(sqe->addr); in io_prep_rw()
2812 req->rw.len = READ_ONCE(sqe->len); in io_prep_rw()
2813 req->buf_index = READ_ONCE(sqe->buf_index); in io_prep_rw()
2820 case -EIOCBQUEUED: in io_rw_done()
2822 case -ERESTARTSYS: in io_rw_done()
2823 case -ERESTARTNOINTR: in io_rw_done()
2824 case -ERESTARTNOHAND: in io_rw_done()
2825 case -ERESTART_RESTARTBLOCK: in io_rw_done()
2831 ret = -EINTR; in io_rw_done()
2834 kiocb->ki_complete(kiocb, ret, 0); in io_rw_done()
2842 struct io_async_rw *io = req->async_data; in kiocb_done()
2845 if (io && io->bytes_done > 0) { in kiocb_done()
2847 ret = io->bytes_done; in kiocb_done()
2849 ret += io->bytes_done; in kiocb_done()
2852 if (req->flags & REQ_F_CUR_POS) in kiocb_done()
2853 req->file->f_pos = kiocb->ki_pos; in kiocb_done()
2854 if (ret >= 0 && kiocb->ki_complete == io_complete_rw) in kiocb_done()
2863 struct io_ring_ctx *ctx = req->ctx; in io_import_fixed()
2864 size_t len = req->rw.len; in io_import_fixed()
2866 u16 index, buf_index = req->buf_index; in io_import_fixed()
2870 if (unlikely(buf_index >= ctx->nr_user_bufs)) in io_import_fixed()
2871 return -EFAULT; in io_import_fixed()
2872 index = array_index_nospec(buf_index, ctx->nr_user_bufs); in io_import_fixed()
2873 imu = &ctx->user_bufs[index]; in io_import_fixed()
2874 buf_addr = req->rw.addr; in io_import_fixed()
2878 return -EFAULT; in io_import_fixed()
2880 if (buf_addr < imu->ubuf || buf_addr + len > imu->ubuf + imu->len) in io_import_fixed()
2881 return -EFAULT; in io_import_fixed()
2887 offset = buf_addr - imu->ubuf; in io_import_fixed()
2888 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); in io_import_fixed()
2893 * using the latter parts of a big fixed buffer - it iterates in io_import_fixed()
2907 const struct bio_vec *bvec = imu->bvec; in io_import_fixed()
2909 if (offset <= bvec->bv_len) { in io_import_fixed()
2915 offset -= bvec->bv_len; in io_import_fixed()
2918 iter->bvec = bvec + seg_skip; in io_import_fixed()
2919 iter->nr_segs -= seg_skip; in io_import_fixed()
2920 iter->count -= bvec->bv_len + offset; in io_import_fixed()
2921 iter->iov_offset = offset & ~PAGE_MASK; in io_import_fixed()
2931 mutex_unlock(&ctx->uring_lock); in io_ring_submit_unlock()
2943 mutex_lock(&ctx->uring_lock); in io_ring_submit_lock()
2952 if (req->flags & REQ_F_BUFFER_SELECTED) in io_buffer_select()
2955 io_ring_submit_lock(req->ctx, needs_lock); in io_buffer_select()
2957 lockdep_assert_held(&req->ctx->uring_lock); in io_buffer_select()
2959 head = idr_find(&req->ctx->io_buffer_idr, bgid); in io_buffer_select()
2961 if (!list_empty(&head->list)) { in io_buffer_select()
2962 kbuf = list_last_entry(&head->list, struct io_buffer, in io_buffer_select()
2964 list_del(&kbuf->list); in io_buffer_select()
2967 idr_remove(&req->ctx->io_buffer_idr, bgid); in io_buffer_select()
2969 if (*len > kbuf->len) in io_buffer_select()
2970 *len = kbuf->len; in io_buffer_select()
2972 kbuf = ERR_PTR(-ENOBUFS); in io_buffer_select()
2975 io_ring_submit_unlock(req->ctx, needs_lock); in io_buffer_select()
2986 kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; in io_rw_buffer_select()
2987 bgid = req->buf_index; in io_rw_buffer_select()
2991 req->rw.addr = (u64) (unsigned long) kbuf; in io_rw_buffer_select()
2992 req->flags |= REQ_F_BUFFER_SELECTED; in io_rw_buffer_select()
2993 return u64_to_user_ptr(kbuf->addr); in io_rw_buffer_select()
3005 uiov = u64_to_user_ptr(req->rw.addr); in io_compat_import()
3007 return -EFAULT; in io_compat_import()
3008 if (__get_user(clen, &uiov->iov_len)) in io_compat_import()
3009 return -EFAULT; in io_compat_import()
3011 return -EINVAL; in io_compat_import()
3026 struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr); in __io_iov_buffer_select()
3031 return -EFAULT; in __io_iov_buffer_select()
3035 return -EINVAL; in __io_iov_buffer_select()
3047 if (req->flags & REQ_F_BUFFER_SELECTED) { in io_iov_buffer_select()
3050 kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; in io_iov_buffer_select()
3051 iov[0].iov_base = u64_to_user_ptr(kbuf->addr); in io_iov_buffer_select()
3052 iov[0].iov_len = kbuf->len; in io_iov_buffer_select()
3055 if (!req->rw.len) in io_iov_buffer_select()
3057 else if (req->rw.len > 1) in io_iov_buffer_select()
3058 return -EINVAL; in io_iov_buffer_select()
3061 if (req->ctx->compat) in io_iov_buffer_select()
3072 void __user *buf = u64_to_user_ptr(req->rw.addr); in __io_import_iovec()
3073 size_t sqe_len = req->rw.len; in __io_import_iovec()
3077 opcode = req->opcode; in __io_import_iovec()
3084 if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) in __io_import_iovec()
3085 return -EINVAL; in __io_import_iovec()
3088 if (req->flags & REQ_F_BUFFER_SELECT) { in __io_import_iovec()
3092 req->rw.len = sqe_len; in __io_import_iovec()
3100 if (req->flags & REQ_F_BUFFER_SELECT) { in __io_import_iovec()
3103 ret = (*iovec)->iov_len; in __io_import_iovec()
3111 req->ctx->compat); in __io_import_iovec()
3118 struct io_async_rw *iorw = req->async_data; in io_import_iovec()
3123 return iov_iter_count(&iorw->iter); in io_import_iovec()
3128 return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; in io_kiocb_ppos()
3132 * For files that don't have ->read_iter() and ->write_iter(), handle them
3133 * by looping over ->read() or ->write() manually.
3137 struct kiocb *kiocb = &req->rw.kiocb; in loop_rw_iter()
3138 struct file *file = req->file; in loop_rw_iter()
3143 * support non-blocking either. For the latter, this just causes in loop_rw_iter()
3146 if (kiocb->ki_flags & IOCB_HIPRI) in loop_rw_iter()
3147 return -EOPNOTSUPP; in loop_rw_iter()
3148 if (kiocb->ki_flags & IOCB_NOWAIT) in loop_rw_iter()
3149 return -EAGAIN; in loop_rw_iter()
3158 iovec.iov_base = u64_to_user_ptr(req->rw.addr); in loop_rw_iter()
3159 iovec.iov_len = req->rw.len; in loop_rw_iter()
3163 nr = file->f_op->read(file, iovec.iov_base, in loop_rw_iter()
3166 nr = file->f_op->write(file, iovec.iov_base, in loop_rw_iter()
3178 req->rw.len -= nr; in loop_rw_iter()
3179 req->rw.addr += nr; in loop_rw_iter()
3189 struct io_async_rw *rw = req->async_data; in io_req_map_rw()
3191 memcpy(&rw->iter, iter, sizeof(*iter)); in io_req_map_rw()
3192 rw->free_iovec = iovec; in io_req_map_rw()
3193 rw->bytes_done = 0; in io_req_map_rw()
3200 rw->iter.iov = rw->fast_iov; in io_req_map_rw()
3201 if (iter->iov != fast_iov) { in io_req_map_rw()
3202 iov_off = iter->iov - fast_iov; in io_req_map_rw()
3203 rw->iter.iov += iov_off; in io_req_map_rw()
3205 if (rw->fast_iov != fast_iov) in io_req_map_rw()
3206 memcpy(rw->fast_iov + iov_off, fast_iov + iov_off, in io_req_map_rw()
3207 sizeof(struct iovec) * iter->nr_segs); in io_req_map_rw()
3209 req->flags |= REQ_F_NEED_CLEANUP; in io_req_map_rw()
3215 WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); in __io_alloc_async_data()
3216 req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); in __io_alloc_async_data()
3217 return req->async_data == NULL; in __io_alloc_async_data()
3222 if (!io_op_defs[req->opcode].needs_async_data) in io_alloc_async_data()
3232 if (!force && !io_op_defs[req->opcode].needs_async_data) in io_setup_async_rw()
3234 if (!req->async_data) { in io_setup_async_rw()
3236 return -ENOMEM; in io_setup_async_rw()
3245 struct io_async_rw *iorw = req->async_data; in io_rw_prep_async()
3246 struct iovec *iov = iorw->fast_iov; in io_rw_prep_async()
3249 ret = __io_import_iovec(rw, req, &iov, &iorw->iter, false); in io_rw_prep_async()
3253 iorw->bytes_done = 0; in io_rw_prep_async()
3254 iorw->free_iovec = iov; in io_rw_prep_async()
3256 req->flags |= REQ_F_NEED_CLEANUP; in io_rw_prep_async()
3268 if (unlikely(!(req->file->f_mode & FMODE_READ))) in io_read_prep()
3269 return -EBADF; in io_read_prep()
3272 if (!req->async_data) in io_read_prep()
3282 * queue a task_work based retry of the operation, attempting to copy the data
3284 * do a thread based blocking retry of the operation. That's the unexpected
3291 struct io_kiocb *req = wait->private; in io_async_buf_func()
3300 req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; in io_async_buf_func()
3301 list_del_init(&wait->entry); in io_async_buf_func()
3303 init_task_work(&req->task_work, io_req_task_submit); in io_async_buf_func()
3304 percpu_ref_get(&req->ctx->refs); in io_async_buf_func()
3307 refcount_inc(&req->refs); in io_async_buf_func()
3313 init_task_work(&req->task_work, io_req_task_cancel); in io_async_buf_func()
3314 tsk = io_wq_get_task(req->ctx->io_wq); in io_async_buf_func()
3315 task_work_add(tsk, &req->task_work, TWA_NONE); in io_async_buf_func()
3323 * based retry. If we return false here, the request is handed to the async
3324 * worker threads for retry. If we're doing buffered reads on a regular file,
3325 * we prepare a private wait_page_queue entry and retry the operation. This
3328 * that callback, io_uring uses task_work to setup a retry of the operation.
3329 * That retry will attempt the buffered read again. The retry will generally
3331 * async worker threads for a blocking retry.
3335 struct io_async_rw *rw = req->async_data; in io_rw_should_retry()
3336 struct wait_page_queue *wait = &rw->wpq; in io_rw_should_retry()
3337 struct kiocb *kiocb = &req->rw.kiocb; in io_rw_should_retry()
3339 /* never retry for NOWAIT, we just complete with -EAGAIN */ in io_rw_should_retry()
3340 if (req->flags & REQ_F_NOWAIT) in io_rw_should_retry()
3344 if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) in io_rw_should_retry()
3348 * just use poll if we can, and don't attempt if the fs doesn't in io_rw_should_retry()
3351 if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) in io_rw_should_retry()
3354 wait->wait.func = io_async_buf_func; in io_rw_should_retry()
3355 wait->wait.private = req; in io_rw_should_retry()
3356 wait->wait.flags = 0; in io_rw_should_retry()
3357 INIT_LIST_HEAD(&wait->wait.entry); in io_rw_should_retry()
3358 kiocb->ki_flags |= IOCB_WAITQ; in io_rw_should_retry()
3359 kiocb->ki_flags &= ~IOCB_NOWAIT; in io_rw_should_retry()
3360 kiocb->ki_waitq = wait; in io_rw_should_retry()
3366 if (req->file->f_op->read_iter) in io_iter_do_read()
3367 return call_read_iter(req->file, &req->rw.kiocb, iter); in io_iter_do_read()
3368 else if (req->file->f_op->read) in io_iter_do_read()
3371 return -EINVAL; in io_iter_do_read()
3378 struct kiocb *kiocb = &req->rw.kiocb; in io_read()
3380 struct io_async_rw *rw = req->async_data; in io_read()
3386 iter = &rw->iter; in io_read()
3393 req->result = io_size; in io_read()
3396 /* Ensure we clear previously set non-block flag */ in io_read()
3398 kiocb->ki_flags &= ~IOCB_NOWAIT; in io_read()
3400 kiocb->ki_flags |= IOCB_NOWAIT; in io_read()
3404 no_async = force_nonblock && !io_file_supports_async(req->file, READ); in io_read()
3408 ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count); in io_read()
3416 } else if (ret == -EIOCBQUEUED) { in io_read()
3419 } else if (ret == -EAGAIN) { in io_read()
3420 /* IOPOLL retry should happen for io-wq threads */ in io_read()
3421 if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_read()
3423 /* no retry on NONBLOCK marked file */ in io_read()
3424 if (req->file->f_flags & O_NONBLOCK) in io_read()
3427 iov_iter_revert(iter, iov_count - iov_iter_count(iter)); in io_read()
3431 /* make sure -ERESTARTSYS -> -EINTR is done */ in io_read()
3435 /* read it all, or we did a blocking attempt. no retry. */ in io_read()
3437 (req->file->f_flags & O_NONBLOCK)) in io_read()
3440 io_size -= ret; in io_read()
3448 return -EAGAIN; in io_read()
3449 rw = req->async_data; in io_read()
3450 /* it's copied and will be cleaned with ->io */ in io_read()
3453 iter = &rw->iter; in io_read()
3454 retry: in io_read()
3455 rw->bytes_done += ret; in io_read()
3456 /* if we can retry, do so with the callbacks armed */ in io_read()
3458 kiocb->ki_flags &= ~IOCB_WAITQ; in io_read()
3459 return -EAGAIN; in io_read()
3463 * Now retry read with the IOCB_WAITQ parts set in the iocb. If we in io_read()
3464 * get -EIOCBQUEUED, then we'll get a notification when the desired in io_read()
3466 * do, then just retry at the new offset. in io_read()
3469 if (ret == -EIOCBQUEUED) { in io_read()
3473 /* we got some bytes, but not all. retry. */ in io_read()
3474 goto retry; in io_read()
3494 if (unlikely(!(req->file->f_mode & FMODE_WRITE))) in io_write_prep()
3495 return -EBADF; in io_write_prep()
3498 if (!req->async_data) in io_write_prep()
3507 struct kiocb *kiocb = &req->rw.kiocb; in io_write()
3509 struct io_async_rw *rw = req->async_data; in io_write()
3514 iter = &rw->iter; in io_write()
3521 req->result = io_size; in io_write()
3523 /* Ensure we clear previously set non-block flag */ in io_write()
3525 kiocb->ki_flags &= ~IOCB_NOWAIT; in io_write()
3527 kiocb->ki_flags |= IOCB_NOWAIT; in io_write()
3530 if (force_nonblock && !io_file_supports_async(req->file, WRITE)) in io_write()
3533 /* file path doesn't support NOWAIT for non-direct_IO */ in io_write()
3534 if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && in io_write()
3535 (req->flags & REQ_F_ISREG)) in io_write()
3538 ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), iov_count); in io_write()
3543 * Open-code file_start_write here to grab freeze protection, in io_write()
3549 if (req->flags & REQ_F_ISREG) { in io_write()
3550 sb_start_write(file_inode(req->file)->i_sb); in io_write()
3551 __sb_writers_release(file_inode(req->file)->i_sb, in io_write()
3554 kiocb->ki_flags |= IOCB_WRITE; in io_write()
3556 if (req->file->f_op->write_iter) in io_write()
3557 ret2 = call_write_iter(req->file, kiocb, iter); in io_write()
3558 else if (req->file->f_op->write) in io_write()
3561 ret2 = -EINVAL; in io_write()
3564 * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just in io_write()
3565 * retry them without IOCB_NOWAIT. in io_write()
3567 if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) in io_write()
3568 ret2 = -EAGAIN; in io_write()
3569 /* no retry on NONBLOCK marked file */ in io_write()
3570 if (ret2 == -EAGAIN && (req->file->f_flags & O_NONBLOCK)) in io_write()
3572 if (!force_nonblock || ret2 != -EAGAIN) { in io_write()
3573 /* IOPOLL retry should happen for io-wq threads */ in io_write()
3574 if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) in io_write()
3581 iov_iter_revert(iter, iov_count - iov_iter_count(iter)); in io_write()
3584 return -EAGAIN; in io_write()
3596 struct io_splice* sp = &req->splice; in __io_splice_prep()
3599 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in __io_splice_prep()
3600 return -EINVAL; in __io_splice_prep()
3602 sp->file_in = NULL; in __io_splice_prep()
3603 sp->len = READ_ONCE(sqe->len); in __io_splice_prep()
3604 sp->flags = READ_ONCE(sqe->splice_flags); in __io_splice_prep()
3606 if (unlikely(sp->flags & ~valid_flags)) in __io_splice_prep()
3607 return -EINVAL; in __io_splice_prep()
3609 sp->file_in = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in), in __io_splice_prep()
3610 (sp->flags & SPLICE_F_FD_IN_FIXED)); in __io_splice_prep()
3611 if (!sp->file_in) in __io_splice_prep()
3612 return -EBADF; in __io_splice_prep()
3613 req->flags |= REQ_F_NEED_CLEANUP; in __io_splice_prep()
3615 if (!S_ISREG(file_inode(sp->file_in)->i_mode)) { in __io_splice_prep()
3621 req->work.flags |= IO_WQ_WORK_UNBOUND; in __io_splice_prep()
3630 if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) in io_tee_prep()
3631 return -EINVAL; in io_tee_prep()
3637 struct io_splice *sp = &req->splice; in io_tee()
3638 struct file *in = sp->file_in; in io_tee()
3639 struct file *out = sp->file_out; in io_tee()
3640 unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; in io_tee()
3644 return -EAGAIN; in io_tee()
3645 if (sp->len) in io_tee()
3646 ret = do_tee(in, out, sp->len, flags); in io_tee()
3648 io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED)); in io_tee()
3649 req->flags &= ~REQ_F_NEED_CLEANUP; in io_tee()
3651 if (ret != sp->len) in io_tee()
3659 struct io_splice* sp = &req->splice; in io_splice_prep()
3661 sp->off_in = READ_ONCE(sqe->splice_off_in); in io_splice_prep()
3662 sp->off_out = READ_ONCE(sqe->off); in io_splice_prep()
3668 struct io_splice *sp = &req->splice; in io_splice()
3669 struct file *in = sp->file_in; in io_splice()
3670 struct file *out = sp->file_out; in io_splice()
3671 unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; in io_splice()
3676 return -EAGAIN; in io_splice()
3678 poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; in io_splice()
3679 poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; in io_splice()
3681 if (sp->len) in io_splice()
3682 ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); in io_splice()
3684 io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED)); in io_splice()
3685 req->flags &= ~REQ_F_NEED_CLEANUP; in io_splice()
3687 if (ret != sp->len) in io_splice()
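io_splice_prep()/io_splice() above route IORING_OP_SPLICE through do_splice(), with off_in/off_out of -1 meaning "use the file's own position" and SPLICE_F_FD_IN_FIXED selecting a registered input file. A hedged liburing sketch of moving bytes from a regular file into a pipe is below; liburing is assumed and the input path and byte count are placeholders.

/* Sketch: IORING_OP_SPLICE from a regular file into a pipe (assumes liburing). */
#include <liburing.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    int pipefd[2];
    int fd = open("/etc/hostname", O_RDONLY);  /* placeholder input file */

    if (fd < 0 || pipe(pipefd) < 0)
        return 1;
    io_uring_queue_init(4, &ring, 0);

    sqe = io_uring_get_sqe(&ring);
    /* off_in = 0 reads from file offset 0; off_out = -1 lets the pipe manage
     * its own position (mirrors the sp->off_in / sp->off_out handling above). */
    io_uring_prep_splice(sqe, fd, 0, pipefd[1], -1, 512, 0);
    io_uring_submit(&ring);

    if (io_uring_wait_cqe(&ring, &cqe) == 0) {
        printf("spliced %d bytes\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
    }
    io_uring_queue_exit(&ring);
    close(fd);
    return 0;
}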
3698 struct io_ring_ctx *ctx = req->ctx; in io_nop()
3700 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) in io_nop()
3701 return -EINVAL; in io_nop()
3709 struct io_ring_ctx *ctx = req->ctx; in io_prep_fsync()
3711 if (!req->file) in io_prep_fsync()
3712 return -EBADF; in io_prep_fsync()
3714 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) in io_prep_fsync()
3715 return -EINVAL; in io_prep_fsync()
3716 if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) in io_prep_fsync()
3717 return -EINVAL; in io_prep_fsync()
3719 req->sync.flags = READ_ONCE(sqe->fsync_flags); in io_prep_fsync()
3720 if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) in io_prep_fsync()
3721 return -EINVAL; in io_prep_fsync()
3723 req->sync.off = READ_ONCE(sqe->off); in io_prep_fsync()
3724 req->sync.len = READ_ONCE(sqe->len); in io_prep_fsync()
3730 loff_t end = req->sync.off + req->sync.len; in io_fsync()
3735 return -EAGAIN; in io_fsync()
3737 ret = vfs_fsync_range(req->file, req->sync.off, in io_fsync()
3739 req->sync.flags & IORING_FSYNC_DATASYNC); in io_fsync()
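io_prep_fsync()/io_fsync() above map IORING_OP_FSYNC onto vfs_fsync_range(), honoring the off/len pair and IORING_FSYNC_DATASYNC. A short liburing sketch of queueing a datasync follows; liburing is assumed and the file path is a throwaway placeholder.

/* Sketch: IORING_OP_FSYNC with IORING_FSYNC_DATASYNC (assumes liburing). */
#include <liburing.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    int fd = open("/tmp/io_uring_fsync_demo", O_CREAT | O_WRONLY, 0644);

    if (fd < 0)
        return 1;
    io_uring_queue_init(4, &ring, 0);

    if (write(fd, "hello\n", 6) < 0)
        perror("write");

    sqe = io_uring_get_sqe(&ring);
    /* Data-only sync; ends up as req->sync.flags & IORING_FSYNC_DATASYNC. */
    io_uring_prep_fsync(sqe, fd, IORING_FSYNC_DATASYNC);
    io_uring_submit(&ring);

    if (io_uring_wait_cqe(&ring, &cqe) == 0) {
        printf("fsync res=%d\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
    }
    io_uring_queue_exit(&ring);
    close(fd);
    return 0;
}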
3749 if (sqe->ioprio || sqe->buf_index || sqe->rw_flags) in io_fallocate_prep()
3750 return -EINVAL; in io_fallocate_prep()
3751 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_fallocate_prep()
3752 return -EINVAL; in io_fallocate_prep()
3754 req->sync.off = READ_ONCE(sqe->off); in io_fallocate_prep()
3755 req->sync.len = READ_ONCE(sqe->addr); in io_fallocate_prep()
3756 req->sync.mode = READ_ONCE(sqe->len); in io_fallocate_prep()
3766 return -EAGAIN; in io_fallocate()
3767 ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, in io_fallocate()
3768 req->sync.len); in io_fallocate()
3780 if (unlikely(sqe->ioprio || sqe->buf_index)) in __io_openat_prep()
3781 return -EINVAL; in __io_openat_prep()
3782 if (unlikely(req->flags & REQ_F_FIXED_FILE)) in __io_openat_prep()
3783 return -EBADF; in __io_openat_prep()
3786 if (!(req->open.how.flags & O_PATH) && force_o_largefile()) in __io_openat_prep()
3787 req->open.how.flags |= O_LARGEFILE; in __io_openat_prep()
3789 req->open.dfd = READ_ONCE(sqe->fd); in __io_openat_prep()
3790 fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); in __io_openat_prep()
3791 req->open.filename = getname(fname); in __io_openat_prep()
3792 if (IS_ERR(req->open.filename)) { in __io_openat_prep()
3793 ret = PTR_ERR(req->open.filename); in __io_openat_prep()
3794 req->open.filename = NULL; in __io_openat_prep()
3797 req->open.nofile = rlimit(RLIMIT_NOFILE); in __io_openat_prep()
3798 req->open.ignore_nonblock = false; in __io_openat_prep()
3799 req->flags |= REQ_F_NEED_CLEANUP; in __io_openat_prep()
3807 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) in io_openat_prep()
3808 return -EINVAL; in io_openat_prep()
3809 mode = READ_ONCE(sqe->len); in io_openat_prep()
3810 flags = READ_ONCE(sqe->open_flags); in io_openat_prep()
3811 req->open.how = build_open_how(flags, mode); in io_openat_prep()
3821 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) in io_openat2_prep()
3822 return -EINVAL; in io_openat2_prep()
3823 how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); in io_openat2_prep()
3824 len = READ_ONCE(sqe->len); in io_openat2_prep()
3826 return -EINVAL; in io_openat2_prep()
3828 ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how, in io_openat2_prep()
3842 if (force_nonblock && !req->open.ignore_nonblock) in io_openat2()
3843 return -EAGAIN; in io_openat2()
3845 ret = build_open_flags(&req->open.how, &op); in io_openat2()
3849 ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); in io_openat2()
3853 file = do_filp_open(req->open.dfd, req->open.filename, &op); in io_openat2()
3858			 * A work-around to ensure that /proc/self works the way in io_openat2()
3859 * that it should - if we get -EOPNOTSUPP back, then assume in io_openat2()
3861 * context. We should be safe to retry this from the task in io_openat2()
3866 if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) { in io_openat2()
3867 req->open.ignore_nonblock = true; in io_openat2()
3868 refcount_inc(&req->refs); in io_openat2()
3877 putname(req->open.filename); in io_openat2()
3878 req->flags &= ~REQ_F_NEED_CLEANUP; in io_openat2()
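/*
 * Illustrative userspace sketch of driving the IORING_OP_OPENAT2 path
 * above. A minimal example assuming liburing and an initialized ring;
 * the open_how contents are copied by io_openat2_prep() via
 * copy_struct_from_user().
 */
#include <liburing.h>
#include <linux/openat2.h>
#include <fcntl.h>
#include <string.h>

static int sketch_openat2(struct io_uring *ring, const char *path)
{
	struct open_how how;
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int fd;

	if (!sqe)
		return -1;
	memset(&how, 0, sizeof(how));
	how.flags = O_RDONLY | O_CLOEXEC;
	io_uring_prep_openat2(sqe, AT_FDCWD, path, &how);
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	fd = cqe->res;		/* new descriptor or -errno */
	io_uring_cqe_seen(ring, cqe);
	return fd;
}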
3893 struct io_provide_buf *p = &req->pbuf; in io_remove_buffers_prep()
3896 if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off) in io_remove_buffers_prep()
3897 return -EINVAL; in io_remove_buffers_prep()
3899 tmp = READ_ONCE(sqe->fd); in io_remove_buffers_prep()
3901 return -EINVAL; in io_remove_buffers_prep()
3904 p->nbufs = tmp; in io_remove_buffers_prep()
3905 p->bgid = READ_ONCE(sqe->buf_group); in io_remove_buffers_prep()
3919 while (!list_empty(&buf->list)) { in __io_remove_buffers()
3922 nxt = list_first_entry(&buf->list, struct io_buffer, list); in __io_remove_buffers()
3923 list_del(&nxt->list); in __io_remove_buffers()
3930 idr_remove(&ctx->io_buffer_idr, bgid); in __io_remove_buffers()
3938 struct io_provide_buf *p = &req->pbuf; in io_remove_buffers()
3939 struct io_ring_ctx *ctx = req->ctx; in io_remove_buffers()
3945 lockdep_assert_held(&ctx->uring_lock); in io_remove_buffers()
3947 ret = -ENOENT; in io_remove_buffers()
3948 head = idr_find(&ctx->io_buffer_idr, p->bgid); in io_remove_buffers()
3950 ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); in io_remove_buffers()
3962 struct io_provide_buf *p = &req->pbuf; in io_provide_buffers_prep()
3965 if (sqe->ioprio || sqe->rw_flags) in io_provide_buffers_prep()
3966 return -EINVAL; in io_provide_buffers_prep()
3968 tmp = READ_ONCE(sqe->fd); in io_provide_buffers_prep()
3970 return -E2BIG; in io_provide_buffers_prep()
3971 p->nbufs = tmp; in io_provide_buffers_prep()
3972 p->addr = READ_ONCE(sqe->addr); in io_provide_buffers_prep()
3973 p->len = READ_ONCE(sqe->len); in io_provide_buffers_prep()
3975 if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs))) in io_provide_buffers_prep()
3976 return -EFAULT; in io_provide_buffers_prep()
3978 p->bgid = READ_ONCE(sqe->buf_group); in io_provide_buffers_prep()
3979 tmp = READ_ONCE(sqe->off); in io_provide_buffers_prep()
3981 return -E2BIG; in io_provide_buffers_prep()
3982 p->bid = tmp; in io_provide_buffers_prep()
3989 u64 addr = pbuf->addr; in io_add_buffers()
3990 int i, bid = pbuf->bid; in io_add_buffers()
3992 for (i = 0; i < pbuf->nbufs; i++) { in io_add_buffers()
3997 buf->addr = addr; in io_add_buffers()
3998 buf->len = pbuf->len; in io_add_buffers()
3999 buf->bid = bid; in io_add_buffers()
4000 addr += pbuf->len; in io_add_buffers()
4003 INIT_LIST_HEAD(&buf->list); in io_add_buffers()
4006 list_add_tail(&buf->list, &(*head)->list); in io_add_buffers()
4010 return i ? i : -ENOMEM; in io_add_buffers()
4016 struct io_provide_buf *p = &req->pbuf; in io_provide_buffers()
4017 struct io_ring_ctx *ctx = req->ctx; in io_provide_buffers()
4023 lockdep_assert_held(&ctx->uring_lock); in io_provide_buffers()
4025 list = head = idr_find(&ctx->io_buffer_idr, p->bgid); in io_provide_buffers()
4032 ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1, in io_provide_buffers()
4035 __io_remove_buffers(ctx, head, p->bgid, -1U); in io_provide_buffers()
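/*
 * Illustrative userspace sketch of the buffer registration handled by
 * io_provide_buffers() above, paired with a buffer-select receive. A
 * minimal sketch assuming liburing; the group id, buffer count and sizes
 * are arbitrary and error handling is elided.
 */
#include <liburing.h>
#include <stdlib.h>

#define SK_BGID		7
#define SK_NR_BUFS	8
#define SK_BUF_LEN	4096

static int sketch_provide_and_recv(struct io_uring *ring, int sockfd)
{
	char *pool = malloc(SK_NR_BUFS * SK_BUF_LEN);
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int bid;

	if (!pool)
		return -1;
	/* IORING_OP_PROVIDE_BUFFERS: hand SK_NR_BUFS buffers to group SK_BGID */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_provide_buffers(sqe, pool, SK_BUF_LEN, SK_NR_BUFS,
				      SK_BGID, 0);
	io_uring_submit(ring);
	io_uring_wait_cqe(ring, &cqe);
	io_uring_cqe_seen(ring, cqe);

	/* recv with IOSQE_BUFFER_SELECT: the kernel picks a buffer from the group */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, sockfd, NULL, SK_BUF_LEN, 0);
	io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT);
	sqe->buf_group = SK_BGID;
	io_uring_submit(ring);
	io_uring_wait_cqe(ring, &cqe);

	/* the chosen buffer id is reported in the upper bits of cqe->flags */
	bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
	io_uring_cqe_seen(ring, cqe);
	return bid;
}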
4051 if (sqe->ioprio || sqe->buf_index) in io_epoll_ctl_prep()
4052 return -EINVAL; in io_epoll_ctl_prep()
4053 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) in io_epoll_ctl_prep()
4054 return -EINVAL; in io_epoll_ctl_prep()
4056 req->epoll.epfd = READ_ONCE(sqe->fd); in io_epoll_ctl_prep()
4057 req->epoll.op = READ_ONCE(sqe->len); in io_epoll_ctl_prep()
4058 req->epoll.fd = READ_ONCE(sqe->off); in io_epoll_ctl_prep()
4060 if (ep_op_has_event(req->epoll.op)) { in io_epoll_ctl_prep()
4063 ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); in io_epoll_ctl_prep()
4064 if (copy_from_user(&req->epoll.event, ev, sizeof(*ev))) in io_epoll_ctl_prep()
4065 return -EFAULT; in io_epoll_ctl_prep()
4070 return -EOPNOTSUPP; in io_epoll_ctl_prep()
4078 struct io_epoll *ie = &req->epoll; in io_epoll_ctl()
4081 ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); in io_epoll_ctl()
4082 if (force_nonblock && ret == -EAGAIN) in io_epoll_ctl()
4083 return -EAGAIN; in io_epoll_ctl()
4090 return -EOPNOTSUPP; in io_epoll_ctl()
4097 if (sqe->ioprio || sqe->buf_index || sqe->off) in io_madvise_prep()
4098 return -EINVAL; in io_madvise_prep()
4099 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_madvise_prep()
4100 return -EINVAL; in io_madvise_prep()
4102 req->madvise.addr = READ_ONCE(sqe->addr); in io_madvise_prep()
4103 req->madvise.len = READ_ONCE(sqe->len); in io_madvise_prep()
4104 req->madvise.advice = READ_ONCE(sqe->fadvise_advice); in io_madvise_prep()
4107 return -EOPNOTSUPP; in io_madvise_prep()
4114 struct io_madvise *ma = &req->madvise; in io_madvise()
4118 return -EAGAIN; in io_madvise()
4120 ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); in io_madvise()
4126 return -EOPNOTSUPP; in io_madvise()
4132 if (sqe->ioprio || sqe->buf_index || sqe->addr) in io_fadvise_prep()
4133 return -EINVAL; in io_fadvise_prep()
4134 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_fadvise_prep()
4135 return -EINVAL; in io_fadvise_prep()
4137 req->fadvise.offset = READ_ONCE(sqe->off); in io_fadvise_prep()
4138 req->fadvise.len = READ_ONCE(sqe->len); in io_fadvise_prep()
4139 req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); in io_fadvise_prep()
4145 struct io_fadvise *fa = &req->fadvise; in io_fadvise()
4149 switch (fa->advice) { in io_fadvise()
4155 return -EAGAIN; in io_fadvise()
4159 ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); in io_fadvise()
4168 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) in io_statx_prep()
4169 return -EINVAL; in io_statx_prep()
4170 if (sqe->ioprio || sqe->buf_index) in io_statx_prep()
4171 return -EINVAL; in io_statx_prep()
4172 if (req->flags & REQ_F_FIXED_FILE) in io_statx_prep()
4173 return -EBADF; in io_statx_prep()
4175 req->statx.dfd = READ_ONCE(sqe->fd); in io_statx_prep()
4176 req->statx.mask = READ_ONCE(sqe->len); in io_statx_prep()
4177 req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr)); in io_statx_prep()
4178 req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); in io_statx_prep()
4179 req->statx.flags = READ_ONCE(sqe->statx_flags); in io_statx_prep()
4186 struct io_statx *ctx = &req->statx; in io_statx()
4191 if (ctx->dfd == -1 || ctx->dfd == AT_FDCWD) in io_statx()
4192 req->flags |= REQ_F_NO_FILE_TABLE; in io_statx()
4193 return -EAGAIN; in io_statx()
4196 ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, in io_statx()
4197 ctx->buffer); in io_statx()
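/*
 * Illustrative userspace sketch of the IORING_OP_STATX handling above,
 * assuming liburing, an initialized ring and a glibc new enough to expose
 * struct statx with _GNU_SOURCE.
 */
#define _GNU_SOURCE
#include <liburing.h>
#include <fcntl.h>
#include <sys/stat.h>

static int sketch_statx(struct io_uring *ring, const char *path,
			struct statx *stx)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -1;
	/* dfd/path/flags/mask/buffer mirror what io_statx_prep() reads */
	io_uring_prep_statx(sqe, AT_FDCWD, path, 0, STATX_SIZE, stx);
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}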
4213 req->work.flags |= IO_WQ_WORK_NO_CANCEL; in io_close_prep()
4215 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) in io_close_prep()
4216 return -EINVAL; in io_close_prep()
4217 if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || in io_close_prep()
4218 sqe->rw_flags || sqe->buf_index) in io_close_prep()
4219 return -EINVAL; in io_close_prep()
4220 if (req->flags & REQ_F_FIXED_FILE) in io_close_prep()
4221 return -EBADF; in io_close_prep()
4223 req->close.fd = READ_ONCE(sqe->fd); in io_close_prep()
4224 if ((req->file && req->file->f_op == &io_uring_fops)) in io_close_prep()
4225 return -EBADF; in io_close_prep()
4227 req->close.put_file = NULL; in io_close_prep()
4234 struct io_close *close = &req->close; in io_close()
4238 if (!close->put_file) { in io_close()
4239 ret = __close_fd_get_file(close->fd, &close->put_file); in io_close()
4241 return (ret == -ENOENT) ? -EBADF : ret; in io_close()
4245 if (close->put_file->f_op->flush && force_nonblock) { in io_close()
4247 req->flags &= ~REQ_F_NOWAIT; in io_close()
4248 /* avoid grabbing files - we don't need the files */ in io_close()
4249 req->flags |= REQ_F_NO_FILE_TABLE; in io_close()
4250 return -EAGAIN; in io_close()
4253 /* No ->flush() or already async, safely close from here */ in io_close()
4254 ret = filp_close(close->put_file, req->work.identity->files); in io_close()
4257 fput(close->put_file); in io_close()
4258 close->put_file = NULL; in io_close()
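/*
 * Illustrative userspace sketch of the IORING_OP_CLOSE path above,
 * assuming liburing.
 */
#include <liburing.h>

static int sketch_close(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -1;
	io_uring_prep_close(sqe, fd);	/* handled by io_close_prep()/io_close() */
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	ret = cqe->res;			/* 0 on success, -errno on failure */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}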
4265 struct io_ring_ctx *ctx = req->ctx; in io_prep_sfr()
4267 if (!req->file) in io_prep_sfr()
4268 return -EBADF; in io_prep_sfr()
4270 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) in io_prep_sfr()
4271 return -EINVAL; in io_prep_sfr()
4272 if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) in io_prep_sfr()
4273 return -EINVAL; in io_prep_sfr()
4275 req->sync.off = READ_ONCE(sqe->off); in io_prep_sfr()
4276 req->sync.len = READ_ONCE(sqe->len); in io_prep_sfr()
4277 req->sync.flags = READ_ONCE(sqe->sync_range_flags); in io_prep_sfr()
4287 return -EAGAIN; in io_sync_file_range()
4289 ret = sync_file_range(req->file, req->sync.off, req->sync.len, in io_sync_file_range()
4290 req->sync.flags); in io_sync_file_range()
4301 struct io_async_msghdr *async_msg = req->async_data; in io_setup_async_msg()
4304 return -EAGAIN; in io_setup_async_msg()
4306 if (kmsg->iov != kmsg->fast_iov) in io_setup_async_msg()
4307 kfree(kmsg->iov); in io_setup_async_msg()
4308 return -ENOMEM; in io_setup_async_msg()
4310 async_msg = req->async_data; in io_setup_async_msg()
4311 req->flags |= REQ_F_NEED_CLEANUP; in io_setup_async_msg()
4313 return -EAGAIN; in io_setup_async_msg()
4319 iomsg->iov = iomsg->fast_iov; in io_sendmsg_copy_hdr()
4320 iomsg->msg.msg_name = &iomsg->addr; in io_sendmsg_copy_hdr()
4321 return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, in io_sendmsg_copy_hdr()
4322 req->sr_msg.msg_flags, &iomsg->iov); in io_sendmsg_copy_hdr()
4327 struct io_async_msghdr *async_msg = req->async_data; in io_sendmsg_prep()
4328 struct io_sr_msg *sr = &req->sr_msg; in io_sendmsg_prep()
4331 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_sendmsg_prep()
4332 return -EINVAL; in io_sendmsg_prep()
4334 sr->msg_flags = READ_ONCE(sqe->msg_flags); in io_sendmsg_prep()
4335 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); in io_sendmsg_prep()
4336 sr->len = READ_ONCE(sqe->len); in io_sendmsg_prep()
4339 if (req->ctx->compat) in io_sendmsg_prep()
4340 sr->msg_flags |= MSG_CMSG_COMPAT; in io_sendmsg_prep()
4343 if (!async_msg || !io_op_defs[req->opcode].needs_async_data) in io_sendmsg_prep()
4347 req->flags |= REQ_F_NEED_CLEANUP; in io_sendmsg_prep()
4359 sock = sock_from_file(req->file, &ret); in io_sendmsg()
4363 if (req->async_data) { in io_sendmsg()
4364 kmsg = req->async_data; in io_sendmsg()
4365 kmsg->msg.msg_name = &kmsg->addr; in io_sendmsg()
4367 if (!kmsg->iov) in io_sendmsg()
4368 kmsg->iov = kmsg->fast_iov; in io_sendmsg()
4369 kmsg->msg.msg_iter.iov = kmsg->iov; in io_sendmsg()
4377 flags = req->sr_msg.msg_flags; in io_sendmsg()
4379 req->flags |= REQ_F_NOWAIT; in io_sendmsg()
4383 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); in io_sendmsg()
4384 if (force_nonblock && ret == -EAGAIN) in io_sendmsg()
4386 if (ret == -ERESTARTSYS) in io_sendmsg()
4387 ret = -EINTR; in io_sendmsg()
4389 if (kmsg->iov != kmsg->fast_iov) in io_sendmsg()
4390 kfree(kmsg->iov); in io_sendmsg()
4391 req->flags &= ~REQ_F_NEED_CLEANUP; in io_sendmsg()
4401 struct io_sr_msg *sr = &req->sr_msg; in io_send()
4408 sock = sock_from_file(req->file, &ret); in io_send()
4412 ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); in io_send()
4421 flags = req->sr_msg.msg_flags; in io_send()
4423 req->flags |= REQ_F_NOWAIT; in io_send()
4429 if (force_nonblock && ret == -EAGAIN) in io_send()
4430 return -EAGAIN; in io_send()
4431 if (ret == -ERESTARTSYS) in io_send()
4432 ret = -EINTR; in io_send()
4443 struct io_sr_msg *sr = &req->sr_msg; in __io_recvmsg_copy_hdr()
4448 ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg, in __io_recvmsg_copy_hdr()
4449 &iomsg->uaddr, &uiov, &iov_len); in __io_recvmsg_copy_hdr()
4453 if (req->flags & REQ_F_BUFFER_SELECT) { in __io_recvmsg_copy_hdr()
4455 return -EINVAL; in __io_recvmsg_copy_hdr()
4456 if (copy_from_user(iomsg->iov, uiov, sizeof(*uiov))) in __io_recvmsg_copy_hdr()
4457 return -EFAULT; in __io_recvmsg_copy_hdr()
4458 sr->len = iomsg->iov[0].iov_len; in __io_recvmsg_copy_hdr()
4459 iov_iter_init(&iomsg->msg.msg_iter, READ, iomsg->iov, 1, in __io_recvmsg_copy_hdr()
4460 sr->len); in __io_recvmsg_copy_hdr()
4461 iomsg->iov = NULL; in __io_recvmsg_copy_hdr()
4464 &iomsg->iov, &iomsg->msg.msg_iter, in __io_recvmsg_copy_hdr()
4478 struct io_sr_msg *sr = &req->sr_msg; in __io_compat_recvmsg_copy_hdr()
4484 msg_compat = (struct compat_msghdr __user *) sr->umsg; in __io_compat_recvmsg_copy_hdr()
4485 ret = __get_compat_msghdr(&iomsg->msg, msg_compat, &iomsg->uaddr, in __io_compat_recvmsg_copy_hdr()
4491 if (req->flags & REQ_F_BUFFER_SELECT) { in __io_compat_recvmsg_copy_hdr()
4495 return -EINVAL; in __io_compat_recvmsg_copy_hdr()
4497 return -EFAULT; in __io_compat_recvmsg_copy_hdr()
4498 if (__get_user(clen, &uiov->iov_len)) in __io_compat_recvmsg_copy_hdr()
4499 return -EFAULT; in __io_compat_recvmsg_copy_hdr()
4501 return -EINVAL; in __io_compat_recvmsg_copy_hdr()
4502 sr->len = clen; in __io_compat_recvmsg_copy_hdr()
4503 iomsg->iov[0].iov_len = clen; in __io_compat_recvmsg_copy_hdr()
4504 iomsg->iov = NULL; in __io_compat_recvmsg_copy_hdr()
4507 UIO_FASTIOV, &iomsg->iov, in __io_compat_recvmsg_copy_hdr()
4508 &iomsg->msg.msg_iter, true); in __io_compat_recvmsg_copy_hdr()
4520 iomsg->msg.msg_name = &iomsg->addr; in io_recvmsg_copy_hdr()
4521 iomsg->iov = iomsg->fast_iov; in io_recvmsg_copy_hdr()
4524 if (req->ctx->compat) in io_recvmsg_copy_hdr()
4534 struct io_sr_msg *sr = &req->sr_msg; in io_recv_buffer_select()
4537 kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock); in io_recv_buffer_select()
4541 sr->kbuf = kbuf; in io_recv_buffer_select()
4542 req->flags |= REQ_F_BUFFER_SELECTED; in io_recv_buffer_select()
4548 return io_put_kbuf(req, req->sr_msg.kbuf); in io_put_recv_kbuf()
4554 struct io_async_msghdr *async_msg = req->async_data; in io_recvmsg_prep()
4555 struct io_sr_msg *sr = &req->sr_msg; in io_recvmsg_prep()
4558 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_recvmsg_prep()
4559 return -EINVAL; in io_recvmsg_prep()
4561 sr->msg_flags = READ_ONCE(sqe->msg_flags); in io_recvmsg_prep()
4562 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); in io_recvmsg_prep()
4563 sr->len = READ_ONCE(sqe->len); in io_recvmsg_prep()
4564 sr->bgid = READ_ONCE(sqe->buf_group); in io_recvmsg_prep()
4567 if (req->ctx->compat) in io_recvmsg_prep()
4568 sr->msg_flags |= MSG_CMSG_COMPAT; in io_recvmsg_prep()
4571 if (!async_msg || !io_op_defs[req->opcode].needs_async_data) in io_recvmsg_prep()
4575 req->flags |= REQ_F_NEED_CLEANUP; in io_recvmsg_prep()
4588 sock = sock_from_file(req->file, &ret); in io_recvmsg()
4592 if (req->async_data) { in io_recvmsg()
4593 kmsg = req->async_data; in io_recvmsg()
4594 kmsg->msg.msg_name = &kmsg->addr; in io_recvmsg()
4596 if (!kmsg->iov) in io_recvmsg()
4597 kmsg->iov = kmsg->fast_iov; in io_recvmsg()
4598 kmsg->msg.msg_iter.iov = kmsg->iov; in io_recvmsg()
4606 if (req->flags & REQ_F_BUFFER_SELECT) { in io_recvmsg()
4610 kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr); in io_recvmsg()
4611 iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov, in io_recvmsg()
4612 1, req->sr_msg.len); in io_recvmsg()
4615 flags = req->sr_msg.msg_flags; in io_recvmsg()
4617 req->flags |= REQ_F_NOWAIT; in io_recvmsg()
4621 ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, in io_recvmsg()
4622 kmsg->uaddr, flags); in io_recvmsg()
4623 if (force_nonblock && ret == -EAGAIN) in io_recvmsg()
4625 if (ret == -ERESTARTSYS) in io_recvmsg()
4626 ret = -EINTR; in io_recvmsg()
4628 if (req->flags & REQ_F_BUFFER_SELECTED) in io_recvmsg()
4630 if (kmsg->iov != kmsg->fast_iov) in io_recvmsg()
4631 kfree(kmsg->iov); in io_recvmsg()
4632 req->flags &= ~REQ_F_NEED_CLEANUP; in io_recvmsg()
4643 struct io_sr_msg *sr = &req->sr_msg; in io_recv()
4645 void __user *buf = sr->buf; in io_recv()
4651 sock = sock_from_file(req->file, &ret); in io_recv()
4655 if (req->flags & REQ_F_BUFFER_SELECT) { in io_recv()
4659 buf = u64_to_user_ptr(kbuf->addr); in io_recv()
4662 ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter); in io_recv()
4673 flags = req->sr_msg.msg_flags; in io_recv()
4675 req->flags |= REQ_F_NOWAIT; in io_recv()
4680 if (force_nonblock && ret == -EAGAIN) in io_recv()
4681 return -EAGAIN; in io_recv()
4682 if (ret == -ERESTARTSYS) in io_recv()
4683 ret = -EINTR; in io_recv()
4685 if (req->flags & REQ_F_BUFFER_SELECTED) in io_recv()
4695 struct io_accept *accept = &req->accept; in io_accept_prep()
4697 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) in io_accept_prep()
4698 return -EINVAL; in io_accept_prep()
4699 if (sqe->ioprio || sqe->len || sqe->buf_index) in io_accept_prep()
4700 return -EINVAL; in io_accept_prep()
4702 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); in io_accept_prep()
4703 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); in io_accept_prep()
4704 accept->flags = READ_ONCE(sqe->accept_flags); in io_accept_prep()
4705 accept->nofile = rlimit(RLIMIT_NOFILE); in io_accept_prep()
4712 struct io_accept *accept = &req->accept; in io_accept()
4716 if (req->file->f_flags & O_NONBLOCK) in io_accept()
4717 req->flags |= REQ_F_NOWAIT; in io_accept()
4719 ret = __sys_accept4_file(req->file, file_flags, accept->addr, in io_accept()
4720 accept->addr_len, accept->flags, in io_accept()
4721 accept->nofile); in io_accept()
4722 if (ret == -EAGAIN && force_nonblock) in io_accept()
4723 return -EAGAIN; in io_accept()
4725 if (ret == -ERESTARTSYS) in io_accept()
4726 ret = -EINTR; in io_accept()
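/*
 * Illustrative userspace sketch of the IORING_OP_ACCEPT handling above,
 * assuming liburing and an already-listening socket.
 */
#include <liburing.h>
#include <sys/socket.h>

static int sketch_accept(struct io_uring *ring, int listen_fd)
{
	struct sockaddr_storage ss;
	socklen_t len = sizeof(ss);
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int fd;

	if (!sqe)
		return -1;
	/* addr/addr_len/flags mirror what io_accept_prep() reads from the sqe */
	io_uring_prep_accept(sqe, listen_fd, (struct sockaddr *)&ss, &len, 0);
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	fd = cqe->res;		/* accepted descriptor or -errno */
	io_uring_cqe_seen(ring, cqe);
	return fd;
}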
4735 struct io_connect *conn = &req->connect; in io_connect_prep()
4736 struct io_async_connect *io = req->async_data; in io_connect_prep()
4738 if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) in io_connect_prep()
4739 return -EINVAL; in io_connect_prep()
4740 if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) in io_connect_prep()
4741 return -EINVAL; in io_connect_prep()
4743 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); in io_connect_prep()
4744 conn->addr_len = READ_ONCE(sqe->addr2); in io_connect_prep()
4749 return move_addr_to_kernel(conn->addr, conn->addr_len, in io_connect_prep()
4750 &io->address); in io_connect_prep()
4760 if (req->async_data) { in io_connect()
4761 io = req->async_data; in io_connect()
4763 ret = move_addr_to_kernel(req->connect.addr, in io_connect()
4764 req->connect.addr_len, in io_connect()
4773 ret = __sys_connect_file(req->file, &io->address, in io_connect()
4774 req->connect.addr_len, file_flags); in io_connect()
4775 if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { in io_connect()
4776 if (req->async_data) in io_connect()
4777 return -EAGAIN; in io_connect()
4779 ret = -ENOMEM; in io_connect()
4782 io = req->async_data; in io_connect()
4783 memcpy(req->async_data, &__io, sizeof(__io)); in io_connect()
4784 return -EAGAIN; in io_connect()
4786 if (ret == -ERESTARTSYS) in io_connect()
4787 ret = -EINTR; in io_connect()
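/*
 * Illustrative userspace sketch of the IORING_OP_CONNECT handling above,
 * assuming liburing; the loopback address and port are arbitrary.
 */
#include <liburing.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>

static int sketch_connect(struct io_uring *ring, int sockfd, int port)
{
	struct sockaddr_in sa;
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -1;
	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_port = htons(port);
	sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/* io_connect_prep() copies the address; io_connect() may finish async */
	io_uring_prep_connect(sqe, sockfd, (struct sockaddr *)&sa, sizeof(sa));
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}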
4797 return -EOPNOTSUPP; in io_sendmsg_prep()
4803 return -EOPNOTSUPP; in io_sendmsg()
4809 return -EOPNOTSUPP; in io_send()
4815 return -EOPNOTSUPP; in io_recvmsg_prep()
4821 return -EOPNOTSUPP; in io_recvmsg()
4827 return -EOPNOTSUPP; in io_recv()
4832 return -EOPNOTSUPP; in io_accept_prep()
4838 return -EOPNOTSUPP; in io_accept()
4843 return -EOPNOTSUPP; in io_connect_prep()
4849 return -EOPNOTSUPP; in io_connect()
4859 static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, in __io_async_wake() argument
4866 if (mask && !(mask & poll->events)) in __io_async_wake()
4869 trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); in __io_async_wake()
4871 list_del_init(&poll->wait.entry); in __io_async_wake()
4873 req->result = mask; in __io_async_wake()
4874 init_task_work(&req->task_work, func); in __io_async_wake()
4875 percpu_ref_get(&req->ctx->refs); in __io_async_wake()
4880 * tsk->sighand->siglock on doing the wakeup. Should not be needed in __io_async_wake()
4883 twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh); in __io_async_wake()
4895 WRITE_ONCE(poll->canceled, true); in __io_async_wake()
4896 tsk = io_wq_get_task(req->ctx->io_wq); in __io_async_wake()
4897 task_work_add(tsk, &req->task_work, TWA_NONE); in __io_async_wake()
4903 static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) in io_poll_rewait() argument
4904 __acquires(&req->ctx->completion_lock) in io_poll_rewait()
4906 struct io_ring_ctx *ctx = req->ctx; in io_poll_rewait()
4908 if (!req->result && !READ_ONCE(poll->canceled)) { in io_poll_rewait()
4909 struct poll_table_struct pt = { ._key = poll->events }; in io_poll_rewait()
4911 req->result = vfs_poll(req->file, &pt) & poll->events; in io_poll_rewait()
4914 spin_lock_irq(&ctx->completion_lock); in io_poll_rewait()
4915 if (!req->result && !READ_ONCE(poll->canceled)) { in io_poll_rewait()
4916 add_wait_queue(poll->head, &poll->wait); in io_poll_rewait()
4925 /* pure poll stashes this in ->async_data, poll driven retry elsewhere */ in io_poll_get_double()
4926 if (req->opcode == IORING_OP_POLL_ADD) in io_poll_get_double()
4927 return req->async_data; in io_poll_get_double()
4928 return req->apoll->double_poll; in io_poll_get_double()
4933 if (req->opcode == IORING_OP_POLL_ADD) in io_poll_get_single()
4934 return &req->poll; in io_poll_get_single()
4935 return &req->apoll->poll; in io_poll_get_single()
4940 struct io_poll_iocb *poll = io_poll_get_double(req); in io_poll_remove_double() local
4942 lockdep_assert_held(&req->ctx->completion_lock); in io_poll_remove_double()
4944 if (poll && poll->head) { in io_poll_remove_double()
4945 struct wait_queue_head *head = poll->head; in io_poll_remove_double()
4947 spin_lock(&head->lock); in io_poll_remove_double()
4948 list_del_init(&poll->wait.entry); in io_poll_remove_double()
4949 if (poll->wait.private) in io_poll_remove_double()
4950 refcount_dec(&req->refs); in io_poll_remove_double()
4951 poll->head = NULL; in io_poll_remove_double()
4952 spin_unlock(&head->lock); in io_poll_remove_double()
4958 struct io_ring_ctx *ctx = req->ctx; in io_poll_complete()
4961 req->poll.done = true; in io_poll_complete()
4969 struct io_ring_ctx *ctx = req->ctx; in io_poll_task_func()
4972 if (io_poll_rewait(req, &req->poll)) { in io_poll_task_func()
4973 spin_unlock_irq(&ctx->completion_lock); in io_poll_task_func()
4975 hash_del(&req->hash_node); in io_poll_task_func()
4976 io_poll_complete(req, req->result, 0); in io_poll_task_func()
4977 spin_unlock_irq(&ctx->completion_lock); in io_poll_task_func()
4985 percpu_ref_put(&ctx->refs); in io_poll_task_func()
4991 struct io_kiocb *req = wait->private; in io_poll_double_wake()
4992 struct io_poll_iocb *poll = io_poll_get_single(req); in io_poll_double_wake() local
4996 if (mask && !(mask & poll->events)) in io_poll_double_wake()
4999 list_del_init(&wait->entry); in io_poll_double_wake()
5001 if (poll && poll->head) { in io_poll_double_wake()
5004 spin_lock(&poll->head->lock); in io_poll_double_wake()
5005 done = list_empty(&poll->wait.entry); in io_poll_double_wake()
5007 list_del_init(&poll->wait.entry); in io_poll_double_wake()
5009 wait->private = NULL; in io_poll_double_wake()
5010 spin_unlock(&poll->head->lock); in io_poll_double_wake()
5013 poll->wait.func(&poll->wait, mode, sync, key); in io_poll_double_wake()
5016 refcount_dec(&req->refs); in io_poll_double_wake()
5020 static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, in io_init_poll_iocb() argument
5023 poll->head = NULL; in io_init_poll_iocb()
5024 poll->done = false; in io_init_poll_iocb()
5025 poll->canceled = false; in io_init_poll_iocb()
5026 poll->events = events; in io_init_poll_iocb()
5027 INIT_LIST_HEAD(&poll->wait.entry); in io_init_poll_iocb()
5028 init_waitqueue_func_entry(&poll->wait, wake_func); in io_init_poll_iocb()
5031 static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, in __io_queue_proc() argument
5035 struct io_kiocb *req = pt->req; in __io_queue_proc()
5038 * If poll->head is already set, it's because the file being polled in __io_queue_proc()
5039 * uses multiple waitqueues for poll handling (eg one for read, one in __io_queue_proc()
5042 if (unlikely(poll->head)) { in __io_queue_proc()
5043 struct io_poll_iocb *poll_one = poll; in __io_queue_proc()
5047 pt->error = -EINVAL; in __io_queue_proc()
5050 poll = kmalloc(sizeof(*poll), GFP_ATOMIC); in __io_queue_proc()
5051 if (!poll) { in __io_queue_proc()
5052 pt->error = -ENOMEM; in __io_queue_proc()
5055 io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); in __io_queue_proc()
5056 refcount_inc(&req->refs); in __io_queue_proc()
5057 poll->wait.private = req; in __io_queue_proc()
5058 *poll_ptr = poll; in __io_queue_proc()
5061 pt->error = 0; in __io_queue_proc()
5062 poll->head = head; in __io_queue_proc()
5064 if (poll->events & EPOLLEXCLUSIVE) in __io_queue_proc()
5065 add_wait_queue_exclusive(head, &poll->wait); in __io_queue_proc()
5067 add_wait_queue(head, &poll->wait); in __io_queue_proc()
5074 struct async_poll *apoll = pt->req->apoll; in io_async_queue_proc()
5076 __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); in io_async_queue_proc()
5082 struct async_poll *apoll = req->apoll; in io_async_task_func()
5083 struct io_ring_ctx *ctx = req->ctx; in io_async_task_func()
5085 trace_io_uring_task_run(req->ctx, req->opcode, req->user_data); in io_async_task_func()
5087 if (io_poll_rewait(req, &apoll->poll)) { in io_async_task_func()
5088 spin_unlock_irq(&ctx->completion_lock); in io_async_task_func()
5089 percpu_ref_put(&ctx->refs); in io_async_task_func()
5094 if (hash_hashed(&req->hash_node)) in io_async_task_func()
5095 hash_del(&req->hash_node); in io_async_task_func()
5098 spin_unlock_irq(&ctx->completion_lock); in io_async_task_func()
5100 if (!READ_ONCE(apoll->poll.canceled)) in io_async_task_func()
5103 __io_req_task_cancel(req, -ECANCELED); in io_async_task_func()
5105 percpu_ref_put(&ctx->refs); in io_async_task_func()
5106 kfree(apoll->double_poll); in io_async_task_func()
5113 struct io_kiocb *req = wait->private; in io_async_wake()
5114 struct io_poll_iocb *poll = &req->apoll->poll; in io_async_wake() local
5116 trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data, in io_async_wake()
5119 return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func); in io_async_wake()
5124 struct io_ring_ctx *ctx = req->ctx; in io_poll_req_insert()
5127 list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; in io_poll_req_insert()
5128 hlist_add_head(&req->hash_node, list); in io_poll_req_insert()
5132 struct io_poll_iocb *poll, in __io_arm_poll_handler() argument
5135 __acquires(&ctx->completion_lock) in __io_arm_poll_handler()
5137 struct io_ring_ctx *ctx = req->ctx; in __io_arm_poll_handler()
5140 INIT_HLIST_NODE(&req->hash_node); in __io_arm_poll_handler()
5141 io_init_poll_iocb(poll, mask, wake_func); in __io_arm_poll_handler()
5142 poll->file = req->file; in __io_arm_poll_handler()
5143 poll->wait.private = req; in __io_arm_poll_handler()
5145 ipt->pt._key = mask; in __io_arm_poll_handler()
5146 ipt->req = req; in __io_arm_poll_handler()
5147 ipt->error = -EINVAL; in __io_arm_poll_handler()
5149 mask = vfs_poll(req->file, &ipt->pt) & poll->events; in __io_arm_poll_handler()
5151 spin_lock_irq(&ctx->completion_lock); in __io_arm_poll_handler()
5152 if (likely(poll->head)) { in __io_arm_poll_handler()
5153 spin_lock(&poll->head->lock); in __io_arm_poll_handler()
5154 if (unlikely(list_empty(&poll->wait.entry))) { in __io_arm_poll_handler()
5155 if (ipt->error) in __io_arm_poll_handler()
5157 ipt->error = 0; in __io_arm_poll_handler()
5160 if (mask || ipt->error) in __io_arm_poll_handler()
5161 list_del_init(&poll->wait.entry); in __io_arm_poll_handler()
5163 WRITE_ONCE(poll->canceled, true); in __io_arm_poll_handler()
5164 else if (!poll->done) /* actually waiting for an event */ in __io_arm_poll_handler()
5166 spin_unlock(&poll->head->lock); in __io_arm_poll_handler()
5174 const struct io_op_def *def = &io_op_defs[req->opcode]; in io_arm_poll_handler()
5175 struct io_ring_ctx *ctx = req->ctx; in io_arm_poll_handler()
5181 if (!req->file || !file_can_poll(req->file)) in io_arm_poll_handler()
5183 if (req->flags & REQ_F_POLLED) in io_arm_poll_handler()
5185 if (def->pollin) in io_arm_poll_handler()
5187 else if (def->pollout) in io_arm_poll_handler()
5191 /* if we can't nonblock try, then no point in arming a poll handler */ in io_arm_poll_handler()
5192 if (!io_file_supports_async(req->file, rw)) in io_arm_poll_handler()
5198 apoll->double_poll = NULL; in io_arm_poll_handler()
5200 req->flags |= REQ_F_POLLED; in io_arm_poll_handler()
5201 req->apoll = apoll; in io_arm_poll_handler()
5204 if (def->pollin) in io_arm_poll_handler()
5206 if (def->pollout) in io_arm_poll_handler()
5210 if ((req->opcode == IORING_OP_RECVMSG) && in io_arm_poll_handler()
5211 (req->sr_msg.msg_flags & MSG_ERRQUEUE)) in io_arm_poll_handler()
5218 ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, in io_arm_poll_handler()
5222 spin_unlock_irq(&ctx->completion_lock); in io_arm_poll_handler()
5223 kfree(apoll->double_poll); in io_arm_poll_handler()
5227 spin_unlock_irq(&ctx->completion_lock); in io_arm_poll_handler()
5228 trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask, in io_arm_poll_handler()
5229 apoll->poll.events); in io_arm_poll_handler()
5234 struct io_poll_iocb *poll) in __io_poll_remove_one() argument
5238 spin_lock(&poll->head->lock); in __io_poll_remove_one()
5239 WRITE_ONCE(poll->canceled, true); in __io_poll_remove_one()
5240 if (!list_empty(&poll->wait.entry)) { in __io_poll_remove_one()
5241 list_del_init(&poll->wait.entry); in __io_poll_remove_one()
5244 spin_unlock(&poll->head->lock); in __io_poll_remove_one()
5245 hash_del(&req->hash_node); in __io_poll_remove_one()
5255 if (req->opcode == IORING_OP_POLL_ADD) { in io_poll_remove_one()
5256 do_complete = __io_poll_remove_one(req, &req->poll); in io_poll_remove_one()
5258 struct async_poll *apoll = req->apoll; in io_poll_remove_one()
5260 /* non-poll requests have submit ref still */ in io_poll_remove_one()
5261 do_complete = __io_poll_remove_one(req, &apoll->poll); in io_poll_remove_one()
5264 kfree(apoll->double_poll); in io_poll_remove_one()
5270 io_cqring_fill_event(req, -ECANCELED); in io_poll_remove_one()
5271 io_commit_cqring(req->ctx); in io_poll_remove_one()
5280 * Returns true if we found and killed one or more poll requests
5288 spin_lock_irq(&ctx->completion_lock); in io_poll_remove_all()
5289 for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { in io_poll_remove_all()
5292 list = &ctx->cancel_hash[i]; in io_poll_remove_all()
5298 spin_unlock_irq(&ctx->completion_lock); in io_poll_remove_all()
5311 list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)]; in io_poll_cancel()
5313 if (sqe_addr != req->user_data) in io_poll_cancel()
5317 return -EALREADY; in io_poll_cancel()
5320 return -ENOENT; in io_poll_cancel()
5326 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_poll_remove_prep()
5327 return -EINVAL; in io_poll_remove_prep()
5328 if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || in io_poll_remove_prep()
5329 sqe->poll_events) in io_poll_remove_prep()
5330 return -EINVAL; in io_poll_remove_prep()
5332 req->poll.addr = READ_ONCE(sqe->addr); in io_poll_remove_prep()
5337 * Find a running poll command that matches one specified in sqe->addr,
5342 struct io_ring_ctx *ctx = req->ctx; in io_poll_remove()
5346 addr = req->poll.addr; in io_poll_remove()
5347 spin_lock_irq(&ctx->completion_lock); in io_poll_remove()
5349 spin_unlock_irq(&ctx->completion_lock); in io_poll_remove()
5360 struct io_kiocb *req = wait->private; in io_poll_wake()
5361 struct io_poll_iocb *poll = &req->poll; in io_poll_wake() local
5363 return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func); in io_poll_wake()
5371 __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data); in io_poll_queue_proc()
5376 struct io_poll_iocb *poll = &req->poll; in io_poll_add_prep() local
5379 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_poll_add_prep()
5380 return -EINVAL; in io_poll_add_prep()
5381 if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) in io_poll_add_prep()
5382 return -EINVAL; in io_poll_add_prep()
5384 events = READ_ONCE(sqe->poll32_events); in io_poll_add_prep()
5388 poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP | in io_poll_add_prep()
5395 struct io_poll_iocb *poll = &req->poll; in io_poll_add() local
5396 struct io_ring_ctx *ctx = req->ctx; in io_poll_add()
5402 mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events, in io_poll_add()
5409 spin_unlock_irq(&ctx->completion_lock); in io_poll_add()
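/*
 * Illustrative userspace sketch of IORING_OP_POLL_ADD as armed by
 * io_poll_add_prep()/io_poll_add() above: a one-shot poll for readability,
 * assuming liburing.
 */
#include <liburing.h>
#include <poll.h>

static int sketch_poll_readable(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int revents;

	if (!sqe)
		return -1;
	io_uring_prep_poll_add(sqe, fd, POLLIN);
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	revents = cqe->res;	/* returned event mask, or -errno */
	io_uring_cqe_seen(ring, cqe);
	return revents;
}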
5422 struct io_kiocb *req = data->req; in io_timeout_fn()
5423 struct io_ring_ctx *ctx = req->ctx; in io_timeout_fn()
5426 spin_lock_irqsave(&ctx->completion_lock, flags); in io_timeout_fn()
5427 list_del_init(&req->timeout.list); in io_timeout_fn()
5428 atomic_set(&req->ctx->cq_timeouts, in io_timeout_fn()
5429 atomic_read(&req->ctx->cq_timeouts) + 1); in io_timeout_fn()
5431 io_cqring_fill_event(req, -ETIME); in io_timeout_fn()
5433 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_timeout_fn()
5443 struct io_timeout_data *io = req->async_data; in __io_timeout_cancel()
5446 ret = hrtimer_try_to_cancel(&io->timer); in __io_timeout_cancel()
5447 if (ret == -1) in __io_timeout_cancel()
5448 return -EALREADY; in __io_timeout_cancel()
5449 list_del_init(&req->timeout.list); in __io_timeout_cancel()
5452 io_cqring_fill_event(req, -ECANCELED); in __io_timeout_cancel()
5460 int ret = -ENOENT; in io_timeout_cancel()
5462 list_for_each_entry(req, &ctx->timeout_list, timeout.list) { in io_timeout_cancel()
5463 if (user_data == req->user_data) { in io_timeout_cancel()
5469 if (ret == -ENOENT) in io_timeout_cancel()
5478 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_timeout_remove_prep()
5479 return -EINVAL; in io_timeout_remove_prep()
5480 if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) in io_timeout_remove_prep()
5481 return -EINVAL; in io_timeout_remove_prep()
5482 if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags) in io_timeout_remove_prep()
5483 return -EINVAL; in io_timeout_remove_prep()
5485 req->timeout_rem.addr = READ_ONCE(sqe->addr); in io_timeout_remove_prep()
5494 struct io_ring_ctx *ctx = req->ctx; in io_timeout_remove()
5497 spin_lock_irq(&ctx->completion_lock); in io_timeout_remove()
5498 ret = io_timeout_cancel(ctx, req->timeout_rem.addr); in io_timeout_remove()
5502 spin_unlock_irq(&ctx->completion_lock); in io_timeout_remove()
5515 u32 off = READ_ONCE(sqe->off); in io_timeout_prep()
5517 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_timeout_prep()
5518 return -EINVAL; in io_timeout_prep()
5519 if (sqe->ioprio || sqe->buf_index || sqe->len != 1) in io_timeout_prep()
5520 return -EINVAL; in io_timeout_prep()
5522 return -EINVAL; in io_timeout_prep()
5523 flags = READ_ONCE(sqe->timeout_flags); in io_timeout_prep()
5525 return -EINVAL; in io_timeout_prep()
5527 req->timeout.off = off; in io_timeout_prep()
5529 if (!req->async_data && io_alloc_async_data(req)) in io_timeout_prep()
5530 return -ENOMEM; in io_timeout_prep()
5532 data = req->async_data; in io_timeout_prep()
5533 data->req = req; in io_timeout_prep()
5535 if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) in io_timeout_prep()
5536 return -EFAULT; in io_timeout_prep()
5539 data->mode = HRTIMER_MODE_ABS; in io_timeout_prep()
5541 data->mode = HRTIMER_MODE_REL; in io_timeout_prep()
5543 hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode); in io_timeout_prep()
5549 struct io_ring_ctx *ctx = req->ctx; in io_timeout()
5550 struct io_timeout_data *data = req->async_data; in io_timeout()
5552 u32 tail, off = req->timeout.off; in io_timeout()
5554 spin_lock_irq(&ctx->completion_lock); in io_timeout()
5557	 * sqe->off holds how many events need to occur for this in io_timeout()
5562 entry = ctx->timeout_list.prev; in io_timeout()
5566 tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); in io_timeout()
5567 req->timeout.target_seq = tail + off; in io_timeout()
5573 list_for_each_prev(entry, &ctx->timeout_list) { in io_timeout()
5580 if (off >= nxt->timeout.target_seq - tail) in io_timeout()
5584 list_add(&req->timeout.list, entry); in io_timeout()
5585 data->timer.function = io_timeout_fn; in io_timeout()
5586 hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); in io_timeout()
5587 spin_unlock_irq(&ctx->completion_lock); in io_timeout()
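/*
 * Illustrative userspace sketch of IORING_OP_TIMEOUT as set up by
 * io_timeout_prep()/io_timeout() above: fire after one second or after one
 * completion, whichever comes first. Assumes liburing.
 */
#include <liburing.h>

static int sketch_timeout(struct io_uring *ring)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -1;
	/* count == 1 lands in sqe->off above; relative (no IORING_TIMEOUT_ABS) */
	io_uring_prep_timeout(sqe, &ts, 1, 0);
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe) < 0)
		return -1;
	ret = cqe->res;		/* -ETIME when the timer itself expires */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}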
5595 return req->user_data == (unsigned long) data; in io_cancel_cb()
5603 cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false); in io_async_cancel_one()
5609 ret = -EALREADY; in io_async_cancel_one()
5612 ret = -ENOENT; in io_async_cancel_one()
5627 if (ret != -ENOENT) { in io_async_find_and_cancel()
5628 spin_lock_irqsave(&ctx->completion_lock, flags); in io_async_find_and_cancel()
5632 spin_lock_irqsave(&ctx->completion_lock, flags); in io_async_find_and_cancel()
5634 if (ret != -ENOENT) in io_async_find_and_cancel()
5642 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_async_find_and_cancel()
5653 if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_async_cancel_prep()
5654 return -EINVAL; in io_async_cancel_prep()
5655 if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) in io_async_cancel_prep()
5656 return -EINVAL; in io_async_cancel_prep()
5657 if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags) in io_async_cancel_prep()
5658 return -EINVAL; in io_async_cancel_prep()
5660 req->cancel.addr = READ_ONCE(sqe->addr); in io_async_cancel_prep()
5666 struct io_ring_ctx *ctx = req->ctx; in io_async_cancel()
5668 io_async_find_and_cancel(ctx, req, req->cancel.addr, 0); in io_async_cancel()
5675 if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL)) in io_files_update_prep()
5676 return -EINVAL; in io_files_update_prep()
5677 if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) in io_files_update_prep()
5678 return -EINVAL; in io_files_update_prep()
5679 if (sqe->ioprio || sqe->rw_flags) in io_files_update_prep()
5680 return -EINVAL; in io_files_update_prep()
5682 req->files_update.offset = READ_ONCE(sqe->off); in io_files_update_prep()
5683 req->files_update.nr_args = READ_ONCE(sqe->len); in io_files_update_prep()
5684 if (!req->files_update.nr_args) in io_files_update_prep()
5685 return -EINVAL; in io_files_update_prep()
5686 req->files_update.arg = READ_ONCE(sqe->addr); in io_files_update_prep()
5693 struct io_ring_ctx *ctx = req->ctx; in io_files_update()
5698 return -EAGAIN; in io_files_update()
5700 up.offset = req->files_update.offset; in io_files_update()
5701 up.fds = req->files_update.arg; in io_files_update()
5703 mutex_lock(&ctx->uring_lock); in io_files_update()
5704 ret = __io_sqe_files_update(ctx, &up, req->files_update.nr_args); in io_files_update()
5705 mutex_unlock(&ctx->uring_lock); in io_files_update()
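/*
 * Illustrative userspace sketch of the registered-file table update that
 * io_files_update() above performs from an sqe; the same table can also be
 * updated synchronously through io_uring_register(). Assumes liburing and a
 * table previously registered with io_uring_register_files().
 */
#include <liburing.h>

static int sketch_replace_fixed_file(struct io_uring *ring, int new_fd)
{
	int fds[1] = { new_fd };

	/* replace slot 0 of the registered file table with new_fd */
	return io_uring_register_files_update(ring, 0, fds, 1);
}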
5715 switch (req->opcode) { in io_req_prep()
5781 req->opcode); in io_req_prep()
5782		return -EINVAL; in io_req_prep()
5791 return -EAGAIN; in io_req_defer_prep()
5798 struct io_ring_ctx *ctx = req->ctx; in io_get_sequence()
5801 if (req->flags & REQ_F_LINK_HEAD) in io_get_sequence()
5802 list_for_each_entry(pos, &req->link_list, link_list) in io_get_sequence()
5805 total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped; in io_get_sequence()
5806 return total_submitted - nr_reqs; in io_get_sequence()
5811 struct io_ring_ctx *ctx = req->ctx; in io_req_defer()
5817 if (likely(list_empty_careful(&ctx->defer_list) && in io_req_defer()
5818 !(req->flags & REQ_F_IO_DRAIN))) in io_req_defer()
5823 if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) in io_req_defer()
5826 if (!req->async_data) { in io_req_defer()
5834 return -ENOMEM; in io_req_defer()
5836 spin_lock_irq(&ctx->completion_lock); in io_req_defer()
5837 if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) { in io_req_defer()
5838 spin_unlock_irq(&ctx->completion_lock); in io_req_defer()
5841 return -EIOCBQUEUED; in io_req_defer()
5844 trace_io_uring_defer(ctx, req, req->user_data); in io_req_defer()
5845 de->req = req; in io_req_defer()
5846 de->seq = seq; in io_req_defer()
5847 list_add_tail(&de->list, &ctx->defer_list); in io_req_defer()
5848 spin_unlock_irq(&ctx->completion_lock); in io_req_defer()
5849 return -EIOCBQUEUED; in io_req_defer()
5854 struct io_ring_ctx *ctx = req->ctx; in io_req_drop_files()
5857 spin_lock_irqsave(&ctx->inflight_lock, flags); in io_req_drop_files()
5858 list_del(&req->inflight_entry); in io_req_drop_files()
5859 if (waitqueue_active(&ctx->inflight_wait)) in io_req_drop_files()
5860 wake_up(&ctx->inflight_wait); in io_req_drop_files()
5861 spin_unlock_irqrestore(&ctx->inflight_lock, flags); in io_req_drop_files()
5862 req->flags &= ~REQ_F_INFLIGHT; in io_req_drop_files()
5863 put_files_struct(req->work.identity->files); in io_req_drop_files()
5864 put_nsproxy(req->work.identity->nsproxy); in io_req_drop_files()
5865 req->work.flags &= ~IO_WQ_WORK_FILES; in io_req_drop_files()
5870 if (req->flags & REQ_F_BUFFER_SELECTED) { in __io_clean_op()
5871 switch (req->opcode) { in __io_clean_op()
5875 kfree((void *)(unsigned long)req->rw.addr); in __io_clean_op()
5879 kfree(req->sr_msg.kbuf); in __io_clean_op()
5882 req->flags &= ~REQ_F_BUFFER_SELECTED; in __io_clean_op()
5885 if (req->flags & REQ_F_NEED_CLEANUP) { in __io_clean_op()
5886 switch (req->opcode) { in __io_clean_op()
5893 struct io_async_rw *io = req->async_data; in __io_clean_op()
5894 if (io->free_iovec) in __io_clean_op()
5895 kfree(io->free_iovec); in __io_clean_op()
5900 struct io_async_msghdr *io = req->async_data; in __io_clean_op()
5901 if (io->iov != io->fast_iov) in __io_clean_op()
5902 kfree(io->iov); in __io_clean_op()
5907 io_put_file(req, req->splice.file_in, in __io_clean_op()
5908 (req->splice.flags & SPLICE_F_FD_IN_FIXED)); in __io_clean_op()
5912 if (req->open.filename) in __io_clean_op()
5913 putname(req->open.filename); in __io_clean_op()
5916 req->flags &= ~REQ_F_NEED_CLEANUP; in __io_clean_op()
5919 if (req->flags & REQ_F_INFLIGHT) in __io_clean_op()
5926 struct io_ring_ctx *ctx = req->ctx; in io_issue_sqe()
5929 switch (req->opcode) { in io_issue_sqe()
6022 ret = -EINVAL; in io_issue_sqe()
6030 if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) { in io_issue_sqe()
6035 mutex_lock(&ctx->uring_lock); in io_issue_sqe()
6040 mutex_unlock(&ctx->uring_lock); in io_issue_sqe()
6057 if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) == in io_wq_submit_work()
6059 ret = -ECANCELED; in io_wq_submit_work()
6070 if (ret != -EAGAIN) in io_wq_submit_work()
6089 table = &ctx->file_data->table[index >> IORING_FILE_TABLE_SHIFT]; in io_file_from_index()
6090 return table->files[index & IORING_FILE_TABLE_MASK]; in io_file_from_index()
6096 struct io_ring_ctx *ctx = req->ctx; in io_file_get()
6100 if (unlikely((unsigned int)fd >= ctx->nr_user_files)) in io_file_get()
6102 fd = array_index_nospec(fd, ctx->nr_user_files); in io_file_get()
6105 req->fixed_file_refs = &ctx->file_data->node->refs; in io_file_get()
6106 percpu_ref_get(req->fixed_file_refs); in io_file_get()
6121 fixed = (req->flags & REQ_F_FIXED_FILE) != 0; in io_req_set_file()
6122 if (unlikely(!fixed && io_async_submit(req->ctx))) in io_req_set_file()
6123 return -EBADF; in io_req_set_file()
6125 req->file = io_file_get(state, req, fd, fixed); in io_req_set_file()
6126 if (req->file || io_op_defs[req->opcode].needs_file_no_error) in io_req_set_file()
6128 return -EBADF; in io_req_set_file()
6135 struct io_kiocb *req = data->req; in io_link_timeout_fn()
6136 struct io_ring_ctx *ctx = req->ctx; in io_link_timeout_fn()
6140 spin_lock_irqsave(&ctx->completion_lock, flags); in io_link_timeout_fn()
6146 if (!list_empty(&req->link_list)) { in io_link_timeout_fn()
6147 prev = list_entry(req->link_list.prev, struct io_kiocb, in io_link_timeout_fn()
6149 if (refcount_inc_not_zero(&prev->refs)) in io_link_timeout_fn()
6150 list_del_init(&req->link_list); in io_link_timeout_fn()
6155 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_link_timeout_fn()
6159 io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); in io_link_timeout_fn()
6162 io_req_complete(req, -ETIME); in io_link_timeout_fn()
6173 if (!list_empty(&req->link_list)) { in __io_queue_linked_timeout()
6174 struct io_timeout_data *data = req->async_data; in __io_queue_linked_timeout()
6176 data->timer.function = io_link_timeout_fn; in __io_queue_linked_timeout()
6177 hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), in __io_queue_linked_timeout()
6178 data->mode); in __io_queue_linked_timeout()
6184 struct io_ring_ctx *ctx = req->ctx; in io_queue_linked_timeout()
6186 spin_lock_irq(&ctx->completion_lock); in io_queue_linked_timeout()
6188 spin_unlock_irq(&ctx->completion_lock); in io_queue_linked_timeout()
6198 if (!(req->flags & REQ_F_LINK_HEAD)) in io_prep_linked_timeout()
6200 if (req->flags & REQ_F_LINK_TIMEOUT) in io_prep_linked_timeout()
6203 nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, in io_prep_linked_timeout()
6205 if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT) in io_prep_linked_timeout()
6208 nxt->flags |= REQ_F_LTIMEOUT_ACTIVE; in io_prep_linked_timeout()
6209 req->flags |= REQ_F_LINK_TIMEOUT; in io_prep_linked_timeout()
6222 if ((req->flags & REQ_F_WORK_INITIALIZED) && in __io_queue_sqe()
6223 (req->work.flags & IO_WQ_WORK_CREDS) && in __io_queue_sqe()
6224 req->work.identity->creds != current_cred()) { in __io_queue_sqe()
6227 if (old_creds == req->work.identity->creds) in __io_queue_sqe()
6230 old_creds = override_creds(req->work.identity->creds); in __io_queue_sqe()
6237 * doesn't support non-blocking read/write attempts in __io_queue_sqe()
6239 if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { in __io_queue_sqe()
6257 if (!(req->flags & REQ_F_FORCE_ASYNC)) in __io_queue_sqe()
6262 /* un-prep timeout, so it'll be killed as any other linked */ in __io_queue_sqe()
6263 req->flags &= ~REQ_F_LINK_TIMEOUT; in __io_queue_sqe()
6280 if (ret != -EIOCBQUEUED) { in io_queue_sqe()
6286 } else if (req->flags & REQ_F_FORCE_ASYNC) { in io_queue_sqe()
6287 if (!req->async_data) { in io_queue_sqe()
6306 if (unlikely(req->flags & REQ_F_FAIL_LINK)) { in io_queue_link_head()
6308 io_req_complete(req, -ECANCELED); in io_queue_link_head()
6316 struct io_ring_ctx *ctx = req->ctx; in io_submit_sqe()
6336 if (req->flags & REQ_F_IO_DRAIN) { in io_submit_sqe()
6337 head->flags |= REQ_F_IO_DRAIN; in io_submit_sqe()
6338 ctx->drain_next = 1; in io_submit_sqe()
6343 head->flags |= REQ_F_FAIL_LINK; in io_submit_sqe()
6347 list_add_tail(&req->link_list, &head->link_list); in io_submit_sqe()
6350 if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { in io_submit_sqe()
6355 if (unlikely(ctx->drain_next)) { in io_submit_sqe()
6356 req->flags |= REQ_F_IO_DRAIN; in io_submit_sqe()
6357 ctx->drain_next = 0; in io_submit_sqe()
6359 if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { in io_submit_sqe()
6360 req->flags |= REQ_F_LINK_HEAD; in io_submit_sqe()
6361 INIT_LIST_HEAD(&req->link_list); in io_submit_sqe()
6365 req->flags |= REQ_F_FAIL_LINK; in io_submit_sqe()
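/*
 * Illustrative userspace sketch of SQE linking as handled by io_submit_sqe()
 * above: a read linked to an IORING_OP_LINK_TIMEOUT so it is cancelled if it
 * does not complete within 500ms. Assumes liburing; two completions are
 * posted, one per sqe in the chain.
 */
#include <liburing.h>

static int sketch_linked_read(struct io_uring *ring, int fd, void *buf,
			      unsigned len)
{
	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 500000000 };
	struct io_uring_sqe *sqe;

	/* head of the link chain: the actual I/O */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_read(sqe, fd, buf, len, 0);
	io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);

	/* linked timeout applies only to the immediately preceding sqe */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_link_timeout(sqe, &ts, 0);

	return io_uring_submit(ring);
}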
6380 if (!list_empty(&state->comp.list)) in io_submit_state_end()
6381 io_submit_flush_completions(&state->comp); in io_submit_state_end()
6382 blk_finish_plug(&state->plug); in io_submit_state_end()
6384 if (state->free_reqs) in io_submit_state_end()
6385 kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); in io_submit_state_end()
6394 blk_start_plug(&state->plug); in io_submit_state_start()
6395 state->comp.nr = 0; in io_submit_state_start()
6396 INIT_LIST_HEAD(&state->comp.list); in io_submit_state_start()
6397 state->comp.ctx = ctx; in io_submit_state_start()
6398 state->free_reqs = 0; in io_submit_state_start()
6399 state->file = NULL; in io_submit_state_start()
6400 state->ios_left = max_ios; in io_submit_state_start()
6405 struct io_rings *rings = ctx->rings; in io_commit_sqring()
6412 smp_store_release(&rings->sq.head, ctx->cached_sq_head); in io_commit_sqring()
6421 * prevent a re-load down the line.
6425 u32 *sq_array = ctx->sq_array; in io_get_sqe()
6436 head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]); in io_get_sqe()
6437 if (likely(head < ctx->sq_entries)) in io_get_sqe()
6438 return &ctx->sq_sqes[head]; in io_get_sqe()
6441 ctx->cached_sq_dropped++; in io_get_sqe()
6442 WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped); in io_get_sqe()
6448 ctx->cached_sq_head++; in io_consume_sqe()
6460 if (!ctx->restricted) in io_check_restriction()
6463 if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) in io_check_restriction()
6466 if ((sqe_flags & ctx->restrictions.sqe_flags_required) != in io_check_restriction()
6467 ctx->restrictions.sqe_flags_required) in io_check_restriction()
6470 if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed | in io_check_restriction()
6471 ctx->restrictions.sqe_flags_required)) in io_check_restriction()
6488 req->opcode = READ_ONCE(sqe->opcode); in io_init_req()
6489 req->user_data = READ_ONCE(sqe->user_data); in io_init_req()
6490 req->async_data = NULL; in io_init_req()
6491 req->file = NULL; in io_init_req()
6492 req->ctx = ctx; in io_init_req()
6493 req->flags = 0; in io_init_req()
6495 refcount_set(&req->refs, 2); in io_init_req()
6496 req->task = current; in io_init_req()
6497 req->result = 0; in io_init_req()
6499 if (unlikely(req->opcode >= IORING_OP_LAST)) in io_init_req()
6500 return -EINVAL; in io_init_req()
6503 return -EFAULT; in io_init_req()
6505 sqe_flags = READ_ONCE(sqe->flags); in io_init_req()
6508 return -EINVAL; in io_init_req()
6511 return -EACCES; in io_init_req()
6514 !io_op_defs[req->opcode].buffer_select) in io_init_req()
6515 return -EOPNOTSUPP; in io_init_req()
6517 id = READ_ONCE(sqe->personality); in io_init_req()
6521 iod = idr_find(&ctx->personality_idr, id); in io_init_req()
6523 return -EINVAL; in io_init_req()
6524 refcount_inc(&iod->count); in io_init_req()
6527 get_cred(iod->creds); in io_init_req()
6528 req->work.identity = iod; in io_init_req()
6529 req->work.flags |= IO_WQ_WORK_CREDS; in io_init_req()
6533 req->flags |= sqe_flags; in io_init_req()
6535 if (!io_op_defs[req->opcode].needs_file) in io_init_req()
6538 ret = io_req_set_file(state, req, READ_ONCE(sqe->fd)); in io_init_req()
6539 state->ios_left--; in io_init_req()
6550 if (test_bit(0, &ctx->sq_check_overflow)) { in io_submit_sqes()
6551 if (!list_empty(&ctx->cq_overflow_list) && in io_submit_sqes()
6553 return -EBUSY; in io_submit_sqes()
6557 nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx)); in io_submit_sqes()
6559 if (!percpu_ref_tryget_many(&ctx->refs, nr)) in io_submit_sqes()
6560 return -EAGAIN; in io_submit_sqes()
6562 percpu_counter_add(&current->io_uring->inflight, nr); in io_submit_sqes()
6563 refcount_add(nr, &current->usage); in io_submit_sqes()
6580 submitted = -EAGAIN; in io_submit_sqes()
6584 /* will complete beyond this point, count as submitted */ in io_submit_sqes()
6595 trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data, in io_submit_sqes()
6603 int ref_used = (submitted == -EAGAIN) ? 0 : submitted; in io_submit_sqes()
6604 struct io_uring_task *tctx = current->io_uring; in io_submit_sqes()
6605 int unused = nr - ref_used; in io_submit_sqes()
6607 percpu_ref_put_many(&ctx->refs, unused); in io_submit_sqes()
6608 percpu_counter_sub(&tctx->inflight, unused); in io_submit_sqes()
6624 spin_lock_irq(&ctx->completion_lock); in io_ring_set_wakeup_flag()
6625 ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; in io_ring_set_wakeup_flag()
6626 spin_unlock_irq(&ctx->completion_lock); in io_ring_set_wakeup_flag()
6631 spin_lock_irq(&ctx->completion_lock); in io_ring_clear_wakeup_flag()
6632 ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; in io_ring_clear_wakeup_flag()
6633 spin_unlock_irq(&ctx->completion_lock); in io_ring_clear_wakeup_flag()
6646 spin_lock_irqsave(&ctx->completion_lock, flags); in io_sq_wake_function()
6647 ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; in io_sq_wake_function()
6648 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_sq_wake_function()
6662 unsigned long timeout = start_jiffies + ctx->sq_thread_idle; in __io_sq_thread()
6663 struct io_sq_data *sqd = ctx->sq_data; in __io_sq_thread()
6668 if (!list_empty(&ctx->iopoll_list)) { in __io_sq_thread()
6671 mutex_lock(&ctx->uring_lock); in __io_sq_thread()
6672 if (!list_empty(&ctx->iopoll_list) && !need_resched()) in __io_sq_thread()
6674 mutex_unlock(&ctx->uring_lock); in __io_sq_thread()
6680 * If submit got -EBUSY, flag us as needing the application in __io_sq_thread()
6683 if (!to_submit || ret == -EBUSY || need_resched()) { in __io_sq_thread()
6699 if (!list_empty(&ctx->iopoll_list) || need_resched() || in __io_sq_thread()
6700 (!time_after(jiffies, timeout) && ret != -EBUSY && in __io_sq_thread()
6701 !percpu_ref_is_dying(&ctx->refs))) in __io_sq_thread()
6704 prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry, in __io_sq_thread()
6714 if ((ctx->flags & IORING_SETUP_IOPOLL) && in __io_sq_thread()
6715 !list_empty_careful(&ctx->iopoll_list)) { in __io_sq_thread()
6716 finish_wait(&sqd->wait, &ctx->sqo_wait_entry); in __io_sq_thread()
6721 if (!to_submit || ret == -EBUSY) in __io_sq_thread()
6725 finish_wait(&sqd->wait, &ctx->sqo_wait_entry); in __io_sq_thread()
6732 mutex_lock(&ctx->uring_lock); in __io_sq_thread()
6733 if (likely(!percpu_ref_is_dying(&ctx->refs))) in __io_sq_thread()
6735 mutex_unlock(&ctx->uring_lock); in __io_sq_thread()
6737 if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait)) in __io_sq_thread()
6738 wake_up(&ctx->sqo_sq_wait); in __io_sq_thread()
6747 while (!list_empty(&sqd->ctx_new_list)) { in io_sqd_init_new()
6748 ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list); in io_sqd_init_new()
6749 init_wait(&ctx->sqo_wait_entry); in io_sqd_init_new()
6750 ctx->sqo_wait_entry.func = io_sq_wake_function; in io_sqd_init_new()
6751 list_move_tail(&ctx->sqd_list, &sqd->ctx_list); in io_sqd_init_new()
6752 complete(&ctx->sq_thread_comp); in io_sqd_init_new()
6772 * the users are synchronized on the sqd->ctx_lock. in io_sq_thread()
6777 if (unlikely(!list_empty(&sqd->ctx_new_list))) in io_sq_thread()
6780 cap_entries = !list_is_singular(&sqd->ctx_list); in io_sq_thread()
6782 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { in io_sq_thread()
6783 if (current->cred != ctx->creds) { in io_sq_thread()
6786 old_cred = override_creds(ctx->creds); in io_sq_thread()
6790 current->loginuid = ctx->loginuid; in io_sq_thread()
6791 current->sessionid = ctx->sessionid; in io_sq_thread()
6805 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) in io_sq_thread()
6809 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) in io_sq_thread()
6835 struct io_ring_ctx *ctx = iowq->ctx; in io_should_wake()
6840 * regardless of event count. in io_should_wake()
6842 return io_cqring_events(ctx, noflush) >= iowq->to_wait || in io_should_wake()
6843 atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; in io_should_wake()
6854 return -1; in io_wake_function()
6865 if (current->jobctl & JOBCTL_TASK_WORK) { in io_run_task_work_sig()
6866 spin_lock_irq(&current->sighand->siglock); in io_run_task_work_sig()
6867 current->jobctl &= ~JOBCTL_TASK_WORK; in io_run_task_work_sig()
6869 spin_unlock_irq(&current->sighand->siglock); in io_run_task_work_sig()
6872 return -EINTR; in io_run_task_work_sig()
6891 struct io_rings *rings = ctx->rings; in io_cqring_wait()
6914 iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); in io_cqring_wait()
6917 prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, in io_cqring_wait()
6929 finish_wait(&ctx->wait, &iowq.wq); in io_cqring_wait()
6931 restore_saved_sigmask_unless(ret == -EINTR); in io_cqring_wait()
6933 return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; in io_cqring_wait()
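io_cqring_wait() above is the kernel half of io_uring_enter(2) with IORING_ENTER_GETEVENTS: it parks the caller until min_complete CQEs, a signal, or a timeout arrive. A rough userspace counterpart, assuming the CQ ring fields were mapped using the cq_off offsets from io_uring_setup(2); cq_view and wait_one_cqe() are illustrative names only:

#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

struct cq_view {			/* filled in from params.cq_off.* */
	unsigned *khead, *ktail, *kring_mask;
	struct io_uring_cqe *cqes;
};

static int wait_one_cqe(int ring_fd, struct cq_view *cq, struct io_uring_cqe *out)
{
	if (syscall(__NR_io_uring_enter, ring_fd, 0, 1,
		    IORING_ENTER_GETEVENTS, NULL, 0) < 0)
		return -errno;

	unsigned head = *cq->khead;

	/* Defensive: on success the CQ ring should be non-empty. */
	if (head == __atomic_load_n(cq->ktail, __ATOMIC_ACQUIRE))
		return -EAGAIN;

	*out = cq->cqes[head & *cq->kring_mask];
	__atomic_store_n(cq->khead, head + 1, __ATOMIC_RELEASE);
	return 0;
}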
6939 if (ctx->ring_sock) { in __io_sqe_files_unregister()
6940 struct sock *sock = ctx->ring_sock->sk; in __io_sqe_files_unregister()
6943 while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) in __io_sqe_files_unregister()
6949 for (i = 0; i < ctx->nr_user_files; i++) { in __io_sqe_files_unregister()
6964 complete(&data->done); in io_file_ref_kill()
6969 struct fixed_file_data *data = ctx->file_data; in io_sqe_files_unregister()
6974 return -ENXIO; in io_sqe_files_unregister()
6976 spin_lock(&data->lock); in io_sqe_files_unregister()
6977 ref_node = data->node; in io_sqe_files_unregister()
6978 spin_unlock(&data->lock); in io_sqe_files_unregister()
6980 percpu_ref_kill(&ref_node->refs); in io_sqe_files_unregister()
6982 percpu_ref_kill(&data->refs); in io_sqe_files_unregister()
6985 flush_delayed_work(&ctx->file_put_work); in io_sqe_files_unregister()
6986 wait_for_completion(&data->done); in io_sqe_files_unregister()
6989 nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); in io_sqe_files_unregister()
6991 kfree(data->table[i].files); in io_sqe_files_unregister()
6992 kfree(data->table); in io_sqe_files_unregister()
6993 percpu_ref_exit(&data->refs); in io_sqe_files_unregister()
6995 ctx->file_data = NULL; in io_sqe_files_unregister()
6996 ctx->nr_user_files = 0; in io_sqe_files_unregister()
7002 if (refcount_dec_and_test(&sqd->refs)) { in io_put_sq_data()
7004 * The park is a bit of a work-around; without it we get in io_put_sq_data()
7008 if (sqd->thread) { in io_put_sq_data()
7009 kthread_park(sqd->thread); in io_put_sq_data()
7010 kthread_stop(sqd->thread); in io_put_sq_data()
7023 f = fdget(p->wq_fd); in io_attach_sq_data()
7025 return ERR_PTR(-ENXIO); in io_attach_sq_data()
7026 if (f.file->f_op != &io_uring_fops) { in io_attach_sq_data()
7028 return ERR_PTR(-EINVAL); in io_attach_sq_data()
7031 ctx_attach = f.file->private_data; in io_attach_sq_data()
7032 sqd = ctx_attach->sq_data; in io_attach_sq_data()
7035 return ERR_PTR(-EINVAL); in io_attach_sq_data()
7038 refcount_inc(&sqd->refs); in io_attach_sq_data()
7047 if (p->flags & IORING_SETUP_ATTACH_WQ) in io_get_sq_data()
7052 return ERR_PTR(-ENOMEM); in io_get_sq_data()
7054 refcount_set(&sqd->refs, 1); in io_get_sq_data()
7055 INIT_LIST_HEAD(&sqd->ctx_list); in io_get_sq_data()
7056 INIT_LIST_HEAD(&sqd->ctx_new_list); in io_get_sq_data()
7057 mutex_init(&sqd->ctx_lock); in io_get_sq_data()
7058 mutex_init(&sqd->lock); in io_get_sq_data()
7059 init_waitqueue_head(&sqd->wait); in io_get_sq_data()
7064 __releases(&sqd->lock) in io_sq_thread_unpark()
7066 if (!sqd->thread) in io_sq_thread_unpark()
7068 kthread_unpark(sqd->thread); in io_sq_thread_unpark()
7069 mutex_unlock(&sqd->lock); in io_sq_thread_unpark()
7073 __acquires(&sqd->lock) in io_sq_thread_park()
7075 if (!sqd->thread) in io_sq_thread_park()
7077 mutex_lock(&sqd->lock); in io_sq_thread_park()
7078 kthread_park(sqd->thread); in io_sq_thread_park()
7083 struct io_sq_data *sqd = ctx->sq_data; in io_sq_thread_stop()
7086 if (sqd->thread) { in io_sq_thread_stop()
7093 wake_up_process(sqd->thread); in io_sq_thread_stop()
7094 wait_for_completion(&ctx->sq_thread_comp); in io_sq_thread_stop()
7099 mutex_lock(&sqd->ctx_lock); in io_sq_thread_stop()
7100 list_del(&ctx->sqd_list); in io_sq_thread_stop()
7101 mutex_unlock(&sqd->ctx_lock); in io_sq_thread_stop()
7103 if (sqd->thread) { in io_sq_thread_stop()
7104 finish_wait(&sqd->wait, &ctx->sqo_wait_entry); in io_sq_thread_stop()
7109 ctx->sq_data = NULL; in io_sq_thread_stop()
7117 if (ctx->io_wq) { in io_finish_async()
7118 io_wq_destroy(ctx->io_wq); in io_finish_async()
7119 ctx->io_wq = NULL; in io_finish_async()
7131 struct sock *sk = ctx->ring_sock->sk; in __io_sqe_files_scm()
7138 return -ENOMEM; in __io_sqe_files_scm()
7143 return -ENOMEM; in __io_sqe_files_scm()
7146 skb->sk = sk; in __io_sqe_files_scm()
7149 fpl->user = get_uid(ctx->user); in __io_sqe_files_scm()
7155 fpl->fp[nr_files] = get_file(file); in __io_sqe_files_scm()
7156 unix_inflight(fpl->user, fpl->fp[nr_files]); in __io_sqe_files_scm()
7161 fpl->max = SCM_MAX_FD; in __io_sqe_files_scm()
7162 fpl->count = nr_files; in __io_sqe_files_scm()
7164 skb->destructor = unix_destruct_scm; in __io_sqe_files_scm()
7165 refcount_add(skb->truesize, &sk->sk_wmem_alloc); in __io_sqe_files_scm()
7166 skb_queue_head(&sk->sk_receive_queue, skb); in __io_sqe_files_scm()
7169 fput(fpl->fp[i]); in __io_sqe_files_scm()
7189 left = ctx->nr_user_files; in io_sqe_files_scm()
7196 left -= this_files; in io_sqe_files_scm()
7203 while (total < ctx->nr_user_files) { in io_sqe_files_scm()
7226 struct fixed_file_table *table = &file_data->table[i]; in io_sqe_alloc_file_tables()
7230 table->files = kcalloc(this_files, sizeof(struct file *), in io_sqe_alloc_file_tables()
7232 if (!table->files) in io_sqe_alloc_file_tables()
7234 nr_files -= this_files; in io_sqe_alloc_file_tables()
7241 struct fixed_file_table *table = &file_data->table[i]; in io_sqe_alloc_file_tables()
7242 kfree(table->files); in io_sqe_alloc_file_tables()
7250 struct sock *sock = ctx->ring_sock->sk; in io_ring_file_put()
7251 struct sk_buff_head list, *head = &sock->sk_receive_queue; in io_ring_file_put()
7266 for (i = 0; i < fp->count; i++) { in io_ring_file_put()
7269 if (fp->fp[i] != file) in io_ring_file_put()
7272 unix_notinflight(fp->user, fp->fp[i]); in io_ring_file_put()
7273 left = fp->count - 1 - i; in io_ring_file_put()
7275 memmove(&fp->fp[i], &fp->fp[i + 1], in io_ring_file_put()
7278 fp->count--; in io_ring_file_put()
7279 if (!fp->count) { in io_ring_file_put()
7299 spin_lock_irq(&head->lock); in io_ring_file_put()
7302 spin_unlock_irq(&head->lock); in io_ring_file_put()
7316 struct fixed_file_data *file_data = ref_node->file_data; in __io_file_put_work()
7317 struct io_ring_ctx *ctx = file_data->ctx; in __io_file_put_work()
7320 list_for_each_entry_safe(pfile, tmp, &ref_node->file_list, list) { in __io_file_put_work()
7321 list_del(&pfile->list); in __io_file_put_work()
7322 io_ring_file_put(ctx, pfile->file); in __io_file_put_work()
7326 percpu_ref_exit(&ref_node->refs); in __io_file_put_work()
7328 percpu_ref_put(&file_data->refs); in __io_file_put_work()
7337 node = llist_del_all(&ctx->file_put_llist); in io_file_put_work()
7341 struct llist_node *next = node->next; in io_file_put_work()
7358 data = ref_node->file_data; in io_file_data_ref_zero()
7359 ctx = data->ctx; in io_file_data_ref_zero()
7361 spin_lock(&data->lock); in io_file_data_ref_zero()
7362 ref_node->done = true; in io_file_data_ref_zero()
7364 while (!list_empty(&data->ref_list)) { in io_file_data_ref_zero()
7365 ref_node = list_first_entry(&data->ref_list, in io_file_data_ref_zero()
7368 if (!ref_node->done) in io_file_data_ref_zero()
7370 list_del(&ref_node->node); in io_file_data_ref_zero()
7371 first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); in io_file_data_ref_zero()
7373 spin_unlock(&data->lock); in io_file_data_ref_zero()
7375 if (percpu_ref_is_dying(&data->refs)) in io_file_data_ref_zero()
7379 mod_delayed_work(system_wq, &ctx->file_put_work, 0); in io_file_data_ref_zero()
7381 queue_delayed_work(system_wq, &ctx->file_put_work, delay); in io_file_data_ref_zero()
7391 return ERR_PTR(-ENOMEM); in alloc_fixed_file_ref_node()
7393 if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, in alloc_fixed_file_ref_node()
7396 return ERR_PTR(-ENOMEM); in alloc_fixed_file_ref_node()
7398 INIT_LIST_HEAD(&ref_node->node); in alloc_fixed_file_ref_node()
7399 INIT_LIST_HEAD(&ref_node->file_list); in alloc_fixed_file_ref_node()
7400 ref_node->file_data = ctx->file_data; in alloc_fixed_file_ref_node()
7401 ref_node->done = false; in alloc_fixed_file_ref_node()
7407 percpu_ref_exit(&ref_node->refs); in destroy_fixed_file_ref_node()
7417 int fd, ret = -ENOMEM; in io_sqe_files_register()
7421 if (ctx->file_data) in io_sqe_files_register()
7422 return -EBUSY; in io_sqe_files_register()
7424 return -EINVAL; in io_sqe_files_register()
7426 return -EMFILE; in io_sqe_files_register()
7428 file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL); in io_sqe_files_register()
7430 return -ENOMEM; in io_sqe_files_register()
7431 file_data->ctx = ctx; in io_sqe_files_register()
7432 init_completion(&file_data->done); in io_sqe_files_register()
7433 INIT_LIST_HEAD(&file_data->ref_list); in io_sqe_files_register()
7434 spin_lock_init(&file_data->lock); in io_sqe_files_register()
7437 file_data->table = kcalloc(nr_tables, sizeof(*file_data->table), in io_sqe_files_register()
7439 if (!file_data->table) in io_sqe_files_register()
7442 if (percpu_ref_init(&file_data->refs, io_file_ref_kill, in io_sqe_files_register()
7448 ctx->file_data = file_data; in io_sqe_files_register()
7450 for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { in io_sqe_files_register()
7455 ret = -EFAULT; in io_sqe_files_register()
7459 if (fd == -1) in io_sqe_files_register()
7463 ret = -EBADF; in io_sqe_files_register()
7474 if (file->f_op == &io_uring_fops) { in io_sqe_files_register()
7478 table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT]; in io_sqe_files_register()
7480 table->files[index] = file; in io_sqe_files_register()
7495 file_data->node = ref_node; in io_sqe_files_register()
7496 spin_lock(&file_data->lock); in io_sqe_files_register()
7497 list_add_tail(&ref_node->node, &file_data->ref_list); in io_sqe_files_register()
7498 spin_unlock(&file_data->lock); in io_sqe_files_register()
7499 percpu_ref_get(&file_data->refs); in io_sqe_files_register()
7502 for (i = 0; i < ctx->nr_user_files; i++) { in io_sqe_files_register()
7508 kfree(file_data->table[i].files); in io_sqe_files_register()
7509 ctx->nr_user_files = 0; in io_sqe_files_register()
7511 percpu_ref_exit(&file_data->refs); in io_sqe_files_register()
7513 kfree(file_data->table); in io_sqe_files_register()
7515 ctx->file_data = NULL; in io_sqe_files_register()
7523 struct sock *sock = ctx->ring_sock->sk; in io_sqe_file_register()
7524 struct sk_buff_head *head = &sock->sk_receive_queue; in io_sqe_file_register()
7532 spin_lock_irq(&head->lock); in io_sqe_file_register()
7537 if (fpl->count < SCM_MAX_FD) { in io_sqe_file_register()
7539 spin_unlock_irq(&head->lock); in io_sqe_file_register()
7540 fpl->fp[fpl->count] = get_file(file); in io_sqe_file_register()
7541 unix_inflight(fpl->user, fpl->fp[fpl->count]); in io_sqe_file_register()
7542 fpl->count++; in io_sqe_file_register()
7543 spin_lock_irq(&head->lock); in io_sqe_file_register()
7549 spin_unlock_irq(&head->lock); in io_sqe_file_register()
7566 struct fixed_file_ref_node *ref_node = data->node; in io_queue_file_removal()
7570 return -ENOMEM; in io_queue_file_removal()
7572 pfile->file = file; in io_queue_file_removal()
7573 list_add(&pfile->list, &ref_node->file_list); in io_queue_file_removal()
7582 struct fixed_file_data *data = ctx->file_data; in __io_sqe_files_update()
7590 if (check_add_overflow(up->offset, nr_args, &done)) in __io_sqe_files_update()
7591 return -EOVERFLOW; in __io_sqe_files_update()
7592 if (done > ctx->nr_user_files) in __io_sqe_files_update()
7593 return -EINVAL; in __io_sqe_files_update()
7600 fds = u64_to_user_ptr(up->fds); in __io_sqe_files_update()
7607 err = -EFAULT; in __io_sqe_files_update()
7610 i = array_index_nospec(up->offset, ctx->nr_user_files); in __io_sqe_files_update()
7611 table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; in __io_sqe_files_update()
7613 if (table->files[index]) { in __io_sqe_files_update()
7614 file = table->files[index]; in __io_sqe_files_update()
7618 table->files[index] = NULL; in __io_sqe_files_update()
7621 if (fd != -1) { in __io_sqe_files_update()
7624 err = -EBADF; in __io_sqe_files_update()
7635 if (file->f_op == &io_uring_fops) { in __io_sqe_files_update()
7637 err = -EBADF; in __io_sqe_files_update()
7640 table->files[index] = file; in __io_sqe_files_update()
7643 table->files[index] = NULL; in __io_sqe_files_update()
7648 nr_args--; in __io_sqe_files_update()
7650 up->offset++; in __io_sqe_files_update()
7654 percpu_ref_kill(&data->node->refs); in __io_sqe_files_update()
7655 spin_lock(&data->lock); in __io_sqe_files_update()
7656 list_add_tail(&ref_node->node, &data->ref_list); in __io_sqe_files_update()
7657 data->node = ref_node; in __io_sqe_files_update()
7658 spin_unlock(&data->lock); in __io_sqe_files_update()
7659 percpu_ref_get(&ctx->file_data->refs); in __io_sqe_files_update()
7671 if (!ctx->file_data) in io_sqe_files_update()
7672 return -ENXIO; in io_sqe_files_update()
7674 return -EINVAL; in io_sqe_files_update()
7676 return -EFAULT; in io_sqe_files_update()
7678 return -EINVAL; in io_sqe_files_update()
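The registration and update paths above are driven from userspace through io_uring_register(2). A hedged sketch using raw syscalls; register_files() and swap_file() are made-up helpers. As in io_sqe_files_register(), -1 entries create sparse slots, and as in __io_sqe_files_update(), updating a slot with fd == -1 removes the file at that offset:

#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Register the initial fixed-file table; -1 entries are sparse slots. */
static int register_files(int ring_fd, int *fds, unsigned nr)
{
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_FILES, fds, nr);
}

/* Replace one slot, or remove it when new_fd == -1. */
static int swap_file(int ring_fd, unsigned slot, int new_fd)
{
	struct io_uring_files_update up = {
		.offset	= slot,
		.fds	= (unsigned long long)(uintptr_t)&new_fd,
	};

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_FILES_UPDATE, &up, 1);
}

Submissions then reference the table with IOSQE_FIXED_FILE set and the slot index in sqe->fd.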
7700 data.user = ctx->user; in io_init_wq_offload()
7704 if (!(p->flags & IORING_SETUP_ATTACH_WQ)) { in io_init_wq_offload()
7706 concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); in io_init_wq_offload()
7708 ctx->io_wq = io_wq_create(concurrency, &data); in io_init_wq_offload()
7709 if (IS_ERR(ctx->io_wq)) { in io_init_wq_offload()
7710 ret = PTR_ERR(ctx->io_wq); in io_init_wq_offload()
7711 ctx->io_wq = NULL; in io_init_wq_offload()
7716 f = fdget(p->wq_fd); in io_init_wq_offload()
7718 return -EBADF; in io_init_wq_offload()
7720 if (f.file->f_op != &io_uring_fops) { in io_init_wq_offload()
7721 ret = -EINVAL; in io_init_wq_offload()
7725 ctx_attach = f.file->private_data; in io_init_wq_offload()
7727 if (!io_wq_get(ctx_attach->io_wq, &data)) { in io_init_wq_offload()
7728 ret = -EINVAL; in io_init_wq_offload()
7732 ctx->io_wq = ctx_attach->io_wq; in io_init_wq_offload()
7745 return -ENOMEM; in io_uring_alloc_task_context()
7747 ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); in io_uring_alloc_task_context()
7753 xa_init(&tctx->xa); in io_uring_alloc_task_context()
7754 init_waitqueue_head(&tctx->wait); in io_uring_alloc_task_context()
7755 tctx->last = NULL; in io_uring_alloc_task_context()
7756 atomic_set(&tctx->in_idle, 0); in io_uring_alloc_task_context()
7757 tctx->sqpoll = false; in io_uring_alloc_task_context()
7758 io_init_identity(&tctx->__identity); in io_uring_alloc_task_context()
7759 tctx->identity = &tctx->__identity; in io_uring_alloc_task_context()
7760 task->io_uring = tctx; in io_uring_alloc_task_context()
7766 struct io_uring_task *tctx = tsk->io_uring; in __io_uring_free()
7768 WARN_ON_ONCE(!xa_empty(&tctx->xa)); in __io_uring_free()
7769 WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1); in __io_uring_free()
7770 if (tctx->identity != &tctx->__identity) in __io_uring_free()
7771 kfree(tctx->identity); in __io_uring_free()
7772 percpu_counter_destroy(&tctx->inflight); in __io_uring_free()
7774 tsk->io_uring = NULL; in __io_uring_free()
7782 if (ctx->flags & IORING_SETUP_SQPOLL) { in io_sq_offload_create()
7785 ret = -EPERM; in io_sq_offload_create()
7795 ctx->sq_data = sqd; in io_sq_offload_create()
7797 mutex_lock(&sqd->ctx_lock); in io_sq_offload_create()
7798 list_add(&ctx->sqd_list, &sqd->ctx_new_list); in io_sq_offload_create()
7799 mutex_unlock(&sqd->ctx_lock); in io_sq_offload_create()
7802 ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); in io_sq_offload_create()
7803 if (!ctx->sq_thread_idle) in io_sq_offload_create()
7804 ctx->sq_thread_idle = HZ; in io_sq_offload_create()
7806 if (sqd->thread) in io_sq_offload_create()
7809 if (p->flags & IORING_SETUP_SQ_AFF) { in io_sq_offload_create()
7810 int cpu = p->sq_thread_cpu; in io_sq_offload_create()
7812 ret = -EINVAL; in io_sq_offload_create()
7818 sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd, in io_sq_offload_create()
7819 cpu, "io_uring-sq"); in io_sq_offload_create()
7821 sqd->thread = kthread_create(io_sq_thread, sqd, in io_sq_offload_create()
7822 "io_uring-sq"); in io_sq_offload_create()
7824 if (IS_ERR(sqd->thread)) { in io_sq_offload_create()
7825 ret = PTR_ERR(sqd->thread); in io_sq_offload_create()
7826 sqd->thread = NULL; in io_sq_offload_create()
7829 ret = io_uring_alloc_task_context(sqd->thread); in io_sq_offload_create()
7832 } else if (p->flags & IORING_SETUP_SQ_AFF) { in io_sq_offload_create()
7834 ret = -EINVAL; in io_sq_offload_create()
7851 struct io_sq_data *sqd = ctx->sq_data; in io_sq_offload_start()
7853 if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread) in io_sq_offload_start()
7854 wake_up_process(sqd->thread); in io_sq_offload_start()
7860 atomic_long_sub(nr_pages, &user->locked_vm); in __io_unaccount_mem()
7872 cur_pages = atomic_long_read(&user->locked_vm); in __io_account_mem()
7875 return -ENOMEM; in __io_account_mem()
7876 } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, in __io_account_mem()
7885 if (ctx->limit_mem) in io_unaccount_mem()
7886 __io_unaccount_mem(ctx->user, nr_pages); in io_unaccount_mem()
7888 if (ctx->mm_account) { in io_unaccount_mem()
7890 ctx->mm_account->locked_vm -= nr_pages; in io_unaccount_mem()
7892 atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); in io_unaccount_mem()
7901 if (ctx->limit_mem) { in io_account_mem()
7902 ret = __io_account_mem(ctx->user, nr_pages); in io_account_mem()
7907 if (ctx->mm_account) { in io_account_mem()
7909 ctx->mm_account->locked_vm += nr_pages; in io_account_mem()
7911 atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); in io_account_mem()
7982 if (!ctx->user_bufs) in io_sqe_buffer_unregister()
7983 return -ENXIO; in io_sqe_buffer_unregister()
7985 for (i = 0; i < ctx->nr_user_bufs; i++) { in io_sqe_buffer_unregister()
7986 struct io_mapped_ubuf *imu = &ctx->user_bufs[i]; in io_sqe_buffer_unregister()
7988 for (j = 0; j < imu->nr_bvecs; j++) in io_sqe_buffer_unregister()
7989 unpin_user_page(imu->bvec[j].bv_page); in io_sqe_buffer_unregister()
7991 if (imu->acct_pages) in io_sqe_buffer_unregister()
7992 io_unaccount_mem(ctx, imu->acct_pages, ACCT_PINNED); in io_sqe_buffer_unregister()
7993 kvfree(imu->bvec); in io_sqe_buffer_unregister()
7994 imu->nr_bvecs = 0; in io_sqe_buffer_unregister()
7997 kfree(ctx->user_bufs); in io_sqe_buffer_unregister()
7998 ctx->user_bufs = NULL; in io_sqe_buffer_unregister()
7999 ctx->nr_user_bufs = 0; in io_sqe_buffer_unregister()
8009 if (ctx->compat) { in io_copy_iov()
8015 return -EFAULT; in io_copy_iov()
8017 dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); in io_copy_iov()
8018 dst->iov_len = ciov.iov_len; in io_copy_iov()
8024 return -EFAULT; in io_copy_iov()
8051 for (i = 0; i < ctx->nr_user_bufs; i++) { in headpage_already_acct()
8052 struct io_mapped_ubuf *imu = &ctx->user_bufs[i]; in headpage_already_acct()
8054 for (j = 0; j < imu->nr_bvecs; j++) { in headpage_already_acct()
8055 if (!PageCompound(imu->bvec[j].bv_page)) in headpage_already_acct()
8057 if (compound_head(imu->bvec[j].bv_page) == hpage) in headpage_already_acct()
8073 imu->acct_pages++; in io_buffer_account_pin()
8083 imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; in io_buffer_account_pin()
8087 if (!imu->acct_pages) in io_buffer_account_pin()
8090 ret = io_account_mem(ctx, imu->acct_pages, ACCT_PINNED); in io_buffer_account_pin()
8092 imu->acct_pages = 0; in io_buffer_account_pin()
8103 int ret = -EINVAL; in io_sqe_buffer_register()
8105 if (ctx->user_bufs) in io_sqe_buffer_register()
8106 return -EBUSY; in io_sqe_buffer_register()
8108 return -EINVAL; in io_sqe_buffer_register()
8110 ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf), in io_sqe_buffer_register()
8112 if (!ctx->user_bufs) in io_sqe_buffer_register()
8113 return -ENOMEM; in io_sqe_buffer_register()
8116 struct io_mapped_ubuf *imu = &ctx->user_bufs[i]; in io_sqe_buffer_register()
8128 * constraints here, we'll -EINVAL later when IO is in io_sqe_buffer_register()
8131 ret = -EFAULT; in io_sqe_buffer_register()
8140 end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; in io_sqe_buffer_register()
8142 nr_pages = end - start; in io_sqe_buffer_register()
8154 ret = -ENOMEM; in io_sqe_buffer_register()
8160 imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec), in io_sqe_buffer_register()
8162 ret = -ENOMEM; in io_sqe_buffer_register()
8163 if (!imu->bvec) in io_sqe_buffer_register()
8167 mmap_read_lock(current->mm); in io_sqe_buffer_register()
8176 if (vma->vm_file && in io_sqe_buffer_register()
8177 !is_file_hugepages(vma->vm_file)) { in io_sqe_buffer_register()
8178 ret = -EOPNOTSUPP; in io_sqe_buffer_register()
8183 ret = pret < 0 ? pret : -EFAULT; in io_sqe_buffer_register()
8185 mmap_read_unlock(current->mm); in io_sqe_buffer_register()
8193 kvfree(imu->bvec); in io_sqe_buffer_register()
8200 kvfree(imu->bvec); in io_sqe_buffer_register()
8209 vec_len = min_t(size_t, size, PAGE_SIZE - off); in io_sqe_buffer_register()
8210 imu->bvec[j].bv_page = pages[j]; in io_sqe_buffer_register()
8211 imu->bvec[j].bv_len = vec_len; in io_sqe_buffer_register()
8212 imu->bvec[j].bv_offset = off; in io_sqe_buffer_register()
8214 size -= vec_len; in io_sqe_buffer_register()
8217 imu->ubuf = ubuf; in io_sqe_buffer_register()
8218 imu->len = iov.iov_len; in io_sqe_buffer_register()
8219 imu->nr_bvecs = nr_pages; in io_sqe_buffer_register()
8221 ctx->nr_user_bufs++; in io_sqe_buffer_register()
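io_sqe_buffer_register() pins and accounts the pages behind each iovec handed in via IORING_REGISTER_BUFFERS; the buffers are later addressed by index from IORING_OP_READ_FIXED / IORING_OP_WRITE_FIXED through sqe->buf_index. A brief userspace sketch; register_fixed_buffer() is an illustrative name, not a real API:

#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Allocate one page-aligned buffer and register it as fixed buffer 0. */
static int register_fixed_buffer(int ring_fd, void **buf_out, size_t len)
{
	struct iovec iov;

	if (posix_memalign(buf_out, 4096, len))
		return -1;

	iov.iov_base = *buf_out;
	iov.iov_len  = len;

	/* The kernel walks this iovec array in io_sqe_buffer_register(). */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_BUFFERS, &iov, 1);
}

A later READ_FIXED SQE would carry buf_index = 0 with addr/len pointing inside that registered range.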
8238 if (ctx->cq_ev_fd) in io_eventfd_register()
8239 return -EBUSY; in io_eventfd_register()
8242 return -EFAULT; in io_eventfd_register()
8244 ctx->cq_ev_fd = eventfd_ctx_fdget(fd); in io_eventfd_register()
8245 if (IS_ERR(ctx->cq_ev_fd)) { in io_eventfd_register()
8246 int ret = PTR_ERR(ctx->cq_ev_fd); in io_eventfd_register()
8247 ctx->cq_ev_fd = NULL; in io_eventfd_register()
8256 if (ctx->cq_ev_fd) { in io_eventfd_unregister()
8257 eventfd_ctx_put(ctx->cq_ev_fd); in io_eventfd_unregister()
8258 ctx->cq_ev_fd = NULL; in io_eventfd_unregister()
8262 return -ENXIO; in io_eventfd_unregister()
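io_eventfd_register()/io_eventfd_unregister() back IORING_REGISTER_EVENTFD and IORING_UNREGISTER_EVENTFD: the ring takes a reference on the eventfd and signals it when CQEs are posted, which is handy for integrating with poll/epoll loops. A short sketch; attach_cq_eventfd() is a made-up wrapper:

#include <sys/eventfd.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Create an eventfd that the ring will signal on CQE postings. */
static int attach_cq_eventfd(int ring_fd)
{
	int efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);

	if (efd < 0)
		return -1;
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_EVENTFD, &efd, 1) < 0) {
		close(efd);
		return -1;
	}
	return efd;	/* poll()/epoll this fd to learn about completions */
}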
8270 __io_remove_buffers(ctx, buf, id, -1U); in __io_destroy_buffers()
8276 idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx); in io_destroy_buffers()
8277 idr_destroy(&ctx->io_buffer_idr); in io_destroy_buffers()
8285 if (ctx->sqo_task) { in io_ring_ctx_free()
8286 put_task_struct(ctx->sqo_task); in io_ring_ctx_free()
8287 ctx->sqo_task = NULL; in io_ring_ctx_free()
8288 mmdrop(ctx->mm_account); in io_ring_ctx_free()
8289 ctx->mm_account = NULL; in io_ring_ctx_free()
8293 if (ctx->sqo_blkcg_css) in io_ring_ctx_free()
8294 css_put(ctx->sqo_blkcg_css); in io_ring_ctx_free()
8300 idr_destroy(&ctx->personality_idr); in io_ring_ctx_free()
8303 if (ctx->ring_sock) { in io_ring_ctx_free()
8304 ctx->ring_sock->file = NULL; /* so that iput() is called */ in io_ring_ctx_free()
8305 sock_release(ctx->ring_sock); in io_ring_ctx_free()
8309 io_mem_free(ctx->rings); in io_ring_ctx_free()
8310 io_mem_free(ctx->sq_sqes); in io_ring_ctx_free()
8312 percpu_ref_exit(&ctx->refs); in io_ring_ctx_free()
8313 free_uid(ctx->user); in io_ring_ctx_free()
8314 put_cred(ctx->creds); in io_ring_ctx_free()
8315 kfree(ctx->cancel_hash); in io_ring_ctx_free()
8316 kmem_cache_free(req_cachep, ctx->fallback_req); in io_ring_ctx_free()
8322 struct io_ring_ctx *ctx = file->private_data; in io_uring_poll()
8325 poll_wait(file, &ctx->cq_wait, wait); in io_uring_poll()
8341 struct io_ring_ctx *ctx = file->private_data; in io_uring_fasync()
8343 return fasync_helper(fd, file, on, &ctx->cq_fasync); in io_uring_fasync()
8351 iod = idr_remove(&ctx->personality_idr, id); in io_remove_personalities()
8353 put_cred(iod->creds); in io_remove_personalities()
8354 if (refcount_dec_and_test(&iod->count)) in io_remove_personalities()
8367 * submitted async (out-of-line), then completions can come in while in io_ring_exit_work()
8372 if (ctx->rings) in io_ring_exit_work()
8375 } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); in io_ring_exit_work()
8381 mutex_lock(&ctx->uring_lock); in io_ring_ctx_wait_and_kill()
8382 percpu_ref_kill(&ctx->refs); in io_ring_ctx_wait_and_kill()
8383 mutex_unlock(&ctx->uring_lock); in io_ring_ctx_wait_and_kill()
8388 if (ctx->io_wq) in io_ring_ctx_wait_and_kill()
8389 io_wq_cancel_all(ctx->io_wq); in io_ring_ctx_wait_and_kill()
8392 if (ctx->rings) in io_ring_ctx_wait_and_kill()
8395 idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); in io_ring_ctx_wait_and_kill()
8402 io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries), in io_ring_ctx_wait_and_kill()
8405 INIT_WORK(&ctx->exit_work, io_ring_exit_work); in io_ring_ctx_wait_and_kill()
8412 queue_work(system_unbound_wq, &ctx->exit_work); in io_ring_ctx_wait_and_kill()
8417 struct io_ring_ctx *ctx = file->private_data; in io_uring_release()
8419 file->private_data = NULL; in io_uring_release()
8428 return !files || ((work->flags & IO_WQ_WORK_FILES) && in io_wq_files_match()
8429 work->identity->files == files); in io_wq_files_match()
8439 if (!(preq->flags & REQ_F_LINK_HEAD)) in io_match_link()
8442 list_for_each_entry(link, &preq->link_list, link_list) { in io_match_link()
8462 spin_lock_irq(&ctx->completion_lock); in io_poll_remove_link()
8463 for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { in io_poll_remove_link()
8466 list = &ctx->cancel_hash[i]; in io_poll_remove_link()
8475 spin_unlock_irq(&ctx->completion_lock); in io_poll_remove_link()
8485 spin_lock_irq(&ctx->completion_lock); in io_timeout_remove_link()
8486 list_for_each_entry(preq, &ctx->timeout_list, timeout.list) { in io_timeout_remove_link()
8493 spin_unlock_irq(&ctx->completion_lock); in io_timeout_remove_link()
8502 if (req->flags & REQ_F_LINK_TIMEOUT) { in io_cancel_link_cb()
8504 struct io_ring_ctx *ctx = req->ctx; in io_cancel_link_cb()
8507 spin_lock_irqsave(&ctx->completion_lock, flags); in io_cancel_link_cb()
8509 spin_unlock_irqrestore(&ctx->completion_lock, flags); in io_cancel_link_cb()
8521 cret = io_wq_cancel_work(ctx->io_wq, &req->work); in io_attempt_cancel()
8526 cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true); in io_attempt_cancel()
8530 /* if we have a poll link holding this pending, cancel that */ in io_attempt_cancel()
8545 spin_lock_irq(&ctx->completion_lock); in io_cancel_defer_files()
8546 list_for_each_entry_reverse(de, &ctx->defer_list, list) { in io_cancel_defer_files()
8547 if (io_task_match(de->req, task) && in io_cancel_defer_files()
8548 io_match_files(de->req, files)) { in io_cancel_defer_files()
8549 list_cut_position(&list, &ctx->defer_list, &de->list); in io_cancel_defer_files()
8553 spin_unlock_irq(&ctx->completion_lock); in io_cancel_defer_files()
8557 list_del_init(&de->list); in io_cancel_defer_files()
8558 req_set_fail_links(de->req); in io_cancel_defer_files()
8559 io_put_req(de->req); in io_cancel_defer_files()
8560 io_req_complete(de->req, -ECANCELED); in io_cancel_defer_files()
8571 if (list_empty_careful(&ctx->inflight_list)) in io_uring_cancel_files()
8575 io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); in io_uring_cancel_files()
8577 while (!list_empty_careful(&ctx->inflight_list)) { in io_uring_cancel_files()
8581 spin_lock_irq(&ctx->inflight_lock); in io_uring_cancel_files()
8582 list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { in io_uring_cancel_files()
8583 if (files && (req->work.flags & IO_WQ_WORK_FILES) && in io_uring_cancel_files()
8584 req->work.identity->files != files) in io_uring_cancel_files()
8587 if (!refcount_inc_not_zero(&req->refs)) in io_uring_cancel_files()
8593 prepare_to_wait(&ctx->inflight_wait, &wait, in io_uring_cancel_files()
8595 spin_unlock_irq(&ctx->inflight_lock); in io_uring_cancel_files()
8606 finish_wait(&ctx->inflight_wait, &wait); in io_uring_cancel_files()
8630 cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true); in __io_uring_cancel_task_requests()
8635 if (!(ctx->flags & IORING_SETUP_SQPOLL)) { in __io_uring_cancel_task_requests()
8636 while (!list_empty_careful(&ctx->iopoll_list)) { in __io_uring_cancel_task_requests()
8659 if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { in io_uring_cancel_task_requests()
8660 task = ctx->sq_data->thread; in io_uring_cancel_task_requests()
8661 atomic_inc(&task->io_uring->in_idle); in io_uring_cancel_task_requests()
8662 io_sq_thread_park(ctx->sq_data); in io_uring_cancel_task_requests()
8677 if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { in io_uring_cancel_task_requests()
8678 atomic_dec(&task->io_uring->in_idle); in io_uring_cancel_task_requests()
8683 if (task->io_uring->identity->files == files) in io_uring_cancel_task_requests()
8684 task->io_uring->identity->files = NULL; in io_uring_cancel_task_requests()
8685 io_sq_thread_unpark(ctx->sq_data); in io_uring_cancel_task_requests()
8694 struct io_uring_task *tctx = current->io_uring; in io_uring_add_task_file()
8702 tctx = current->io_uring; in io_uring_add_task_file()
8704 if (tctx->last != file) { in io_uring_add_task_file()
8705 void *old = xa_load(&tctx->xa, (unsigned long)file); in io_uring_add_task_file()
8709 xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL); in io_uring_add_task_file()
8711 tctx->last = file; in io_uring_add_task_file()
8719 if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL)) in io_uring_add_task_file()
8720 tctx->sqpoll = true; in io_uring_add_task_file()
8726 * Remove this io_uring_file -> task mapping.
8730 struct io_uring_task *tctx = current->io_uring; in io_uring_del_task_file()
8732 if (tctx->last == file) in io_uring_del_task_file()
8733 tctx->last = NULL; in io_uring_del_task_file()
8734 file = xa_erase(&tctx->xa, (unsigned long)file); in io_uring_del_task_file()
8745 if (!current->io_uring) in io_uring_attempt_task_drop()
8749 * task file note. If the task is exiting, drop regardless of count. in io_uring_attempt_task_drop()
8751 if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || in io_uring_attempt_task_drop()
8752 atomic_long_read(&file->f_count) == 2) in io_uring_attempt_task_drop()
8758 struct io_uring_task *tctx = current->io_uring; in __io_uring_files_cancel()
8763 atomic_inc(&tctx->in_idle); in __io_uring_files_cancel()
8765 xa_for_each(&tctx->xa, index, file) { in __io_uring_files_cancel()
8766 struct io_ring_ctx *ctx = file->private_data; in __io_uring_files_cancel()
8773 atomic_dec(&tctx->in_idle); in __io_uring_files_cancel()
8782 inflight = percpu_counter_sum(&tctx->inflight); in tctx_inflight()
8783 if (!tctx->sqpoll) in tctx_inflight()
8788 * add the pending count for those. in tctx_inflight()
8790 xa_for_each(&tctx->xa, index, file) { in tctx_inflight()
8791 struct io_ring_ctx *ctx = file->private_data; in tctx_inflight()
8793 if (ctx->flags & IORING_SETUP_SQPOLL) { in tctx_inflight()
8794 struct io_uring_task *__tctx = ctx->sqo_task->io_uring; in tctx_inflight()
8796 inflight += percpu_counter_sum(&__tctx->inflight); in tctx_inflight()
8809 struct io_uring_task *tctx = current->io_uring; in __io_uring_task_cancel()
8814 atomic_inc(&tctx->in_idle); in __io_uring_task_cancel()
8823 prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); in __io_uring_task_cancel()
8826 * If we've seen completions, retry. This avoids a race where in __io_uring_task_cancel()
8834 finish_wait(&tctx->wait, &wait); in __io_uring_task_cancel()
8835 atomic_dec(&tctx->in_idle); in __io_uring_task_cancel()
8847 struct io_ring_ctx *ctx = file->private_data; in io_uring_validate_mmap_request()
8855 ptr = ctx->rings; in io_uring_validate_mmap_request()
8858 ptr = ctx->sq_sqes; in io_uring_validate_mmap_request()
8861 return ERR_PTR(-EINVAL); in io_uring_validate_mmap_request()
8866 return ERR_PTR(-EINVAL); in io_uring_validate_mmap_request()
8875 size_t sz = vma->vm_end - vma->vm_start; in io_uring_mmap()
8879 ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); in io_uring_mmap()
8884 return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); in io_uring_mmap()
8891 return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL; in io_uring_mmap()
8922 prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); in io_sqpoll_wait_sq()
8930 finish_wait(&ctx->sqo_sq_wait, &wait); in io_sqpoll_wait_sq()
8938 long ret = -EBADF; in SYSCALL_DEFINE6()
8946 return -EINVAL; in SYSCALL_DEFINE6()
8950 return -EBADF; in SYSCALL_DEFINE6()
8952 ret = -EOPNOTSUPP; in SYSCALL_DEFINE6()
8953 if (f.file->f_op != &io_uring_fops) in SYSCALL_DEFINE6()
8956 ret = -ENXIO; in SYSCALL_DEFINE6()
8957 ctx = f.file->private_data; in SYSCALL_DEFINE6()
8958 if (!percpu_ref_tryget(&ctx->refs)) in SYSCALL_DEFINE6()
8961 ret = -EBADFD; in SYSCALL_DEFINE6()
8962 if (ctx->flags & IORING_SETUP_R_DISABLED) in SYSCALL_DEFINE6()
8967 * Just return the requested submit count, and wake the thread if in SYSCALL_DEFINE6()
8971 if (ctx->flags & IORING_SETUP_SQPOLL) { in SYSCALL_DEFINE6()
8972 if (!list_empty_careful(&ctx->cq_overflow_list)) in SYSCALL_DEFINE6()
8975 wake_up(&ctx->sq_data->wait); in SYSCALL_DEFINE6()
8983 mutex_lock(&ctx->uring_lock); in SYSCALL_DEFINE6()
8985 mutex_unlock(&ctx->uring_lock); in SYSCALL_DEFINE6()
8991 min_complete = min(min_complete, ctx->cq_entries); in SYSCALL_DEFINE6()
8999 if (ctx->flags & IORING_SETUP_IOPOLL && in SYSCALL_DEFINE6()
9000 !(ctx->flags & IORING_SETUP_SQPOLL)) { in SYSCALL_DEFINE6()
9008 percpu_ref_put(&ctx->refs); in SYSCALL_DEFINE6()
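The io_uring_enter(2) handler above is where submission and completion meet: without SQPOLL, up to to_submit SQEs are consumed under uring_lock, and IORING_ENTER_GETEVENTS can wait for min_complete CQEs in the same call. A compact sketch of queueing one NOP and reaping it in a single syscall; sq_view and submit_nop() are illustrative names, and the pointers are assumed to come from the sq_off mmap shown further down:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

struct sq_view {			/* filled in from params.sq_off.* */
	unsigned *ktail, *kring_mask, *array;
	struct io_uring_sqe *sqes;
};

static int submit_nop(int ring_fd, struct sq_view *sq)
{
	unsigned tail = *sq->ktail;
	unsigned idx  = tail & *sq->kring_mask;
	struct io_uring_sqe *sqe = &sq->sqes[idx];

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode    = IORING_OP_NOP;
	sqe->user_data = 0x42;

	sq->array[idx] = idx;			/* publish the SQE index */
	__atomic_store_n(sq->ktail, tail + 1, __ATOMIC_RELEASE);

	/* Submit it and wait for its CQE in one trip into the kernel. */
	return syscall(__NR_io_uring_enter, ring_fd, 1, 1,
		       IORING_ENTER_GETEVENTS, NULL, 0);
}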
9018 const struct cred *cred = iod->creds; in io_uring_show_cred()
9027 seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); in io_uring_show_cred()
9028 seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); in io_uring_show_cred()
9029 seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); in io_uring_show_cred()
9030 seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); in io_uring_show_cred()
9031 seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); in io_uring_show_cred()
9032 seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); in io_uring_show_cred()
9033 seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); in io_uring_show_cred()
9034 seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); in io_uring_show_cred()
9036 gi = cred->group_info; in io_uring_show_cred()
9037 for (g = 0; g < gi->ngroups; g++) { in io_uring_show_cred()
9039 from_kgid_munged(uns, gi->gid[g])); in io_uring_show_cred()
9042 cap = cred->cap_effective; in io_uring_show_cred()
9044 seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); in io_uring_show_cred()
9061 has_lock = mutex_trylock(&ctx->uring_lock); in __io_uring_show_fdinfo()
9063 if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) in __io_uring_show_fdinfo()
9064 sq = ctx->sq_data; in __io_uring_show_fdinfo()
9066 seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); in __io_uring_show_fdinfo()
9067 seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1); in __io_uring_show_fdinfo()
9068 seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); in __io_uring_show_fdinfo()
9069 for (i = 0; has_lock && i < ctx->nr_user_files; i++) { in __io_uring_show_fdinfo()
9073 table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; in __io_uring_show_fdinfo()
9074 f = table->files[i & IORING_FILE_TABLE_MASK]; in __io_uring_show_fdinfo()
9076 seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); in __io_uring_show_fdinfo()
9080 seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); in __io_uring_show_fdinfo()
9081 for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { in __io_uring_show_fdinfo()
9082 struct io_mapped_ubuf *buf = &ctx->user_bufs[i]; in __io_uring_show_fdinfo()
9084 seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, in __io_uring_show_fdinfo()
9085 (unsigned int) buf->len); in __io_uring_show_fdinfo()
9087 if (has_lock && !idr_is_empty(&ctx->personality_idr)) { in __io_uring_show_fdinfo()
9089 idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); in __io_uring_show_fdinfo()
9092 spin_lock_irq(&ctx->completion_lock); in __io_uring_show_fdinfo()
9093 for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { in __io_uring_show_fdinfo()
9094 struct hlist_head *list = &ctx->cancel_hash[i]; in __io_uring_show_fdinfo()
9098 seq_printf(m, " op=%d, task_works=%d\n", req->opcode, in __io_uring_show_fdinfo()
9099 req->task->task_works != NULL); in __io_uring_show_fdinfo()
9101 spin_unlock_irq(&ctx->completion_lock); in __io_uring_show_fdinfo()
9103 mutex_unlock(&ctx->uring_lock); in __io_uring_show_fdinfo()
9108 struct io_ring_ctx *ctx = f->private_data; in io_uring_show_fdinfo()
9110 if (percpu_ref_tryget(&ctx->refs)) { in io_uring_show_fdinfo()
9112 percpu_ref_put(&ctx->refs); in io_uring_show_fdinfo()
9125 .poll = io_uring_poll,
9139 ctx->sq_entries = p->sq_entries; in io_allocate_scq_urings()
9140 ctx->cq_entries = p->cq_entries; in io_allocate_scq_urings()
9142 size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset); in io_allocate_scq_urings()
9144 return -EOVERFLOW; in io_allocate_scq_urings()
9148 return -ENOMEM; in io_allocate_scq_urings()
9150 ctx->rings = rings; in io_allocate_scq_urings()
9151 ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); in io_allocate_scq_urings()
9152 rings->sq_ring_mask = p->sq_entries - 1; in io_allocate_scq_urings()
9153 rings->cq_ring_mask = p->cq_entries - 1; in io_allocate_scq_urings()
9154 rings->sq_ring_entries = p->sq_entries; in io_allocate_scq_urings()
9155 rings->cq_ring_entries = p->cq_entries; in io_allocate_scq_urings()
9156 ctx->sq_mask = rings->sq_ring_mask; in io_allocate_scq_urings()
9157 ctx->cq_mask = rings->cq_ring_mask; in io_allocate_scq_urings()
9159 size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); in io_allocate_scq_urings()
9161 io_mem_free(ctx->rings); in io_allocate_scq_urings()
9162 ctx->rings = NULL; in io_allocate_scq_urings()
9163 return -EOVERFLOW; in io_allocate_scq_urings()
9166 ctx->sq_sqes = io_mem_alloc(size); in io_allocate_scq_urings()
9167 if (!ctx->sq_sqes) { in io_allocate_scq_urings()
9168 io_mem_free(ctx->rings); in io_allocate_scq_urings()
9169 ctx->rings = NULL; in io_allocate_scq_urings()
9170 return -ENOMEM; in io_allocate_scq_urings()
9190 &ctx->ring_sock); in io_uring_get_fd()
9209 ctx->ring_sock->file = file; in io_uring_get_fd()
9221 sock_release(ctx->ring_sock); in io_uring_get_fd()
9222 ctx->ring_sock = NULL; in io_uring_get_fd()
9236 return -EINVAL; in io_uring_create()
9238 if (!(p->flags & IORING_SETUP_CLAMP)) in io_uring_create()
9239 return -EINVAL; in io_uring_create()
9251 p->sq_entries = roundup_pow_of_two(entries); in io_uring_create()
9252 if (p->flags & IORING_SETUP_CQSIZE) { in io_uring_create()
9255 * to a power-of-two, if it isn't already. We do NOT impose in io_uring_create()
9258 if (!p->cq_entries) in io_uring_create()
9259 return -EINVAL; in io_uring_create()
9260 if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { in io_uring_create()
9261 if (!(p->flags & IORING_SETUP_CLAMP)) in io_uring_create()
9262 return -EINVAL; in io_uring_create()
9263 p->cq_entries = IORING_MAX_CQ_ENTRIES; in io_uring_create()
9265 p->cq_entries = roundup_pow_of_two(p->cq_entries); in io_uring_create()
9266 if (p->cq_entries < p->sq_entries) in io_uring_create()
9267 return -EINVAL; in io_uring_create()
9269 p->cq_entries = 2 * p->sq_entries; in io_uring_create()
9277 ring_pages(p->sq_entries, p->cq_entries)); in io_uring_create()
9287 __io_unaccount_mem(user, ring_pages(p->sq_entries, in io_uring_create()
9288 p->cq_entries)); in io_uring_create()
9290 return -ENOMEM; in io_uring_create()
9292 ctx->compat = in_compat_syscall(); in io_uring_create()
9293 ctx->user = user; in io_uring_create()
9294 ctx->creds = get_current_cred(); in io_uring_create()
9296 ctx->loginuid = current->loginuid; in io_uring_create()
9297 ctx->sessionid = current->sessionid; in io_uring_create()
9299 ctx->sqo_task = get_task_struct(current); in io_uring_create()
9307 mmgrab(current->mm); in io_uring_create()
9308 ctx->mm_account = current->mm; in io_uring_create()
9318 ctx->sqo_blkcg_css = blkcg_css(); in io_uring_create()
9319 ret = css_tryget_online(ctx->sqo_blkcg_css); in io_uring_create()
9323 ctx->sqo_blkcg_css = NULL; in io_uring_create()
9324 ret = -ENODEV; in io_uring_create()
9333 * will un-account as well. in io_uring_create()
9335 io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries), in io_uring_create()
9337 ctx->limit_mem = limit_mem; in io_uring_create()
9347 if (!(p->flags & IORING_SETUP_R_DISABLED)) in io_uring_create()
9350 memset(&p->sq_off, 0, sizeof(p->sq_off)); in io_uring_create()
9351 p->sq_off.head = offsetof(struct io_rings, sq.head); in io_uring_create()
9352 p->sq_off.tail = offsetof(struct io_rings, sq.tail); in io_uring_create()
9353 p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); in io_uring_create()
9354 p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); in io_uring_create()
9355 p->sq_off.flags = offsetof(struct io_rings, sq_flags); in io_uring_create()
9356 p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); in io_uring_create()
9357 p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; in io_uring_create()
9359 memset(&p->cq_off, 0, sizeof(p->cq_off)); in io_uring_create()
9360 p->cq_off.head = offsetof(struct io_rings, cq.head); in io_uring_create()
9361 p->cq_off.tail = offsetof(struct io_rings, cq.tail); in io_uring_create()
9362 p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); in io_uring_create()
9363 p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); in io_uring_create()
9364 p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); in io_uring_create()
9365 p->cq_off.cqes = offsetof(struct io_rings, cqes); in io_uring_create()
9366 p->cq_off.flags = offsetof(struct io_rings, cq_flags); in io_uring_create()
9368 p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | in io_uring_create()
9374 ret = -EFAULT; in io_uring_create()
9386 trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); in io_uring_create()
9404 return -EFAULT; in io_uring_setup()
9407 return -EINVAL; in io_uring_setup()
9414 return -EINVAL; in io_uring_setup()
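io_allocate_scq_urings() and the sq_off/cq_off blocks above define what userspace maps after io_uring_setup(2): the ring region (a single shared mapping when IORING_FEAT_SINGLE_MMAP is advertised) plus the SQE array at IORING_OFF_SQES. A setup sketch under those assumptions; setup_ring() is a hypothetical helper, error handling is trimmed, and the caller must pass a zeroed io_uring_params:

#include <sys/mman.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

static int setup_ring(unsigned entries, struct io_uring_params *p,
		      void **sq_ring, void **cq_ring, struct io_uring_sqe **sqes)
{
	int fd = syscall(__NR_io_uring_setup, entries, p);

	if (fd < 0)
		return fd;

	size_t sq_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
	size_t cq_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);

	if (p->features & IORING_FEAT_SINGLE_MMAP)	/* rings share one mapping */
		sq_sz = cq_sz = sq_sz > cq_sz ? sq_sz : cq_sz;

	*sq_ring = mmap(NULL, sq_sz, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	*cq_ring = (p->features & IORING_FEAT_SINGLE_MMAP) ? *sq_ring :
		   mmap(NULL, cq_sz, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	*sqes	 = mmap(NULL, p->sq_entries * sizeof(struct io_uring_sqe),
			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			fd, IORING_OFF_SQES);
	return fd;
}

Individual ring pointers (head, tail, mask, flags, array, cqes) are then obtained by adding the sq_off/cq_off byte offsets to these base mappings.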
9433 return -EOVERFLOW; in io_probe()
9436 return -ENOMEM; in io_probe()
9438 ret = -EFAULT; in io_probe()
9441 ret = -EINVAL; in io_probe()
9445 p->last_op = IORING_OP_LAST - 1; in io_probe()
9450 p->ops[i].op = i; in io_probe()
9452 p->ops[i].flags = IO_URING_OP_SUPPORTED; in io_probe()
9454 p->ops_len = i; in io_probe()
9458 ret = -EFAULT; in io_probe()
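io_probe() services IORING_REGISTER_PROBE, reporting the highest known opcode and a per-op IO_URING_OP_SUPPORTED flag so applications can feature-detect instead of guessing from the kernel version. A sketch with an arbitrarily sized 256-entry probe buffer; opcode_supported() is an illustrative helper:

#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Returns 1 if 'op' (e.g. IORING_OP_OPENAT2) is supported, 0 if not, -1 on error. */
static int opcode_supported(int ring_fd, int op)
{
	size_t sz = sizeof(struct io_uring_probe) +
		    256 * sizeof(struct io_uring_probe_op);
	struct io_uring_probe *probe = calloc(1, sz);	/* buffer must be zeroed */
	int ret = -1;

	if (!probe)
		return -1;
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PROBE, probe, 256) == 0)
		ret = op <= probe->last_op &&
		      (probe->ops[op].flags & IO_URING_OP_SUPPORTED);
	free(probe);
	return ret;
}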
9471 return -ENOMEM; in io_register_personality()
9474 id->creds = get_current_cred(); in io_register_personality()
9476 ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL); in io_register_personality()
9478 put_cred(id->creds); in io_register_personality()
9488 iod = idr_remove(&ctx->personality_idr, id); in io_unregister_personality()
9490 put_cred(iod->creds); in io_unregister_personality()
9491 if (refcount_dec_and_test(&iod->count)) in io_unregister_personality()
9496 return -EINVAL; in io_unregister_personality()
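io_register_personality() snapshots the caller's credentials under a small id, and io_unregister_personality() drops them again; an SQE can then run with the saved credentials by setting sqe->personality. A two-line sketch; grab_personality() is a made-up wrapper:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Returns a personality id (>= 1) capturing the current credentials, or -1. */
static int grab_personality(int ring_fd)
{
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_PERSONALITY, NULL, 0);
}

A prepared SQE then opts in with sqe->personality = id; an id of 0 keeps the submitter's own credentials.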
9507 if (!(ctx->flags & IORING_SETUP_R_DISABLED)) in io_register_restrictions()
9508 return -EBADFD; in io_register_restrictions()
9511 if (ctx->restrictions.registered) in io_register_restrictions()
9512 return -EBUSY; in io_register_restrictions()
9515 return -EINVAL; in io_register_restrictions()
9519 return -EOVERFLOW; in io_register_restrictions()
9531 ret = -EINVAL; in io_register_restrictions()
9536 ctx->restrictions.register_op); in io_register_restrictions()
9540 ret = -EINVAL; in io_register_restrictions()
9544 __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); in io_register_restrictions()
9547 ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; in io_register_restrictions()
9550 ctx->restrictions.sqe_flags_required = res[i].sqe_flags; in io_register_restrictions()
9553 ret = -EINVAL; in io_register_restrictions()
9561 memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); in io_register_restrictions()
9563 ctx->restrictions.registered = true; in io_register_restrictions()
9571 if (!(ctx->flags & IORING_SETUP_R_DISABLED)) in io_register_enable_rings()
9572 return -EBADFD; in io_register_enable_rings()
9574 if (ctx->restrictions.registered) in io_register_enable_rings()
9575 ctx->restricted = 1; in io_register_enable_rings()
9577 ctx->flags &= ~IORING_SETUP_R_DISABLED; in io_register_enable_rings()
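Combined with IORING_SETUP_R_DISABLED, the two handlers above let a privileged process create a ring, restrict which register opcodes, SQE opcodes and SQE flags it will accept, and only then enable it, for example before handing the fd to a less trusted component. A hedged sketch; lock_down_ring() is illustrative:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* On a ring created with IORING_SETUP_R_DISABLED: allow only READV/WRITEV
 * SQEs plus the ENABLE_RINGS register op, then turn the ring on. */
static int lock_down_ring(int ring_fd)
{
	struct io_uring_restriction res[3];

	memset(res, 0, sizeof(res));
	res[0].opcode	   = IORING_RESTRICTION_SQE_OP;
	res[0].sqe_op	   = IORING_OP_READV;
	res[1].opcode	   = IORING_RESTRICTION_SQE_OP;
	res[1].sqe_op	   = IORING_OP_WRITEV;
	res[2].opcode	   = IORING_RESTRICTION_REGISTER_OP;
	res[2].register_op = IORING_REGISTER_ENABLE_RINGS;

	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_RESTRICTIONS, res, 3) < 0)
		return -1;
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_ENABLE_RINGS, NULL, 0);
}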
9600 __releases(ctx->uring_lock) in __io_uring_register()
9601 __acquires(ctx->uring_lock) in __io_uring_register()
9610 if (percpu_ref_is_dying(&ctx->refs)) in __io_uring_register()
9611 return -ENXIO; in __io_uring_register()
9614 percpu_ref_kill(&ctx->refs); in __io_uring_register()
9624 mutex_unlock(&ctx->uring_lock); in __io_uring_register()
9626 ret = wait_for_completion_interruptible(&ctx->ref_comp); in __io_uring_register()
9634 mutex_lock(&ctx->uring_lock); in __io_uring_register()
9637 percpu_ref_resurrect(&ctx->refs); in __io_uring_register()
9642 if (ctx->restricted) { in __io_uring_register()
9644 ret = -EINVAL; in __io_uring_register()
9648 if (!test_bit(opcode, ctx->restrictions.register_op)) { in __io_uring_register()
9649 ret = -EACCES; in __io_uring_register()
9659 ret = -EINVAL; in __io_uring_register()
9668 ret = -EINVAL; in __io_uring_register()
9678 ret = -EINVAL; in __io_uring_register()
9685 ctx->eventfd_async = 1; in __io_uring_register()
9687 ctx->eventfd_async = 0; in __io_uring_register()
9690 ret = -EINVAL; in __io_uring_register()
9696 ret = -EINVAL; in __io_uring_register()
9702 ret = -EINVAL; in __io_uring_register()
9708 ret = -EINVAL; in __io_uring_register()
9714 ret = -EINVAL; in __io_uring_register()
9723 ret = -EINVAL; in __io_uring_register()
9730 percpu_ref_reinit(&ctx->refs); in __io_uring_register()
9732 reinit_completion(&ctx->ref_comp); in __io_uring_register()
9741 long ret = -EBADF; in SYSCALL_DEFINE4()
9746 return -EBADF; in SYSCALL_DEFINE4()
9748 ret = -EOPNOTSUPP; in SYSCALL_DEFINE4()
9749 if (f.file->f_op != &io_uring_fops) in SYSCALL_DEFINE4()
9752 ctx = f.file->private_data; in SYSCALL_DEFINE4()
9754 mutex_lock(&ctx->uring_lock); in SYSCALL_DEFINE4()
9756 mutex_unlock(&ctx->uring_lock); in SYSCALL_DEFINE4()
9757 trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, in SYSCALL_DEFINE4()
9758 ctx->cq_ev_fd != NULL, ret); in SYSCALL_DEFINE4()