Excerpts from block/blk-mq.c (Linux multiqueue block layer core), listed with their containing functions.

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2013-2014 Jens Axboe
 * Copyright (C) 2013-2014 Christoph Hellwig
 */
#include <linux/backing-dev.h>
#include <linux/blk-integrity.h>
#include <linux/blk-crypto.h>
#include <linux/blk-mq.h>
#include <linux/t10-pi.h>
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-ioprio.h"
62 return -1; in blk_mq_poll_stats_bkt()
64 return ddir + BLK_MQ_POLL_STATS_BKTS - 2; in blk_mq_poll_stats_bkt()
75 return xa_load(&q->hctx_table, in blk_qc_to_hctx()
82 unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1); in blk_qc_to_rq()
85 return blk_mq_tag_to_rq(hctx->sched_tags, tag); in blk_qc_to_rq()
86 return blk_mq_tag_to_rq(hctx->tags, tag); in blk_qc_to_rq()
91 return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) | in blk_rq_to_qc()
92 (rq->tag != -1 ? in blk_rq_to_qc()
93 rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL)); in blk_rq_to_qc()
102 return !list_empty_careful(&hctx->dispatch) || in blk_mq_hctx_has_pending()
103 sbitmap_any_bit_set(&hctx->ctx_map) || in blk_mq_hctx_has_pending()
113 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_mark_pending()
115 if (!sbitmap_test_bit(&hctx->ctx_map, bit)) in blk_mq_hctx_mark_pending()
116 sbitmap_set_bit(&hctx->ctx_map, bit); in blk_mq_hctx_mark_pending()
122 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_clear_pending()
124 sbitmap_clear_bit(&hctx->ctx_map, bit); in blk_mq_hctx_clear_pending()
136 if (rq->part && blk_do_io_stat(rq) && in blk_mq_check_inflight()
137 (!mi->part->bd_partno || rq->part == mi->part) && in blk_mq_check_inflight()
139 mi->inflight[rq_data_dir(rq)]++; in blk_mq_check_inflight()
/* in blk_freeze_queue_start(): */
	mutex_lock(&q->mq_freeze_lock);
	if (++q->mq_freeze_depth == 1) {
		percpu_ref_kill(&q->q_usage_counter);
		mutex_unlock(&q->mq_freeze_lock);
	mutex_unlock(&q->mq_freeze_lock);

/* in blk_mq_freeze_queue_wait(): */
	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));

/* in blk_mq_freeze_queue_wait_timeout(): */
	return wait_event_timeout(q->mq_freeze_wq,
				  percpu_ref_is_zero(&q->q_usage_counter),

 * Guarantee no request is in use, so we can change any data structure of
 * no blk_unfreeze_queue(), and blk_freeze_queue() is not

/* in __blk_mq_unfreeze_queue(): */
	mutex_lock(&q->mq_freeze_lock);
	q->q_usage_counter.data->force_atomic = true;
	q->mq_freeze_depth--;
	WARN_ON_ONCE(q->mq_freeze_depth < 0);
	if (!q->mq_freeze_depth) {
		percpu_ref_resurrect(&q->q_usage_counter);
		wake_up_all(&q->mq_freeze_wq);
	mutex_unlock(&q->mq_freeze_lock);
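/*
 * Illustrative sketch (not part of blk-mq.c): how a driver typically uses the
 * freeze API shown above.  blk_mq_freeze_queue() bumps mq_freeze_depth, kills
 * q_usage_counter and waits for in-flight requests to drain;
 * blk_mq_unfreeze_queue() resurrects the counter and wakes waiters.
 * "struct my_dev" and my_apply_new_limits() are made-up names for the example.
 */
#include <linux/blk-mq.h>

static void my_change_queue_limits(struct my_dev *dev)
{
	struct request_queue *q = dev->queue;	/* hypothetical field */

	blk_mq_freeze_queue(q);		/* no new requests, none in flight */
	my_apply_new_limits(dev);	/* safe to touch queue data structures */
	blk_mq_unfreeze_queue(q);	/* resume request processing */
}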
/* in blk_mq_quiesce_queue_nowait(): */
	spin_lock_irqsave(&q->queue_lock, flags);
	if (!q->quiesce_depth++)
	spin_unlock_irqrestore(&q->queue_lock, flags);

 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
/* in blk_mq_wait_quiesce_done(): */
	synchronize_srcu(q->srcu);

 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * sure no dispatch can happen until the queue is unquiesced via

 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
/* in blk_mq_unquiesce_queue(): */
	spin_lock_irqsave(&q->queue_lock, flags);
	if (WARN_ON_ONCE(q->quiesce_depth <= 0)) {
	} else if (!--q->quiesce_depth) {
	spin_unlock_irqrestore(&q->queue_lock, flags);
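/*
 * Illustrative sketch (not part of blk-mq.c): quiesce vs. freeze.  Quiescing
 * only guarantees that no ->queue_rq() call is in progress or will be started
 * until blk_mq_unquiesce_queue(); already-allocated requests stay around.
 * A driver resetting its hardware might do something like this ("my_dev" and
 * my_reset_hw() are made-up names):
 */
static int my_reset_controller(struct my_dev *dev)
{
	int ret;

	blk_mq_quiesce_queue(dev->queue);	/* waits for ongoing dispatches */
	ret = my_reset_hw(dev);
	blk_mq_unquiesce_queue(dev->queue);	/* dispatching may resume */
	return ret;
}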
321 blk_mq_tag_wakeup_all(hctx->tags, true); in blk_mq_wake_waiters()
328 INIT_LIST_HEAD(&rq->queuelist); in blk_rq_init()
329 rq->q = q; in blk_rq_init()
330 rq->__sector = (sector_t) -1; in blk_rq_init()
331 INIT_HLIST_NODE(&rq->hash); in blk_rq_init()
332 RB_CLEAR_NODE(&rq->rb_node); in blk_rq_init()
333 rq->tag = BLK_MQ_NO_TAG; in blk_rq_init()
334 rq->internal_tag = BLK_MQ_NO_TAG; in blk_rq_init()
335 rq->start_time_ns = ktime_get_ns(); in blk_rq_init()
336 rq->part = NULL; in blk_rq_init()
344 struct blk_mq_ctx *ctx = data->ctx; in blk_mq_rq_ctx_init()
345 struct blk_mq_hw_ctx *hctx = data->hctx; in blk_mq_rq_ctx_init()
346 struct request_queue *q = data->q; in blk_mq_rq_ctx_init()
347 struct request *rq = tags->static_rqs[tag]; in blk_mq_rq_ctx_init()
349 rq->q = q; in blk_mq_rq_ctx_init()
350 rq->mq_ctx = ctx; in blk_mq_rq_ctx_init()
351 rq->mq_hctx = hctx; in blk_mq_rq_ctx_init()
352 rq->cmd_flags = data->cmd_flags; in blk_mq_rq_ctx_init()
354 if (data->flags & BLK_MQ_REQ_PM) in blk_mq_rq_ctx_init()
355 data->rq_flags |= RQF_PM; in blk_mq_rq_ctx_init()
357 data->rq_flags |= RQF_IO_STAT; in blk_mq_rq_ctx_init()
358 rq->rq_flags = data->rq_flags; in blk_mq_rq_ctx_init()
360 if (!(data->rq_flags & RQF_ELV)) { in blk_mq_rq_ctx_init()
361 rq->tag = tag; in blk_mq_rq_ctx_init()
362 rq->internal_tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
364 rq->tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
365 rq->internal_tag = tag; in blk_mq_rq_ctx_init()
367 rq->timeout = 0; in blk_mq_rq_ctx_init()
370 rq->start_time_ns = ktime_get_ns(); in blk_mq_rq_ctx_init()
372 rq->start_time_ns = 0; in blk_mq_rq_ctx_init()
373 rq->part = NULL; in blk_mq_rq_ctx_init()
375 rq->alloc_time_ns = alloc_time_ns; in blk_mq_rq_ctx_init()
377 rq->io_start_time_ns = 0; in blk_mq_rq_ctx_init()
378 rq->stats_sectors = 0; in blk_mq_rq_ctx_init()
379 rq->nr_phys_segments = 0; in blk_mq_rq_ctx_init()
381 rq->nr_integrity_segments = 0; in blk_mq_rq_ctx_init()
383 rq->end_io = NULL; in blk_mq_rq_ctx_init()
384 rq->end_io_data = NULL; in blk_mq_rq_ctx_init()
387 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_rq_ctx_init()
389 WRITE_ONCE(rq->deadline, 0); in blk_mq_rq_ctx_init()
392 if (rq->rq_flags & RQF_ELV) { in blk_mq_rq_ctx_init()
393 struct elevator_queue *e = data->q->elevator; in blk_mq_rq_ctx_init()
395 INIT_HLIST_NODE(&rq->hash); in blk_mq_rq_ctx_init()
396 RB_CLEAR_NODE(&rq->rb_node); in blk_mq_rq_ctx_init()
398 if (!op_is_flush(data->cmd_flags) && in blk_mq_rq_ctx_init()
399 e->type->ops.prepare_request) { in blk_mq_rq_ctx_init()
400 e->type->ops.prepare_request(rq); in blk_mq_rq_ctx_init()
401 rq->rq_flags |= RQF_ELVPRIV; in blk_mq_rq_ctx_init()
418 tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); in __blk_mq_alloc_requests_batch()
427 prefetch(tags->static_rqs[tag]); in __blk_mq_alloc_requests_batch()
430 rq_list_add(data->cached_rq, rq); in __blk_mq_alloc_requests_batch()
434 percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); in __blk_mq_alloc_requests_batch()
435 data->nr_tags -= nr; in __blk_mq_alloc_requests_batch()
437 return rq_list_pop(data->cached_rq); in __blk_mq_alloc_requests_batch()
442 struct request_queue *q = data->q; in __blk_mq_alloc_requests()
451 if (data->cmd_flags & REQ_NOWAIT) in __blk_mq_alloc_requests()
452 data->flags |= BLK_MQ_REQ_NOWAIT; in __blk_mq_alloc_requests()
454 if (q->elevator) { in __blk_mq_alloc_requests()
455 struct elevator_queue *e = q->elevator; in __blk_mq_alloc_requests()
457 data->rq_flags |= RQF_ELV; in __blk_mq_alloc_requests()
464 if (!op_is_flush(data->cmd_flags) && in __blk_mq_alloc_requests()
465 !blk_op_is_passthrough(data->cmd_flags) && in __blk_mq_alloc_requests()
466 e->type->ops.limit_depth && in __blk_mq_alloc_requests()
467 !(data->flags & BLK_MQ_REQ_RESERVED)) in __blk_mq_alloc_requests()
468 e->type->ops.limit_depth(data->cmd_flags, data); in __blk_mq_alloc_requests()
472 data->ctx = blk_mq_get_ctx(q); in __blk_mq_alloc_requests()
473 data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); in __blk_mq_alloc_requests()
474 if (!(data->rq_flags & RQF_ELV)) in __blk_mq_alloc_requests()
475 blk_mq_tag_busy(data->hctx); in __blk_mq_alloc_requests()
477 if (data->flags & BLK_MQ_REQ_RESERVED) in __blk_mq_alloc_requests()
478 data->rq_flags |= RQF_RESV; in __blk_mq_alloc_requests()
483 if (data->nr_tags > 1) { in __blk_mq_alloc_requests()
487 data->nr_tags = 1; in __blk_mq_alloc_requests()
497 if (data->flags & BLK_MQ_REQ_NOWAIT) in __blk_mq_alloc_requests()
522 .nr_tags = plug->nr_ios, in blk_mq_rq_cache_fill()
523 .cached_rq = &plug->cached_rq, in blk_mq_rq_cache_fill()
530 plug->nr_ios = 1; in blk_mq_rq_cache_fill()
542 struct blk_plug *plug = current->plug; in blk_mq_alloc_cached_request()
547 if (rq_list_empty(plug->cached_rq)) { in blk_mq_alloc_cached_request()
548 if (plug->nr_ios == 1) in blk_mq_alloc_cached_request()
555 rq = rq_list_peek(&plug->cached_rq); in blk_mq_alloc_cached_request()
556 if (!rq || rq->q != q) in blk_mq_alloc_cached_request()
559 if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) in blk_mq_alloc_cached_request()
561 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_alloc_cached_request()
564 plug->cached_rq = rq_list_next(rq); in blk_mq_alloc_cached_request()
566 rq->cmd_flags = opf; in blk_mq_alloc_cached_request()
567 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_alloc_cached_request()
594 rq->__data_len = 0; in blk_mq_alloc_request()
595 rq->__sector = (sector_t) -1; in blk_mq_alloc_request()
596 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request()
600 return ERR_PTR(-EWOULDBLOCK); in blk_mq_alloc_request()
/* in blk_mq_alloc_request_hctx(): */
	 * different hardware context. No need to complicate the low level
	return ERR_PTR(-EINVAL);
	if (hctx_idx >= q->nr_hw_queues)
		return ERR_PTR(-EIO);
	ret = -EXDEV;
	data.hctx = xa_load(&q->hctx_table, hctx_idx);
	cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
	if (!q->elevator)
	ret = -EWOULDBLOCK;
	rq->__data_len = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
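/*
 * Illustrative sketch (not part of blk-mq.c): blk_mq_alloc_request_hctx()
 * ties a passthrough request to one specific hardware queue, which is why the
 * code above refuses to sleep in the tag allocator (-EINVAL) and fails with
 * -EXDEV if that hctx has no online CPU.  Callers along the lines of fabrics
 * drivers usually pass non-sleeping, reserved flags; "qid" below is just an
 * example value.
 */
static struct request *my_alloc_connect_cmd(struct request_queue *q,
					    unsigned int qid)
{
	return blk_mq_alloc_request_hctx(q, REQ_OP_DRV_OUT,
					 BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED,
					 qid);
}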
679 struct request_queue *q = rq->q; in __blk_mq_free_request()
680 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_free_request()
681 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in __blk_mq_free_request()
682 const int sched_tag = rq->internal_tag; in __blk_mq_free_request()
686 rq->mq_hctx = NULL; in __blk_mq_free_request()
687 if (rq->tag != BLK_MQ_NO_TAG) in __blk_mq_free_request()
688 blk_mq_put_tag(hctx->tags, ctx, rq->tag); in __blk_mq_free_request()
690 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); in __blk_mq_free_request()
697 struct request_queue *q = rq->q; in blk_mq_free_request()
698 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_free_request()
700 if ((rq->rq_flags & RQF_ELVPRIV) && in blk_mq_free_request()
701 q->elevator->type->ops.finish_request) in blk_mq_free_request()
702 q->elevator->type->ops.finish_request(rq); in blk_mq_free_request()
704 if (rq->rq_flags & RQF_MQ_INFLIGHT) in blk_mq_free_request()
708 laptop_io_completion(q->disk->bdi); in blk_mq_free_request()
712 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_free_request()
722 while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) in blk_mq_free_plug_rqs()
729 rq->q->disk ? rq->q->disk->disk_name : "?", in blk_dump_rq_flags()
730 (__force unsigned long long) rq->cmd_flags); in blk_dump_rq_flags()
736 rq->bio, rq->biotail, blk_rq_bytes(rq)); in blk_dump_rq_flags()
744 bio->bi_status = error; in req_bio_endio()
750 if (bio->bi_iter.bi_size != nbytes) in req_bio_endio()
751 bio->bi_status = BLK_STS_IOERR; in req_bio_endio()
753 bio->bi_iter.bi_sector = rq->__sector; in req_bio_endio()
758 if (unlikely(rq->rq_flags & RQF_QUIET)) in req_bio_endio()
761 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) in req_bio_endio()
767 if (req->part && blk_do_io_stat(req)) { in blk_account_io_completion()
771 part_stat_add(req->part, sectors[sgrp], bytes >> 9); in blk_account_io_completion()
782 req->q->disk ? req->q->disk->disk_name : "?", in blk_print_req_error()
785 (__force u32)(req->cmd_flags & ~REQ_OP_MASK), in blk_print_req_error()
786 req->nr_phys_segments, in blk_print_req_error()
787 IOPRIO_PRIO_CLASS(req->ioprio)); in blk_print_req_error()
796 const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; in blk_complete_request()
798 struct bio *bio = req->bio; in blk_complete_request()
807 req->q->integrity.profile->complete_fn(req, total_bytes); in blk_complete_request()
813 struct bio *next = bio->bi_next; in blk_complete_request()
819 bio->bi_iter.bi_sector = req->__sector; in blk_complete_request()
831 if (!req->end_io) { in blk_complete_request()
832 req->bio = NULL; in blk_complete_request()
833 req->__data_len = 0; in blk_complete_request()
 * blk_update_request - Complete multiple bytes without completing the request
 * %false - this request doesn't have any more data
 * %true - this request has more data

/* in blk_update_request(): */
	if (!req->bio)
	req->q->integrity.profile->complete_fn(req, nr_bytes);
	    !(req->rq_flags & RQF_QUIET)) &&
	    !test_bit(GD_DEAD, &req->q->disk->state)) {
	while (req->bio) {
		struct bio *bio = req->bio;
		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
		if (bio_bytes == bio->bi_iter.bi_size)
			req->bio = bio->bi_next;
		nr_bytes -= bio_bytes;
	if (!req->bio) {
		req->__data_len = 0;
	req->__data_len -= total_bytes;
	req->__sector += total_bytes >> 9;
	if (req->rq_flags & RQF_MIXED_MERGE) {
		req->cmd_flags &= ~REQ_FAILFAST_MASK;
		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
		 * If total number of sectors is less than the first segment
		req->__data_len = blk_rq_cur_bytes(req);
		req->nr_phys_segments = blk_recalc_rq_segments(req);
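/*
 * Illustrative sketch (not part of blk-mq.c): partial completion from a
 * driver's point of view.  blk_update_request() completes 'bytes' bytes of
 * the request's bios and returns true while more data remains; only when it
 * returns false is the request finished.  This mirrors what
 * blk_mq_end_request() does internally for full completions.
 */
static void my_complete_bytes(struct request *rq, unsigned int bytes)
{
	if (blk_update_request(rq, BLK_STS_OK, bytes))
		return;			/* request still has data left */
	__blk_mq_end_request(rq, BLK_STS_OK);
}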
951 update_io_ticks(req->part, jiffies, true); in __blk_account_io_done()
952 part_stat_inc(req->part, ios[sgrp]); in __blk_account_io_done()
953 part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); in __blk_account_io_done()
964 if (blk_do_io_stat(req) && req->part && in blk_account_io_done()
965 !(req->rq_flags & RQF_FLUSH_SEQ)) in blk_account_io_done()
972 * All non-passthrough requests are created from a bio with one in __blk_account_io_start()
974 * generated by the state machine in blk-flush.c is cloned onto the in __blk_account_io_start()
975 * lower device by dm-multipath we can get here without a bio. in __blk_account_io_start()
977 if (rq->bio) in __blk_account_io_start()
978 rq->part = rq->bio->bi_bdev; in __blk_account_io_start()
980 rq->part = rq->q->disk->part0; in __blk_account_io_start()
983 update_io_ticks(rq->part, jiffies, false); in __blk_account_io_start()
995 if (rq->rq_flags & RQF_STATS) { in __blk_mq_end_request_acct()
996 blk_mq_poll_stats_start(rq->q); in __blk_mq_end_request_acct()
1009 if (rq->end_io) { in __blk_mq_end_request()
1010 rq_qos_done(rq->q, rq); in __blk_mq_end_request()
1011 if (rq->end_io(rq, error) == RQ_END_IO_FREE) in __blk_mq_end_request()
1032 struct request_queue *q = hctx->queue; in blk_mq_flush_tag_batch()
1036 * update hctx->nr_active in batch in blk_mq_flush_tag_batch()
1038 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) in blk_mq_flush_tag_batch()
1041 blk_mq_put_tags(hctx->tags, tag_array, nr_tags); in blk_mq_flush_tag_batch()
1042 percpu_ref_put_many(&q->q_usage_counter, nr_tags); in blk_mq_flush_tag_batch()
1052 if (iob->need_ts) in blk_mq_end_request_batch()
1055 while ((rq = rq_list_pop(&iob->req_list)) != NULL) { in blk_mq_end_request_batch()
1056 prefetch(rq->bio); in blk_mq_end_request_batch()
1057 prefetch(rq->rq_next); in blk_mq_end_request_batch()
1060 if (iob->need_ts) in blk_mq_end_request_batch()
1063 rq_qos_done(rq->q, rq); in blk_mq_end_request_batch()
1069 if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) in blk_mq_end_request_batch()
1072 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_end_request_batch()
1079 if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { in blk_mq_end_request_batch()
1083 cur_hctx = rq->mq_hctx; in blk_mq_end_request_batch()
1085 tags[nr_tags++] = rq->tag; in blk_mq_end_request_batch()
1099 rq->q->mq_ops->complete(rq); in blk_complete_reqs()
1123 !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) in blk_mq_complete_need_ipi()
1135 if (cpu == rq->mq_ctx->cpu || in blk_mq_complete_need_ipi()
1136 (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && in blk_mq_complete_need_ipi()
1137 cpus_share_cache(cpu, rq->mq_ctx->cpu))) in blk_mq_complete_need_ipi()
1141 return cpu_online(rq->mq_ctx->cpu); in blk_mq_complete_need_ipi()
1149 cpu = rq->mq_ctx->cpu; in blk_mq_complete_send_ipi()
1151 if (llist_add(&rq->ipi_list, list)) { in blk_mq_complete_send_ipi()
1152 INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); in blk_mq_complete_send_ipi()
1153 smp_call_function_single_async(cpu, &rq->csd); in blk_mq_complete_send_ipi()
1163 if (llist_add(&rq->ipi_list, list)) in blk_mq_raise_softirq()
1170 WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); in blk_mq_complete_request_remote()
1177 if (rq->mq_hctx->nr_ctx == 1 || in blk_mq_complete_request_remote()
1178 rq->cmd_flags & REQ_POLLED) in blk_mq_complete_request_remote()
1186 if (rq->q->nr_hw_queues == 1) { in blk_mq_complete_request_remote()
 * blk_mq_complete_request - end I/O on a request
 * Complete a request by scheduling the ->complete_rq operation.
/* in blk_mq_complete_request(): */
	rq->q->mq_ops->complete(rq);

 * blk_mq_start_request - Start processing a request
/* in blk_mq_start_request(): */
	struct request_queue *q = rq->q;
	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
		rq->io_start_time_ns = ktime_get_ns();
		rq->stats_sectors = blk_rq_sectors(rq);
		rq->rq_flags |= RQF_STATS;
	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
	q->integrity.profile->prepare_fn(rq);
	if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
		WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
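/*
 * Illustrative sketch (not part of blk-mq.c): the usual driver-side pairing
 * of the two helpers documented above.  ->queue_rq() calls
 * blk_mq_start_request() before issuing the command; the completion path
 * calls blk_mq_complete_request(), and the driver's ->complete() callback
 * finally ends the request.  my_hw_submit() is a made-up name.
 */
static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);		/* MQ_RQ_IN_FLIGHT, arms the timeout */
	if (!my_hw_submit(hctx->driver_data, rq))
		return BLK_STS_RESOURCE;	/* blk-mq will retry later */
	return BLK_STS_OK;
}

static void my_irq_saw_completion(struct request *rq)
{
	blk_mq_complete_request(rq);		/* schedules ->complete() */
}

static void my_complete_rq(struct request *rq)
{
	blk_mq_end_request(rq, BLK_STS_OK);
}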
1250 if (plug->multiple_queues) in blk_plug_max_rq_count()
1257 struct request *last = rq_list_peek(&plug->mq_list); in blk_add_rq_to_plug()
1259 if (!plug->rq_count) { in blk_add_rq_to_plug()
1260 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1261 } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || in blk_add_rq_to_plug()
1262 (!blk_queue_nomerges(rq->q) && in blk_add_rq_to_plug()
1266 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1269 if (!plug->multiple_queues && last && last->q != rq->q) in blk_add_rq_to_plug()
1270 plug->multiple_queues = true; in blk_add_rq_to_plug()
1271 if (!plug->has_elevator && (rq->rq_flags & RQF_ELV)) in blk_add_rq_to_plug()
1272 plug->has_elevator = true; in blk_add_rq_to_plug()
1273 rq->rq_next = NULL; in blk_add_rq_to_plug()
1274 rq_list_add(&plug->mq_list, rq); in blk_add_rq_to_plug()
1275 plug->rq_count++; in blk_add_rq_to_plug()
 * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
/* in blk_execute_rq_nowait(): */
	if (current->plug)
		blk_add_rq_to_plug(current->plug, rq);

/* in blk_end_sync_rq(): */
	struct blk_rq_wait *wait = rq->end_io_data;
	wait->ret = ret;
	complete(&wait->done);

/* in blk_rq_is_poll(): */
	if (!rq->mq_hctx)
	if (rq->mq_hctx->type != HCTX_TYPE_POLL)
	if (WARN_ON_ONCE(!rq->bio))

/* in blk_rq_poll_completion(): */
	bio_poll(rq->bio, NULL, 0);

 * blk_execute_rq - insert a request into queue for execution
/* in blk_execute_rq(): */
	rq->end_io_data = &wait;
	rq->end_io = blk_end_sync_rq;
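/*
 * Illustrative sketch (not part of blk-mq.c): a synchronous passthrough
 * request built on the allocation and execution helpers above.
 * blk_execute_rq() wires up blk_end_sync_rq() internally and sleeps (or
 * polls, for polled requests) until completion.  Error mapping is simplified.
 */
static blk_status_t my_send_passthrough(struct request_queue *q,
					void *buf, unsigned int len)
{
	struct request *rq;
	blk_status_t status;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return BLK_STS_RESOURCE;

	if (blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL)) {
		blk_mq_free_request(rq);
		return BLK_STS_RESOURCE;
	}

	status = blk_execute_rq(rq, false);	/* false: not at head */
	blk_mq_free_request(rq);
	return status;
}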
1391 struct request_queue *q = rq->q; in __blk_mq_requeue_request()
1399 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in __blk_mq_requeue_request()
1400 rq->rq_flags &= ~RQF_TIMED_OUT; in __blk_mq_requeue_request()
1408 /* this request will be re-inserted to io scheduler queue */ in blk_mq_requeue_request()
1422 spin_lock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1423 list_splice_init(&q->requeue_list, &rq_list); in blk_mq_requeue_work()
1424 spin_unlock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1427 if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP))) in blk_mq_requeue_work()
1430 rq->rq_flags &= ~RQF_SOFTBARRIER; in blk_mq_requeue_work()
1431 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1437 if (rq->rq_flags & RQF_DONTPREP) in blk_mq_requeue_work()
1445 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1455 struct request_queue *q = rq->q; in blk_mq_add_to_requeue_list()
1462 BUG_ON(rq->rq_flags & RQF_SOFTBARRIER); in blk_mq_add_to_requeue_list()
1464 spin_lock_irqsave(&q->requeue_lock, flags); in blk_mq_add_to_requeue_list()
1466 rq->rq_flags |= RQF_SOFTBARRIER; in blk_mq_add_to_requeue_list()
1467 list_add(&rq->queuelist, &q->requeue_list); in blk_mq_add_to_requeue_list()
1469 list_add_tail(&rq->queuelist, &q->requeue_list); in blk_mq_add_to_requeue_list()
1471 spin_unlock_irqrestore(&q->requeue_lock, flags); in blk_mq_add_to_requeue_list()
1479 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); in blk_mq_kick_requeue_list()
1486 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, in blk_mq_delay_kick_requeue_list()
1519 req->rq_flags |= RQF_TIMED_OUT; in blk_mq_rq_timed_out()
1520 if (req->q->mq_ops->timeout) { in blk_mq_rq_timed_out()
1523 ret = req->q->mq_ops->timeout(req); in blk_mq_rq_timed_out()
1538 if (rq->rq_flags & RQF_TIMED_OUT) in blk_mq_req_expired()
1541 deadline = READ_ONCE(rq->deadline); in blk_mq_req_expired()
1555 if (rq->end_io(rq, 0) == RQ_END_IO_FREE) in blk_mq_put_rq_ref()
1599 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_mq_timeout_work()
1605 mod_timer(&q->timeout, next); in blk_mq_timeout_work()
1609 * we end up here it means that no requests are pending and in blk_mq_timeout_work()
1610 * also that no request has been pending for a while. Mark in blk_mq_timeout_work()
1630 struct blk_mq_hw_ctx *hctx = flush_data->hctx; in flush_busy_ctx()
1631 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in flush_busy_ctx()
1632 enum hctx_type type = hctx->type; in flush_busy_ctx()
1634 spin_lock(&ctx->lock); in flush_busy_ctx()
1635 list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); in flush_busy_ctx()
1637 spin_unlock(&ctx->lock); in flush_busy_ctx()
1643 * to the for-dispatch
1652 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); in blk_mq_flush_busy_ctxs()
1665 struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; in dispatch_rq_from_ctx()
1666 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in dispatch_rq_from_ctx()
1667 enum hctx_type type = hctx->type; in dispatch_rq_from_ctx()
1669 spin_lock(&ctx->lock); in dispatch_rq_from_ctx()
1670 if (!list_empty(&ctx->rq_lists[type])) { in dispatch_rq_from_ctx()
1671 dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); in dispatch_rq_from_ctx()
1672 list_del_init(&dispatch_data->rq->queuelist); in dispatch_rq_from_ctx()
1673 if (list_empty(&ctx->rq_lists[type])) in dispatch_rq_from_ctx()
1676 spin_unlock(&ctx->lock); in dispatch_rq_from_ctx()
1678 return !dispatch_data->rq; in dispatch_rq_from_ctx()
1684 unsigned off = start ? start->index_hw[hctx->type] : 0; in blk_mq_dequeue_from_ctx()
1690 __sbitmap_for_each_set(&hctx->ctx_map, off, in blk_mq_dequeue_from_ctx()
1698 struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; in __blk_mq_alloc_driver_tag()
1699 unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; in __blk_mq_alloc_driver_tag()
1702 blk_mq_tag_busy(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1704 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { in __blk_mq_alloc_driver_tag()
1705 bt = &rq->mq_hctx->tags->breserved_tags; in __blk_mq_alloc_driver_tag()
1708 if (!hctx_may_queue(rq->mq_hctx, bt)) in __blk_mq_alloc_driver_tag()
1716 rq->tag = tag + tag_offset; in __blk_mq_alloc_driver_tag()
1722 if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) in __blk_mq_get_driver_tag()
1725 if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && in __blk_mq_get_driver_tag()
1726 !(rq->rq_flags & RQF_MQ_INFLIGHT)) { in __blk_mq_get_driver_tag()
1727 rq->rq_flags |= RQF_MQ_INFLIGHT; in __blk_mq_get_driver_tag()
1730 hctx->tags->rqs[rq->tag] = rq; in __blk_mq_get_driver_tag()
1741 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1742 if (!list_empty(&wait->entry)) { in blk_mq_dispatch_wake()
1745 list_del_init(&wait->entry); in blk_mq_dispatch_wake()
1746 sbq = &hctx->tags->bitmap_tags; in blk_mq_dispatch_wake()
1747 atomic_dec(&sbq->ws_active); in blk_mq_dispatch_wake()
1749 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1757 * the tag wakeups. For non-shared tags, we can simply mark us needing a
1764 struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; in blk_mq_mark_tag_wait()
1769 if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_mark_tag_wait()
1783 wait = &hctx->dispatch_wait; in blk_mq_mark_tag_wait()
1784 if (!list_empty_careful(&wait->entry)) in blk_mq_mark_tag_wait()
1787 wq = &bt_wait_ptr(sbq, hctx)->wait; in blk_mq_mark_tag_wait()
1789 spin_lock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1790 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1791 if (!list_empty(&wait->entry)) { in blk_mq_mark_tag_wait()
1792 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1793 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1797 atomic_inc(&sbq->ws_active); in blk_mq_mark_tag_wait()
1798 wait->flags &= ~WQ_FLAG_EXCLUSIVE; in blk_mq_mark_tag_wait()
1808 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1809 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1817 list_del_init(&wait->entry); in blk_mq_mark_tag_wait()
1818 atomic_dec(&sbq->ws_active); in blk_mq_mark_tag_wait()
1819 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1820 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
 * - EWMA is one simple way to compute a running average value
 * - a weight of 7/8 and 1/8 is applied so that it can decrease exponentially
 * - a factor of 4 is used to avoid getting a result that is too small (0), and this

/* in blk_mq_update_dispatch_busy(): */
	ewma = hctx->dispatch_busy;
	ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
	hctx->dispatch_busy = ewma;
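/*
 * Worked example for the EWMA above (not part of blk-mq.c): with
 * BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT == 8 and ..._FACTOR == 4 the update is
 *
 *	ewma = (ewma * 7 + (busy ? 1 << 4 : 0)) / 8;
 *
 * so a single busy event pulls dispatch_busy towards 16 by roughly 1/8 of the
 * remaining gap, and repeated idle events decay it by 7/8 per step instead of
 * dropping it straight to zero.
 */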
1866 list_add(&rq->queuelist, list); in blk_mq_handle_dev_resource()
1875 * specific zone due to LLD level zone-write locking or other zone in blk_mq_handle_zone_resource()
1879 list_add(&rq->queuelist, zone_list); in blk_mq_handle_zone_resource()
1892 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_prep_dispatch_rq()
1893 int budget_token = -1; in blk_mq_prep_dispatch_rq()
1896 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_prep_dispatch_rq()
1910 * we'll re-run it below. in blk_mq_prep_dispatch_rq()
1918 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_prep_dispatch_rq()
1947 struct request_queue *q = hctx->queue; in blk_mq_dispatch_rq_list()
1966 WARN_ON_ONCE(hctx != rq->mq_hctx); in blk_mq_dispatch_rq_list()
1971 list_del_init(&rq->queuelist); in blk_mq_dispatch_rq_list()
1976 * Flag last if we have no more requests, or if we have more in blk_mq_dispatch_rq_list()
1987 * once the request is queued to lld, no need to cover the in blk_mq_dispatch_rq_list()
1991 nr_budgets--; in blk_mq_dispatch_rq_list()
1992 ret = q->mq_ops->queue_rq(hctx, &bd); in blk_mq_dispatch_rq_list()
2025 ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued) in blk_mq_dispatch_rq_list()
2026 q->mq_ops->commit_rqs(hctx); in blk_mq_dispatch_rq_list()
2028 * Any items that need requeuing? Stuff them into hctx->dispatch, in blk_mq_dispatch_rq_list()
2033 /* For non-shared tags, the RESTART check will suffice */ in blk_mq_dispatch_rq_list()
2035 (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); in blk_mq_dispatch_rq_list()
2040 spin_lock(&hctx->lock); in blk_mq_dispatch_rq_list()
2041 list_splice_tail_init(list, &hctx->dispatch); in blk_mq_dispatch_rq_list()
2042 spin_unlock(&hctx->lock); in blk_mq_dispatch_rq_list()
2045 * Order adding requests to hctx->dispatch and checking in blk_mq_dispatch_rq_list()
2048 * miss the new added requests to hctx->dispatch, meantime in blk_mq_dispatch_rq_list()
2055 * it is no longer set that means that it was cleared by another in blk_mq_dispatch_rq_list()
2060 * waitqueue is no longer active, ensure that we run the queue in blk_mq_dispatch_rq_list()
2063 * If no I/O scheduler has been configured it is possible that in blk_mq_dispatch_rq_list()
2067 * - blk_mq_run_hw_queue() checks whether or not a queue has in blk_mq_dispatch_rq_list()
2069 * - Some but not all block drivers stop a queue before in blk_mq_dispatch_rq_list()
2070 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq in blk_mq_dispatch_rq_list()
2071 * and dm-rq. in blk_mq_dispatch_rq_list()
2083 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) in blk_mq_dispatch_rq_list()
2097 * __blk_mq_run_hw_queue - Run a hardware queue.
2110 blk_mq_run_dispatch_ops(hctx->queue, in __blk_mq_run_hw_queue()
2116 int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); in blk_mq_first_mapped_cpu()
2119 cpu = cpumask_first(hctx->cpumask); in blk_mq_first_mapped_cpu()
2126 * For now we just round-robin here, switching for every
2132 int next_cpu = hctx->next_cpu; in blk_mq_hctx_next_cpu()
2134 if (hctx->queue->nr_hw_queues == 1) in blk_mq_hctx_next_cpu()
2137 if (--hctx->next_cpu_batch <= 0) { in blk_mq_hctx_next_cpu()
2139 next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, in blk_mq_hctx_next_cpu()
2143 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_hctx_next_cpu()
2157 * Make sure to re-select CPU next time once after CPUs in blk_mq_hctx_next_cpu()
2158 * in hctx->cpumask become online again. in blk_mq_hctx_next_cpu()
2160 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2161 hctx->next_cpu_batch = 1; in blk_mq_hctx_next_cpu()
2165 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
 * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
/* in __blk_mq_delay_run_hw_queue(): */
	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
		if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,

 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.

 * blk_mq_run_hw_queue - Start to run a hardware queue.
/* in blk_mq_run_hw_queue(): */
	__blk_mq_run_dispatch_ops(hctx->queue, false,
		need_run = !blk_queue_quiesced(hctx->queue) &&

 * Return preferred queue to dispatch from (if any) for non-mq aware IO
/* in blk_mq_get_sq_hctx(): */
	struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT];

 * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
/* in blk_mq_run_hw_queues(): */
	 * Dispatch from this hctx either if there's no hctx preferred
	    !list_empty_careful(&hctx->dispatch))

 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
/* in blk_mq_delay_run_hw_queues(): */
	 * if another hctx is re-delaying the other's work
	if (delayed_work_pending(&hctx->run_work))
	 * Dispatch from this hctx either if there's no hctx preferred
	    !list_empty_careful(&hctx->dispatch))

/* in blk_mq_stop_hw_queue(): */
	cancel_delayed_work(&hctx->run_work);
	set_bit(BLK_MQ_S_STOPPED, &hctx->state);

/* in blk_mq_start_hw_queue(): */
	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);

/* in blk_mq_start_stopped_hw_queue(): */
	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
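/*
 * Illustrative sketch (not part of blk-mq.c): the stop/start helpers above as
 * a driver would use them around a temporary resource shortage ("my_dev" is a
 * made-up name).  A stopped queue is not run again until it is restarted;
 * queues that were never stopped can simply be kicked with
 * blk_mq_run_hw_queues(q, true).
 */
static void my_out_of_resources(struct blk_mq_hw_ctx *hctx)
{
	blk_mq_stop_hw_queue(hctx);		/* sets BLK_MQ_S_STOPPED */
}

static void my_resources_available(struct my_dev *dev)
{
	blk_mq_start_stopped_hw_queues(dev->queue, true);	/* async re-run */
}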
2416 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_insert_req_list()
2417 enum hctx_type type = hctx->type; in __blk_mq_insert_req_list()
2419 lockdep_assert_held(&ctx->lock); in __blk_mq_insert_req_list()
2424 list_add(&rq->queuelist, &ctx->rq_lists[type]); in __blk_mq_insert_req_list()
2426 list_add_tail(&rq->queuelist, &ctx->rq_lists[type]); in __blk_mq_insert_req_list()
2432 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_insert_request()
2434 lockdep_assert_held(&ctx->lock); in __blk_mq_insert_request()
2441 * blk_mq_request_bypass_insert - Insert a request at dispatch list.
2452 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_bypass_insert()
2454 spin_lock(&hctx->lock); in blk_mq_request_bypass_insert()
2456 list_add(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2458 list_add_tail(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2459 spin_unlock(&hctx->lock); in blk_mq_request_bypass_insert()
2470 enum hctx_type type = hctx->type; in blk_mq_insert_requests()
2473 * preemption doesn't flush plug list, so it's possible ctx->cpu is in blk_mq_insert_requests()
2477 BUG_ON(rq->mq_ctx != ctx); in blk_mq_insert_requests()
2481 spin_lock(&ctx->lock); in blk_mq_insert_requests()
2482 list_splice_tail_init(list, &ctx->rq_lists[type]); in blk_mq_insert_requests()
2484 spin_unlock(&ctx->lock); in blk_mq_insert_requests()
2490 if (hctx->queue->mq_ops->commit_rqs) { in blk_mq_commit_rqs()
2491 trace_block_unplug(hctx->queue, *queued, !from_schedule); in blk_mq_commit_rqs()
2492 hctx->queue->mq_ops->commit_rqs(hctx); in blk_mq_commit_rqs()
2502 if (bio->bi_opf & REQ_RAHEAD) in blk_mq_bio_to_request()
2503 rq->cmd_flags |= REQ_FAILFAST_MASK; in blk_mq_bio_to_request()
2505 rq->__sector = bio->bi_iter.bi_sector; in blk_mq_bio_to_request()
2518 struct request_queue *q = rq->q; in __blk_mq_issue_directly()
2530 ret = q->mq_ops->queue_rq(hctx, &bd); in __blk_mq_issue_directly()
2552 struct request_queue *q = rq->q; in __blk_mq_try_issue_directly()
2569 if ((rq->rq_flags & RQF_ELV) && !bypass_insert) in __blk_mq_try_issue_directly()
2594 * blk_mq_try_issue_directly - Try to send a request directly to device driver.
2599 * request directly to device driver. Else, insert at hctx->dispatch queue, so
2617 return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last); in blk_mq_request_issue_directly()
2627 while ((rq = rq_list_pop(&plug->mq_list))) { in blk_mq_plug_issue_direct()
2628 bool last = rq_list_empty(plug->mq_list); in blk_mq_plug_issue_direct()
2631 if (hctx != rq->mq_hctx) { in blk_mq_plug_issue_direct()
2634 hctx = rq->mq_hctx; in blk_mq_plug_issue_direct()
2667 q->mq_ops->queue_rqs(&plug->mq_list); in __blk_mq_flush_plug_list()
2679 struct request *rq = rq_list_pop(&plug->mq_list); in blk_mq_dispatch_plug_list()
2682 this_hctx = rq->mq_hctx; in blk_mq_dispatch_plug_list()
2683 this_ctx = rq->mq_ctx; in blk_mq_dispatch_plug_list()
2684 } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { in blk_mq_dispatch_plug_list()
2688 list_add_tail(&rq->queuelist, &list); in blk_mq_dispatch_plug_list()
2690 } while (!rq_list_empty(plug->mq_list)); in blk_mq_dispatch_plug_list()
2692 plug->mq_list = requeue_list; in blk_mq_dispatch_plug_list()
2693 trace_block_unplug(this_hctx->queue, depth, !from_sched); in blk_mq_dispatch_plug_list()
2701 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2703 plug->rq_count = 0; in blk_mq_flush_plug_list()
2705 if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { in blk_mq_flush_plug_list()
2708 rq = rq_list_peek(&plug->mq_list); in blk_mq_flush_plug_list()
2709 q = rq->q; in blk_mq_flush_plug_list()
2712 * Peek first request and see if we have a ->queue_rqs() hook. in blk_mq_flush_plug_list()
2721 if (q->mq_ops->queue_rqs && in blk_mq_flush_plug_list()
2722 !(rq->mq_hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_flush_plug_list()
2725 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2731 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2737 } while (!rq_list_empty(plug->mq_list)); in blk_mq_flush_plug_list()
2751 list_del_init(&rq->queuelist); in blk_mq_try_issue_list_directly()
2772 hctx->queue->mq_ops->commit_rqs && queued) in blk_mq_try_issue_list_directly()
2773 hctx->queue->mq_ops->commit_rqs(hctx); in blk_mq_try_issue_list_directly()
2796 .cmd_flags = bio->bi_opf, in blk_mq_get_new_requests()
2809 data.nr_tags = plug->nr_ios; in blk_mq_get_new_requests()
2810 plug->nr_ios = 1; in blk_mq_get_new_requests()
2811 data.cached_rq = &plug->cached_rq; in blk_mq_get_new_requests()
2818 if (bio->bi_opf & REQ_NOWAIT) in blk_mq_get_new_requests()
2832 rq = rq_list_peek(&plug->cached_rq); in blk_mq_get_cached_request()
2833 if (!rq || rq->q != q) in blk_mq_get_cached_request()
2841 if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type) in blk_mq_get_cached_request()
2843 if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) in blk_mq_get_cached_request()
2847 * If any qos ->throttle() end up blocking, we will have flushed the in blk_mq_get_cached_request()
2851 plug->cached_rq = rq_list_next(rq); in blk_mq_get_cached_request()
2854 rq->cmd_flags = (*bio)->bi_opf; in blk_mq_get_cached_request()
2855 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_get_cached_request()
2862 if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) in bio_set_ioprio()
2863 bio->bi_ioprio = get_current_ioprio(); in bio_set_ioprio()
2868 * blk_mq_submit_bio - Create and send a request to block device.
2882 struct request_queue *q = bdev_get_queue(bio->bi_bdev); in blk_mq_submit_bio()
2884 const int is_sync = op_is_sync(bio->bi_opf); in blk_mq_submit_bio()
2890 if (bio_may_exceed_limits(bio, &q->limits)) in blk_mq_submit_bio()
2891 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
2915 bio->bi_status = ret; in blk_mq_submit_bio()
2921 if (op_is_flush(bio->bi_opf)) { in blk_mq_submit_bio()
2928 else if ((rq->rq_flags & RQF_ELV) || in blk_mq_submit_bio()
2929 (rq->mq_hctx->dispatch_busy && in blk_mq_submit_bio()
2930 (q->nr_hw_queues == 1 || !is_sync))) in blk_mq_submit_bio()
2933 blk_mq_run_dispatch_ops(rq->q, in blk_mq_submit_bio()
2934 blk_mq_try_issue_directly(rq->mq_hctx, rq)); in blk_mq_submit_bio()
 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
/* in blk_insert_cloned_request(): */
	struct request_queue *q = rq->q;
	 * a non-read/write command (discard, write same, etc.) the
	 * low-level device driver will set the relevant queue limit to
	 * 0 to prevent blk-lib from issuing more of the offending
	 * The queue settings related to segment counting may differ from the
	rq->nr_phys_segments = blk_recalc_rq_segments(rq);
	if (rq->nr_phys_segments > queue_max_segments(q)) {
		__func__, rq->nr_phys_segments, queue_max_segments(q));
	if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))

 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
/* in blk_rq_unprep_clone(): */
	while ((bio = rq->bio) != NULL) {
		rq->bio = bio->bi_next;

 * blk_rq_prep_clone - Helper function to setup clone request
/* in blk_rq_prep_clone(): */
	bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask,
	if (rq->bio) {
		rq->biotail->bi_next = bio;
		rq->biotail = bio;
	rq->bio = rq->biotail = bio;
	rq->__sector = blk_rq_pos(rq_src);
	rq->__data_len = blk_rq_bytes(rq_src);
	if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
		rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
		rq->special_vec = rq_src->special_vec;
	rq->nr_phys_segments = rq_src->nr_phys_segments;
	rq->ioprio = rq_src->ioprio;
	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
	return -ENOMEM;
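/*
 * Illustrative sketch (not part of blk-mq.c): how a request-based stacking
 * driver (dm-multipath style) uses the clone helpers documented above.
 * 'clone' is assumed to be a request allocated on the lower device's queue;
 * the bio_set and error handling are simplified for the example.
 */
static blk_status_t my_clone_and_issue(struct request *clone,
				       struct request *rq_src,
				       struct bio_set *bs)
{
	blk_status_t ret;

	if (blk_rq_prep_clone(clone, rq_src, bs, GFP_ATOMIC, NULL, NULL))
		return BLK_STS_RESOURCE;

	ret = blk_insert_cloned_request(clone);
	if (ret != BLK_STS_OK)
		blk_rq_unprep_clone(clone);	/* free the cloned bios again */
	return ret;
}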
3094 if (rq->bio) { in blk_steal_bios()
3095 if (list->tail) in blk_steal_bios()
3096 list->tail->bi_next = rq->bio; in blk_steal_bios()
3098 list->head = rq->bio; in blk_steal_bios()
3099 list->tail = rq->biotail; in blk_steal_bios()
3101 rq->bio = NULL; in blk_steal_bios()
3102 rq->biotail = NULL; in blk_steal_bios()
3105 rq->__data_len = 0; in blk_steal_bios()
3122 * There is no need to clear mapping if driver tags is not initialized in blk_mq_clear_rq_mapping()
3128 list_for_each_entry(page, &tags->page_list, lru) { in blk_mq_clear_rq_mapping()
3130 unsigned long end = start + order_to_size(page->private); in blk_mq_clear_rq_mapping()
3133 for (i = 0; i < drv_tags->nr_tags; i++) { in blk_mq_clear_rq_mapping()
3134 struct request *rq = drv_tags->rqs[i]; in blk_mq_clear_rq_mapping()
3139 cmpxchg(&drv_tags->rqs[i], rq, NULL); in blk_mq_clear_rq_mapping()
3148 * after the ->lock is released. in blk_mq_clear_rq_mapping()
3150 spin_lock_irqsave(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3151 spin_unlock_irqrestore(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3160 if (list_empty(&tags->page_list)) in blk_mq_free_rqs()
3163 if (blk_mq_is_shared_tags(set->flags)) in blk_mq_free_rqs()
3164 drv_tags = set->shared_tags; in blk_mq_free_rqs()
3166 drv_tags = set->tags[hctx_idx]; in blk_mq_free_rqs()
3168 if (tags->static_rqs && set->ops->exit_request) { in blk_mq_free_rqs()
3171 for (i = 0; i < tags->nr_tags; i++) { in blk_mq_free_rqs()
3172 struct request *rq = tags->static_rqs[i]; in blk_mq_free_rqs()
3176 set->ops->exit_request(set, rq, hctx_idx); in blk_mq_free_rqs()
3177 tags->static_rqs[i] = NULL; in blk_mq_free_rqs()
3183 while (!list_empty(&tags->page_list)) { in blk_mq_free_rqs()
3184 page = list_first_entry(&tags->page_list, struct page, lru); in blk_mq_free_rqs()
3185 list_del_init(&page->lru); in blk_mq_free_rqs()
3191 __free_pages(page, page->private); in blk_mq_free_rqs()
3197 kfree(tags->rqs); in blk_mq_free_rq_map()
3198 tags->rqs = NULL; in blk_mq_free_rq_map()
3199 kfree(tags->static_rqs); in blk_mq_free_rq_map()
3200 tags->static_rqs = NULL; in blk_mq_free_rq_map()
3210 for (i = 0; i < set->nr_maps; i++) { in hctx_idx_to_type()
3211 unsigned int start = set->map[i].queue_offset; in hctx_idx_to_type()
3212 unsigned int end = start + set->map[i].nr_queues; in hctx_idx_to_type()
3218 if (i >= set->nr_maps) in hctx_idx_to_type()
3229 return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); in blk_mq_get_hctx_node()
3241 node = set->numa_node; in blk_mq_alloc_rq_map()
3244 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); in blk_mq_alloc_rq_map()
3248 tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3251 if (!tags->rqs) { in blk_mq_alloc_rq_map()
3256 tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3259 if (!tags->static_rqs) { in blk_mq_alloc_rq_map()
3260 kfree(tags->rqs); in blk_mq_alloc_rq_map()
3273 if (set->ops->init_request) { in blk_mq_init_request()
3274 ret = set->ops->init_request(set, rq, hctx_idx, node); in blk_mq_init_request()
3279 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_init_request()
3292 node = set->numa_node; in blk_mq_alloc_rqs()
3294 INIT_LIST_HEAD(&tags->page_list); in blk_mq_alloc_rqs()
3300 rq_size = round_up(sizeof(struct request) + set->cmd_size, in blk_mq_alloc_rqs()
3310 while (this_order && left < order_to_size(this_order - 1)) in blk_mq_alloc_rqs()
3311 this_order--; in blk_mq_alloc_rqs()
3319 if (!this_order--) in blk_mq_alloc_rqs()
3328 page->private = this_order; in blk_mq_alloc_rqs()
3329 list_add_tail(&page->lru, &tags->page_list); in blk_mq_alloc_rqs()
3334 * to additional allocations like via ops->init_request(). in blk_mq_alloc_rqs()
3338 to_do = min(entries_per_page, depth - i); in blk_mq_alloc_rqs()
3339 left -= to_do * rq_size; in blk_mq_alloc_rqs()
3343 tags->static_rqs[i] = rq; in blk_mq_alloc_rqs()
3345 tags->static_rqs[i] = NULL; in blk_mq_alloc_rqs()
3357 return -ENOMEM; in blk_mq_alloc_rqs()
3369 if (rq->mq_hctx != iter_data->hctx) in blk_mq_has_request()
3371 iter_data->has_rq = true; in blk_mq_has_request()
3377 struct blk_mq_tags *tags = hctx->sched_tags ? in blk_mq_hctx_has_requests()
3378 hctx->sched_tags : hctx->tags; in blk_mq_hctx_has_requests()
3390 if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) in blk_mq_last_cpu_in_hctx()
3392 if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) in blk_mq_last_cpu_in_hctx()
3402 if (!cpumask_test_cpu(cpu, hctx->cpumask) || in blk_mq_hctx_notify_offline()
3413 set_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_offline()
3419 * frozen and there are no requests. in blk_mq_hctx_notify_offline()
3421 if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { in blk_mq_hctx_notify_offline()
3424 percpu_ref_put(&hctx->queue->q_usage_counter); in blk_mq_hctx_notify_offline()
3435 if (cpumask_test_cpu(cpu, hctx->cpumask)) in blk_mq_hctx_notify_online()
3436 clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_online()
3453 if (!cpumask_test_cpu(cpu, hctx->cpumask)) in blk_mq_hctx_notify_dead()
3456 ctx = __blk_mq_get_ctx(hctx->queue, cpu); in blk_mq_hctx_notify_dead()
3457 type = hctx->type; in blk_mq_hctx_notify_dead()
3459 spin_lock(&ctx->lock); in blk_mq_hctx_notify_dead()
3460 if (!list_empty(&ctx->rq_lists[type])) { in blk_mq_hctx_notify_dead()
3461 list_splice_init(&ctx->rq_lists[type], &tmp); in blk_mq_hctx_notify_dead()
3464 spin_unlock(&ctx->lock); in blk_mq_hctx_notify_dead()
3469 spin_lock(&hctx->lock); in blk_mq_hctx_notify_dead()
3470 list_splice_tail_init(&tmp, &hctx->dispatch); in blk_mq_hctx_notify_dead()
3471 spin_unlock(&hctx->lock); in blk_mq_hctx_notify_dead()
3479 if (!(hctx->flags & BLK_MQ_F_STACKING)) in blk_mq_remove_cpuhp()
3481 &hctx->cpuhp_online); in blk_mq_remove_cpuhp()
3483 &hctx->cpuhp_dead); in blk_mq_remove_cpuhp()
3488 * tags->rqs[] for avoiding potential UAF.
3503 cmpxchg(&tags->rqs[i], flush_rq, NULL); in blk_mq_clear_flush_rq_mapping()
3509 * after the ->lock is released. in blk_mq_clear_flush_rq_mapping()
3511 spin_lock_irqsave(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3512 spin_unlock_irqrestore(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3515 /* hctx->ctxs will be freed in queue's release handler */
3520 struct request *flush_rq = hctx->fq->flush_rq; in blk_mq_exit_hctx()
3526 blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], in blk_mq_exit_hctx()
3527 set->queue_depth, flush_rq); in blk_mq_exit_hctx()
3528 if (set->ops->exit_request) in blk_mq_exit_hctx()
3529 set->ops->exit_request(set, flush_rq, hctx_idx); in blk_mq_exit_hctx()
3531 if (set->ops->exit_hctx) in blk_mq_exit_hctx()
3532 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_exit_hctx()
3536 xa_erase(&q->hctx_table, hctx_idx); in blk_mq_exit_hctx()
3538 spin_lock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3539 list_add(&hctx->hctx_list, &q->unused_hctx_list); in blk_mq_exit_hctx()
3540 spin_unlock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3560 hctx->queue_num = hctx_idx; in blk_mq_init_hctx()
3562 if (!(hctx->flags & BLK_MQ_F_STACKING)) in blk_mq_init_hctx()
3564 &hctx->cpuhp_online); in blk_mq_init_hctx()
3565 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); in blk_mq_init_hctx()
3567 hctx->tags = set->tags[hctx_idx]; in blk_mq_init_hctx()
3569 if (set->ops->init_hctx && in blk_mq_init_hctx()
3570 set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) in blk_mq_init_hctx()
3573 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, in blk_mq_init_hctx()
3574 hctx->numa_node)) in blk_mq_init_hctx()
3577 if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) in blk_mq_init_hctx()
3583 if (set->ops->exit_request) in blk_mq_init_hctx()
3584 set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); in blk_mq_init_hctx()
3586 if (set->ops->exit_hctx) in blk_mq_init_hctx()
3587 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_init_hctx()
3590 return -1; in blk_mq_init_hctx()
3604 if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) in blk_mq_alloc_hctx()
3607 atomic_set(&hctx->nr_active, 0); in blk_mq_alloc_hctx()
3609 node = set->numa_node; in blk_mq_alloc_hctx()
3610 hctx->numa_node = node; in blk_mq_alloc_hctx()
3612 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); in blk_mq_alloc_hctx()
3613 spin_lock_init(&hctx->lock); in blk_mq_alloc_hctx()
3614 INIT_LIST_HEAD(&hctx->dispatch); in blk_mq_alloc_hctx()
3615 hctx->queue = q; in blk_mq_alloc_hctx()
3616 hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_alloc_hctx()
3618 INIT_LIST_HEAD(&hctx->hctx_list); in blk_mq_alloc_hctx()
3624 hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), in blk_mq_alloc_hctx()
3626 if (!hctx->ctxs) in blk_mq_alloc_hctx()
3629 if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), in blk_mq_alloc_hctx()
3632 hctx->nr_ctx = 0; in blk_mq_alloc_hctx()
3634 spin_lock_init(&hctx->dispatch_wait_lock); in blk_mq_alloc_hctx()
3635 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); in blk_mq_alloc_hctx()
3636 INIT_LIST_HEAD(&hctx->dispatch_wait.entry); in blk_mq_alloc_hctx()
3638 hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); in blk_mq_alloc_hctx()
3639 if (!hctx->fq) in blk_mq_alloc_hctx()
3647 sbitmap_free(&hctx->ctx_map); in blk_mq_alloc_hctx()
3649 kfree(hctx->ctxs); in blk_mq_alloc_hctx()
3651 free_cpumask_var(hctx->cpumask); in blk_mq_alloc_hctx()
3661 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_init_cpu_queues()
3665 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_init_cpu_queues()
3669 __ctx->cpu = i; in blk_mq_init_cpu_queues()
3670 spin_lock_init(&__ctx->lock); in blk_mq_init_cpu_queues()
3672 INIT_LIST_HEAD(&__ctx->rq_lists[k]); in blk_mq_init_cpu_queues()
3674 __ctx->queue = q; in blk_mq_init_cpu_queues()
3680 for (j = 0; j < set->nr_maps; j++) { in blk_mq_init_cpu_queues()
3682 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) in blk_mq_init_cpu_queues()
3683 hctx->numa_node = cpu_to_node(i); in blk_mq_init_cpu_queues()
3695 tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); in blk_mq_alloc_map_and_rqs()
3711 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_map_and_rqs()
3712 set->tags[hctx_idx] = set->shared_tags; in __blk_mq_alloc_map_and_rqs()
3717 set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, in __blk_mq_alloc_map_and_rqs()
3718 set->queue_depth); in __blk_mq_alloc_map_and_rqs()
3720 return set->tags[hctx_idx]; in __blk_mq_alloc_map_and_rqs()
3736 if (!blk_mq_is_shared_tags(set->flags)) in __blk_mq_free_map_and_rqs()
3737 blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); in __blk_mq_free_map_and_rqs()
3739 set->tags[hctx_idx] = NULL; in __blk_mq_free_map_and_rqs()
3748 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_map_swqueue()
3751 cpumask_clear(hctx->cpumask); in blk_mq_map_swqueue()
3752 hctx->nr_ctx = 0; in blk_mq_map_swqueue()
3753 hctx->dispatch_from = NULL; in blk_mq_map_swqueue()
3763 ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_map_swqueue()
3764 for (j = 0; j < set->nr_maps; j++) { in blk_mq_map_swqueue()
3765 if (!set->map[j].nr_queues) { in blk_mq_map_swqueue()
3766 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
3770 hctx_idx = set->map[j].mq_map[i]; in blk_mq_map_swqueue()
3772 if (!set->tags[hctx_idx] && in blk_mq_map_swqueue()
3777 * case, remap the current ctx to hctx[0] which in blk_mq_map_swqueue()
3780 set->map[j].mq_map[i] = 0; in blk_mq_map_swqueue()
3784 ctx->hctxs[j] = hctx; in blk_mq_map_swqueue()
3790 if (cpumask_test_cpu(i, hctx->cpumask)) in blk_mq_map_swqueue()
3793 cpumask_set_cpu(i, hctx->cpumask); in blk_mq_map_swqueue()
3794 hctx->type = j; in blk_mq_map_swqueue()
3795 ctx->index_hw[hctx->type] = hctx->nr_ctx; in blk_mq_map_swqueue()
3796 hctx->ctxs[hctx->nr_ctx++] = ctx; in blk_mq_map_swqueue()
3802 BUG_ON(!hctx->nr_ctx); in blk_mq_map_swqueue()
3806 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
3812 * If no software queues are mapped to this hardware queue, in blk_mq_map_swqueue()
3815 if (!hctx->nr_ctx) { in blk_mq_map_swqueue()
3817 * fallback in case of a new remap fails in blk_mq_map_swqueue()
3823 hctx->tags = NULL; in blk_mq_map_swqueue()
3827 hctx->tags = set->tags[i]; in blk_mq_map_swqueue()
3828 WARN_ON(!hctx->tags); in blk_mq_map_swqueue()
3835 sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); in blk_mq_map_swqueue()
3840 hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); in blk_mq_map_swqueue()
3841 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_map_swqueue()
3856 hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
3859 hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
3869 lockdep_assert_held(&set->tag_list_lock); in blk_mq_update_tag_set_shared()
3871 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_update_tag_set_shared()
3880 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_del_queue_tag_set()
3882 mutex_lock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
3883 list_del(&q->tag_set_list); in blk_mq_del_queue_tag_set()
3884 if (list_is_singular(&set->tag_list)) { in blk_mq_del_queue_tag_set()
3886 set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_del_queue_tag_set()
3890 mutex_unlock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
3891 INIT_LIST_HEAD(&q->tag_set_list); in blk_mq_del_queue_tag_set()
3897 mutex_lock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
3902 if (!list_empty(&set->tag_list) && in blk_mq_add_queue_tag_set()
3903 !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_add_queue_tag_set()
3904 set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_add_queue_tag_set()
3908 if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) in blk_mq_add_queue_tag_set()
3910 list_add_tail(&q->tag_set_list, &set->tag_list); in blk_mq_add_queue_tag_set()
3912 mutex_unlock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
3915 /* All allocations will be freed in release handler of q->mq_kobj */
3923 return -ENOMEM; in blk_mq_alloc_ctxs()
3925 ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); in blk_mq_alloc_ctxs()
3926 if (!ctxs->queue_ctx) in blk_mq_alloc_ctxs()
3930 struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); in blk_mq_alloc_ctxs()
3931 ctx->ctxs = ctxs; in blk_mq_alloc_ctxs()
3934 q->mq_kobj = &ctxs->kobj; in blk_mq_alloc_ctxs()
3935 q->queue_ctx = ctxs->queue_ctx; in blk_mq_alloc_ctxs()
3940 return -ENOMEM; in blk_mq_alloc_ctxs()
3945 * request queue's release handler for avoiding use-after-free
3946 * and headache because q->mq_kobj shouldn't have been introduced,
3955 WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); in blk_mq_release()
3958 list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { in blk_mq_release()
3959 list_del_init(&hctx->hctx_list); in blk_mq_release()
3960 kobject_put(&hctx->kobj); in blk_mq_release()
3963 xa_destroy(&q->hctx_table); in blk_mq_release()
/* in blk_mq_init_queue_data(): */
	q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING);
		return ERR_PTR(-ENOMEM);
	q->queuedata = queuedata;

 * blk_mq_destroy_queue - shutdown a request queue
 * the initial reference. All future requests will fail with -ENODEV.

/* in __blk_mq_alloc_disk(): */
	disk = __alloc_disk_node(q, set->numa_node, lkclass);
		return ERR_PTR(-ENOMEM);
	set_bit(GD_OWNS_QUEUE, &disk->state);
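/*
 * Illustrative sketch (not part of blk-mq.c): minimal driver-side setup and
 * teardown around the allocation helpers above.  The field values are
 * arbitrary example numbers; my_mq_ops, struct my_dev and struct my_cmd are
 * made-up names.
 */
static int my_create_disk(struct my_dev *dev)
{
	int ret;

	dev->tag_set.ops = &my_mq_ops;
	dev->tag_set.nr_hw_queues = 1;
	dev->tag_set.queue_depth = 128;
	dev->tag_set.numa_node = NUMA_NO_NODE;
	dev->tag_set.cmd_size = sizeof(struct my_cmd);
	dev->tag_set.driver_data = dev;

	ret = blk_mq_alloc_tag_set(&dev->tag_set);
	if (ret)
		return ret;

	dev->disk = blk_mq_alloc_disk(&dev->tag_set, dev);
	if (IS_ERR(dev->disk)) {
		blk_mq_free_tag_set(&dev->tag_set);
		return PTR_ERR(dev->disk);
	}
	return 0;
}

static void my_destroy_disk(struct my_dev *dev)
{
	del_gendisk(dev->disk);
	put_disk(dev->disk);	/* GD_OWNS_QUEUE (set above) ties the queue's lifetime to the disk */
	blk_mq_free_tag_set(&dev->tag_set);
}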
4066 spin_lock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4067 list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { in blk_mq_alloc_and_init_hctx()
4068 if (tmp->numa_node == node) { in blk_mq_alloc_and_init_hctx()
4074 list_del_init(&hctx->hctx_list); in blk_mq_alloc_and_init_hctx()
4075 spin_unlock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4088 kobject_put(&hctx->kobj); in blk_mq_alloc_and_init_hctx()
4100 mutex_lock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4101 for (i = 0; i < set->nr_hw_queues; i++) { in blk_mq_realloc_hw_ctxs()
4104 struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); in blk_mq_realloc_hw_ctxs()
4107 old_node = old_hctx->numa_node; in blk_mq_realloc_hw_ctxs()
4122 * hctxs and keep the previous q->nr_hw_queues. in blk_mq_realloc_hw_ctxs()
4124 if (i != set->nr_hw_queues) { in blk_mq_realloc_hw_ctxs()
4125 j = q->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4128 q->nr_hw_queues = set->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4131 xa_for_each_start(&q->hctx_table, j, hctx, j) in blk_mq_realloc_hw_ctxs()
4133 mutex_unlock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4138 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_poll_flag()
4140 if (set->nr_maps > HCTX_TYPE_POLL && in blk_mq_update_poll_flag()
4141 set->map[HCTX_TYPE_POLL].nr_queues) in blk_mq_update_poll_flag()
4151 !!(set->flags & BLK_MQ_F_BLOCKING)); in blk_mq_init_allocated_queue()
4154 q->mq_ops = set->ops; in blk_mq_init_allocated_queue()
4156 q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn, in blk_mq_init_allocated_queue()
4159 if (!q->poll_cb) in blk_mq_init_allocated_queue()
4165 /* init q->mq_kobj and sw queues' kobjects */ in blk_mq_init_allocated_queue()
4168 INIT_LIST_HEAD(&q->unused_hctx_list); in blk_mq_init_allocated_queue()
4169 spin_lock_init(&q->unused_hctx_lock); in blk_mq_init_allocated_queue()
4171 xa_init(&q->hctx_table); in blk_mq_init_allocated_queue()
4174 if (!q->nr_hw_queues) in blk_mq_init_allocated_queue()
4177 INIT_WORK(&q->timeout_work, blk_mq_timeout_work); in blk_mq_init_allocated_queue()
4178 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); in blk_mq_init_allocated_queue()
4180 q->tag_set = set; in blk_mq_init_allocated_queue()
4182 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; in blk_mq_init_allocated_queue()
4185 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); in blk_mq_init_allocated_queue()
4186 INIT_LIST_HEAD(&q->requeue_list); in blk_mq_init_allocated_queue()
4187 spin_lock_init(&q->requeue_lock); in blk_mq_init_allocated_queue()
4189 q->nr_requests = set->queue_depth; in blk_mq_init_allocated_queue()
4194 q->poll_nsec = BLK_MQ_POLL_CLASSIC; in blk_mq_init_allocated_queue()
4196 blk_mq_init_cpu_queues(q, set->nr_hw_queues); in blk_mq_init_allocated_queue()
4204 blk_stat_free_callback(q->poll_cb); in blk_mq_init_allocated_queue()
4205 q->poll_cb = NULL; in blk_mq_init_allocated_queue()
4207 q->mq_ops = NULL; in blk_mq_init_allocated_queue()
4208 return -ENOMEM; in blk_mq_init_allocated_queue()
4215 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_exit_queue()
4217 /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ in blk_mq_exit_queue()
4218 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); in blk_mq_exit_queue()
4219 /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ in blk_mq_exit_queue()
4227 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4228 set->shared_tags = blk_mq_alloc_map_and_rqs(set, in __blk_mq_alloc_rq_maps()
4230 set->queue_depth); in __blk_mq_alloc_rq_maps()
4231 if (!set->shared_tags) in __blk_mq_alloc_rq_maps()
4232 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4235 for (i = 0; i < set->nr_hw_queues; i++) { in __blk_mq_alloc_rq_maps()
4244 while (--i >= 0) in __blk_mq_alloc_rq_maps()
4247 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4248 blk_mq_free_map_and_rqs(set, set->shared_tags, in __blk_mq_alloc_rq_maps()
4252 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4257 * may reduce the depth asked for, if memory is tight. set->queue_depth
4265 depth = set->queue_depth; in blk_mq_alloc_set_map_and_rqs()
4271 set->queue_depth >>= 1; in blk_mq_alloc_set_map_and_rqs()
4272 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { in blk_mq_alloc_set_map_and_rqs()
4273 err = -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4276 } while (set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4278 if (!set->queue_depth || err) { in blk_mq_alloc_set_map_and_rqs()
4279 pr_err("blk-mq: failed to allocate request map\n"); in blk_mq_alloc_set_map_and_rqs()
4280 return -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4283 if (depth != set->queue_depth) in blk_mq_alloc_set_map_and_rqs()
4284 pr_info("blk-mq: reduced tag depth (%u -> %u)\n", in blk_mq_alloc_set_map_and_rqs()
4285 depth, set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
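/*
 * Minimal sketch, assuming a hypothetical driver "mydrv": because the map
 * allocation above may repeatedly halve set->queue_depth under memory
 * pressure, a driver should size per-command resources from the depth left
 * in set->queue_depth after blk_mq_alloc_tag_set() returns, not from the
 * value it originally requested.
 */
#include <linux/blk-mq.h>
#include <linux/printk.h>

static int mydrv_setup_tags(struct blk_mq_tag_set *set, unsigned int wanted)
{
	int ret;

	set->queue_depth = wanted;
	ret = blk_mq_alloc_tag_set(set);
	if (ret)
		return ret;

	if (set->queue_depth < wanted)
		pr_info("mydrv: queue depth reduced from %u to %u\n",
			wanted, set->queue_depth);
	return 0;
}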
4294 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the in blk_mq_update_queue_map()
4297 if (set->nr_maps == 1) in blk_mq_update_queue_map()
4298 set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; in blk_mq_update_queue_map()
4300 if (set->ops->map_queues && !is_kdump_kernel()) { in blk_mq_update_queue_map()
4307 * for (queue = 0; queue < set->nr_hw_queues; queue++) { in blk_mq_update_queue_map()
4310 * set->map[x].mq_map[cpu] = queue; in blk_mq_update_queue_map()
4313 * When we need to remap, the table has to be cleared for in blk_mq_update_queue_map()
4317 for (i = 0; i < set->nr_maps; i++) in blk_mq_update_queue_map()
4318 blk_mq_clear_mq_map(&set->map[i]); in blk_mq_update_queue_map()
4320 set->ops->map_queues(set); in blk_mq_update_queue_map()
4322 BUG_ON(set->nr_maps > 1); in blk_mq_update_queue_map()
4323 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); in blk_mq_update_queue_map()
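/*
 * A sketch of the mapping pattern the comment above describes, assuming a
 * hypothetical driver with a single map and a fixed CPU-to-queue rule; real
 * drivers usually derive the spread from IRQ affinity (for example via
 * blk_mq_map_queues() or blk_mq_pci_map_queues()).  This helper would be
 * wired up as the tag set's ->map_queues callback, whose exact prototype
 * has changed across kernel versions and is therefore omitted here.
 */
#include <linux/blk-mq.h>
#include <linux/cpumask.h>

static void mydrv_fill_default_map(struct blk_mq_tag_set *set)
{
	struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
	unsigned int cpu;

	/* Spread all possible CPUs round-robin over the hardware queues. */
	for_each_possible_cpu(cpu)
		qmap->mq_map[cpu] = cpu % set->nr_hw_queues;
}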
4336 GFP_KERNEL, set->numa_node); in blk_mq_realloc_tag_set_tags()
4338 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4340 if (set->tags) in blk_mq_realloc_tag_set_tags()
4341 memcpy(new_tags, set->tags, cur_nr_hw_queues * in blk_mq_realloc_tag_set_tags()
4342 sizeof(*set->tags)); in blk_mq_realloc_tag_set_tags()
4343 kfree(set->tags); in blk_mq_realloc_tag_set_tags()
4344 set->tags = new_tags; in blk_mq_realloc_tag_set_tags()
4345 set->nr_hw_queues = new_nr_hw_queues; in blk_mq_realloc_tag_set_tags()
4360 * value will be stored in set->queue_depth.
4368 if (!set->nr_hw_queues) in blk_mq_alloc_tag_set()
4369 return -EINVAL; in blk_mq_alloc_tag_set()
4370 if (!set->queue_depth) in blk_mq_alloc_tag_set()
4371 return -EINVAL; in blk_mq_alloc_tag_set()
4372 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) in blk_mq_alloc_tag_set()
4373 return -EINVAL; in blk_mq_alloc_tag_set()
4375 if (!set->ops->queue_rq) in blk_mq_alloc_tag_set()
4376 return -EINVAL; in blk_mq_alloc_tag_set()
4378 if (!set->ops->get_budget ^ !set->ops->put_budget) in blk_mq_alloc_tag_set()
4379 return -EINVAL; in blk_mq_alloc_tag_set()
4381 if (set->queue_depth > BLK_MQ_MAX_DEPTH) { in blk_mq_alloc_tag_set()
4382 pr_info("blk-mq: reduced tag depth to %u\n", in blk_mq_alloc_tag_set()
4384 set->queue_depth = BLK_MQ_MAX_DEPTH; in blk_mq_alloc_tag_set()
4387 if (!set->nr_maps) in blk_mq_alloc_tag_set()
4388 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4389 else if (set->nr_maps > HCTX_MAX_TYPES) in blk_mq_alloc_tag_set()
4390 return -EINVAL; in blk_mq_alloc_tag_set()
4398 set->nr_hw_queues = 1; in blk_mq_alloc_tag_set()
4399 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4400 set->queue_depth = min(64U, set->queue_depth); in blk_mq_alloc_tag_set()
4403 * There is no use for more h/w queues than cpus if we just have in blk_mq_alloc_tag_set()
4406 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) in blk_mq_alloc_tag_set()
4407 set->nr_hw_queues = nr_cpu_ids; in blk_mq_alloc_tag_set()
4409 if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0) in blk_mq_alloc_tag_set()
4410 return -ENOMEM; in blk_mq_alloc_tag_set()
4412 ret = -ENOMEM; in blk_mq_alloc_tag_set()
4413 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4414 set->map[i].mq_map = kcalloc_node(nr_cpu_ids, in blk_mq_alloc_tag_set()
4415 sizeof(set->map[i].mq_map[0]), in blk_mq_alloc_tag_set()
4416 GFP_KERNEL, set->numa_node); in blk_mq_alloc_tag_set()
4417 if (!set->map[i].mq_map) in blk_mq_alloc_tag_set()
4419 set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues; in blk_mq_alloc_tag_set()
4428 mutex_init(&set->tag_list_lock); in blk_mq_alloc_tag_set()
4429 INIT_LIST_HEAD(&set->tag_list); in blk_mq_alloc_tag_set()
4434 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4435 kfree(set->map[i].mq_map); in blk_mq_alloc_tag_set()
4436 set->map[i].mq_map = NULL; in blk_mq_alloc_tag_set()
4438 kfree(set->tags); in blk_mq_alloc_tag_set()
4439 set->tags = NULL; in blk_mq_alloc_tag_set()
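/*
 * A hedged, self-contained sketch of what a caller is expected to set up
 * before the validation above runs.  The "mydrv_*" names, the toy queue_rq
 * that completes requests immediately, and the specific numbers are
 * illustrative assumptions, not part of this file.
 */
#include <linux/blk-mq.h>
#include <linux/numa.h>
#include <linux/string.h>

struct mydrv_cmd {
	int status;			/* hypothetical per-request driver data */
};

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	blk_mq_start_request(bd->rq);
	blk_mq_end_request(bd->rq, BLK_STS_OK);	/* toy: complete at once */
	return BLK_STS_OK;
}

static const struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq	= mydrv_queue_rq,	/* mandatory, see the check above */
};

static int mydrv_init_tag_set(struct blk_mq_tag_set *set)
{
	memset(set, 0, sizeof(*set));
	set->ops = &mydrv_mq_ops;
	set->nr_hw_queues = 4;
	set->nr_maps = 1;
	set->queue_depth = 128;
	set->numa_node = NUMA_NO_NODE;
	set->cmd_size = sizeof(struct mydrv_cmd);
	set->flags = BLK_MQ_F_SHOULD_MERGE;

	return blk_mq_alloc_tag_set(set);	/* paired with blk_mq_free_tag_set() */
}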
4444 /* allocate and initialize a tagset for a simple single-queue device */
4450 set->ops = ops; in blk_mq_alloc_sq_tag_set()
4451 set->nr_hw_queues = 1; in blk_mq_alloc_sq_tag_set()
4452 set->nr_maps = 1; in blk_mq_alloc_sq_tag_set()
4453 set->queue_depth = queue_depth; in blk_mq_alloc_sq_tag_set()
4454 set->numa_node = NUMA_NO_NODE; in blk_mq_alloc_sq_tag_set()
4455 set->flags = set_flags; in blk_mq_alloc_sq_tag_set()
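/*
 * For comparison, the single-queue helper above reduces that setup to one
 * call; a hypothetical simple driver might use it like this (queue depth 64
 * and the merge flag are illustrative choices):
 */
static int mydrv_init_simple_tag_set(struct blk_mq_tag_set *set,
				     const struct blk_mq_ops *ops)
{
	return blk_mq_alloc_sq_tag_set(set, ops, 64, BLK_MQ_F_SHOULD_MERGE);
}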
4464 for (i = 0; i < set->nr_hw_queues; i++) in blk_mq_free_tag_set()
4467 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_free_tag_set()
4468 blk_mq_free_map_and_rqs(set, set->shared_tags, in blk_mq_free_tag_set()
4472 for (j = 0; j < set->nr_maps; j++) { in blk_mq_free_tag_set()
4473 kfree(set->map[j].mq_map); in blk_mq_free_tag_set()
4474 set->map[j].mq_map = NULL; in blk_mq_free_tag_set()
4477 kfree(set->tags); in blk_mq_free_tag_set()
4478 set->tags = NULL; in blk_mq_free_tag_set()
4484 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_nr_requests()
4490 return -EINVAL; in blk_mq_update_nr_requests()
4492 if (q->nr_requests == nr) in blk_mq_update_nr_requests()
4500 if (!hctx->tags) in blk_mq_update_nr_requests()
4506 if (hctx->sched_tags) { in blk_mq_update_nr_requests()
4507 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, in blk_mq_update_nr_requests()
4510 ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, in blk_mq_update_nr_requests()
4515 if (q->elevator && q->elevator->type->ops.depth_updated) in blk_mq_update_nr_requests()
4516 q->elevator->type->ops.depth_updated(hctx); in blk_mq_update_nr_requests()
4519 q->nr_requests = nr; in blk_mq_update_nr_requests()
4520 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_update_nr_requests()
4521 if (q->elevator) in blk_mq_update_nr_requests()
4554 if (!q->elevator) in blk_mq_elv_switch_none()
4561 /* q->elevator needs protection from ->sysfs_lock */ in blk_mq_elv_switch_none()
4562 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4564 INIT_LIST_HEAD(&qe->node); in blk_mq_elv_switch_none()
4565 qe->q = q; in blk_mq_elv_switch_none()
4566 qe->type = q->elevator->type; in blk_mq_elv_switch_none()
4567 list_add(&qe->node, head); in blk_mq_elv_switch_none()
4576 __module_get(qe->type->elevator_owner); in blk_mq_elv_switch_none()
4578 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4589 if (qe->q == q) in blk_lookup_qe_pair()
4604 t = qe->type; in blk_mq_elv_switch_back()
4605 list_del(&qe->node); in blk_mq_elv_switch_back()
4608 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4610 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4620 lockdep_assert_held(&set->tag_list_lock); in __blk_mq_update_nr_hw_queues()
4622 if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) in __blk_mq_update_nr_hw_queues()
4626 if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) in __blk_mq_update_nr_hw_queues()
4629 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4636 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4640 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4645 prev_nr_hw_queues = set->nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4646 if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) < in __blk_mq_update_nr_hw_queues()
4650 set->nr_hw_queues = nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4653 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4656 if (q->nr_hw_queues != set->nr_hw_queues) { in __blk_mq_update_nr_hw_queues()
4661 for (; i < set->nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
4664 set->nr_hw_queues = prev_nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4665 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); in __blk_mq_update_nr_hw_queues()
4672 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4678 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4681 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4687 mutex_lock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
4689 mutex_unlock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
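/*
 * Sketch of how a caller typically reaches the update path above: a
 * hypothetical driver that has renegotiated its interrupt vectors asks the
 * block layer to re-spread its queues.  blk_mq_update_nr_hw_queues() takes
 * the tag_list_lock itself, so the driver only passes the new count.
 */
#include <linux/blk-mq.h>

static void mydrv_resize_hw_queues(struct blk_mq_tag_set *set,
				   unsigned int new_nr_vecs)
{
	blk_mq_update_nr_hw_queues(set, new_nr_vecs);
}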
4696 if (q->poll_stat) in blk_poll_stats_enable()
4708 if (!q->poll_stat || blk_stat_is_active(q->poll_cb)) in blk_mq_poll_stats_start()
4711 blk_stat_activate_msecs(q->poll_cb, 100); in blk_mq_poll_stats_start()
4716 struct request_queue *q = cb->data; in blk_mq_poll_stats_fn()
4720 if (cb->stat[bucket].nr_samples) in blk_mq_poll_stats_fn()
4721 q->poll_stat[bucket] = cb->stat[bucket]; in blk_mq_poll_stats_fn()
4751 if (q->poll_stat[bucket].nr_samples) in blk_mq_poll_nsecs()
4752 ret = (q->poll_stat[bucket].mean + 1) / 2; in blk_mq_poll_nsecs()
4770 if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT)) in blk_mq_poll_hybrid()
4779 if (q->poll_nsec > 0) in blk_mq_poll_hybrid()
4780 nsecs = q->poll_nsec; in blk_mq_poll_hybrid()
4787 rq->rq_flags |= RQF_MQ_POLL_SLEPT; in blk_mq_poll_hybrid()
4791 * 'avg_completion_time / 2' as the pre-sleep target. in blk_mq_poll_hybrid()
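/*
 * Not kernel code: an illustrative helper for the heuristic referenced
 * above.  With hybrid polling, an explicitly configured poll_nsec wins;
 * otherwise the pre-sleep target is half the observed mean completion time
 * for the request's size bucket (a 40000ns mean yields roughly 20000ns).
 */
#include <linux/types.h>

static u64 example_hybrid_sleep_target_ns(s64 poll_nsec, u64 bucket_mean_ns)
{
	if (poll_nsec > 0)
		return poll_nsec;		/* user-tuned fixed target */
	return (bucket_mean_ns + 1) / 2;	/* adaptive: half the mean */
}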
4831 ret = q->mq_ops->poll(hctx, iob); in blk_mq_poll_classic()
4855 q->poll_nsec != BLK_MQ_POLL_CLASSIC) { in blk_mq_poll()
4864 return rq->mq_ctx->cpu; in blk_mq_rq_cpu()
4874 cancel_delayed_work_sync(&q->requeue_work); in blk_mq_cancel_work_sync()
4877 cancel_delayed_work_sync(&hctx->run_work); in blk_mq_cancel_work_sync()