Lines matching "lock", "latency" and "ns" in block/blk-throttle.c

1 // SPDX-License-Identifier: GPL-2.0
14 #include "blk-cgroup-rwstat.h"
15 #include "blk-stat.h"
16 #include "blk-throttle.h"
31 #define DFL_LATENCY_TARGET (-1L)
36 * For HD, very small latency comes from sequential IO. Such IO is helpless to
46 /* We measure latency for request size from <= 4k to >= 1M */
50 unsigned long total_latency; /* ns / 1024 */
55 unsigned long latency; /* ns / 1024 */ member
94 return pd_to_blkg(&tg->pd); in tg_to_blkg()
98 * sq_to_tg - return the throl_grp the specified service queue belongs to
101 * Return the throtl_grp @sq belongs to. If @sq is the top-level one
106 if (sq && sq->parent_sq) in sq_to_tg()
113 * sq_to_td - return throtl_data the specified service queue belongs to
124 return tg->td; in sq_to_td()
140 if (td->scale < 4096 && time_after_eq(jiffies, in throtl_adjusted_limit()
141 td->low_upgrade_time + td->scale * td->throtl_slice)) in throtl_adjusted_limit()
142 td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice; in throtl_adjusted_limit()
144 return low + (low >> 1) * td->scale; in throtl_adjusted_limit()
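A minimal userspace sketch of the ramp-up arithmetic in throtl_adjusted_limit() above, assuming plain integers in place of jiffies and a hard cap of 4096 on the scale (the kernel stops advancing td->scale at that point); the function name and parameters are illustrative, not kernel API.

#include <stdint.h>

/* Effective limit while running in LIMIT_MAX after an upgrade from
 * LIMIT_LOW: grow by 50% of the low limit for every throttle slice that
 * has passed since the upgrade. */
static uint64_t adjusted_limit(uint64_t low, unsigned long now,
			       unsigned long low_upgrade_time,
			       unsigned long throtl_slice)
{
	unsigned long scale = (now - low_upgrade_time) / throtl_slice;

	if (scale > 4096)
		scale = 4096;
	return low + (low >> 1) * scale;
}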
153 if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) in tg_bps_limit()
156 td = tg->td; in tg_bps_limit()
157 ret = tg->bps[rw][td->limit_index]; in tg_bps_limit()
158 if (ret == 0 && td->limit_index == LIMIT_LOW) { in tg_bps_limit()
160 if (!list_empty(&blkg->blkcg->css.children) || in tg_bps_limit()
161 tg->iops[rw][td->limit_index]) in tg_bps_limit()
167 if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] && in tg_bps_limit()
168 tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) { in tg_bps_limit()
171 adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td); in tg_bps_limit()
172 ret = min(tg->bps[rw][LIMIT_MAX], adjusted); in tg_bps_limit()
183 if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) in tg_iops_limit()
186 td = tg->td; in tg_iops_limit()
187 ret = tg->iops[rw][td->limit_index]; in tg_iops_limit()
188 if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { in tg_iops_limit()
190 if (!list_empty(&blkg->blkcg->css.children) || in tg_iops_limit()
191 tg->bps[rw][td->limit_index]) in tg_iops_limit()
197 if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] && in tg_iops_limit()
198 tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) { in tg_iops_limit()
201 adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td); in tg_iops_limit()
204 ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted); in tg_iops_limit()
210 clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
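A standalone sketch of the bucket mapping done by request_bucket_index(); it assumes LATENCY_BUCKET_SIZE is 9 (buckets spanning <=4k to >=1M with 512-byte sectors) and reimplements order_base_2() as a plain ceiling log2, so treat it as a model of the macro rather than the macro itself.

#include <stdio.h>

#define LATENCY_BUCKET_SIZE 9   /* assumption: matches blk-throttle.h */

static int order_base_2(unsigned long n)       /* ceil(log2(n)) */
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

static int request_bucket_index(unsigned long sectors)
{
	int idx = order_base_2(sectors) - 3;

	if (idx < 0)
		idx = 0;
	if (idx > LATENCY_BUCKET_SIZE - 1)
		idx = LATENCY_BUCKET_SIZE - 1;
	return idx;
}

int main(void)
{
	/* a 4k request is 8 sectors -> bucket 0; 1M is 2048 sectors -> bucket 8 */
	printf("%d %d\n", request_bucket_index(8), request_bucket_index(2048));
	return 0;
}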
213 * throtl_log - log debug message via blktrace
226 if (likely(!blk_trace_note_message_enabled(__td->queue))) \
229 blk_add_cgroup_trace_msg(__td->queue, \
230 &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
232 blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
241 return bio->bi_iter.bi_size; in throtl_bio_data_size()
246 INIT_LIST_HEAD(&qn->node); in throtl_qnode_init()
247 bio_list_init(&qn->bios); in throtl_qnode_init()
248 qn->tg = tg; in throtl_qnode_init()
252 * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
255 * @queued: the service_queue->queued[] list @qn belongs to
258 * @qn->tg's reference count is bumped when @qn is activated. See the
264 bio_list_add(&qn->bios, bio); in throtl_qnode_add_bio()
265 if (list_empty(&qn->node)) { in throtl_qnode_add_bio()
266 list_add_tail(&qn->node, queued); in throtl_qnode_add_bio()
267 blkg_get(tg_to_blkg(qn->tg)); in throtl_qnode_add_bio()
272 * throtl_peek_queued - peek the first bio on a qnode list
284 bio = bio_list_peek(&qn->bios); in throtl_peek_queued()
290 * throtl_pop_queued - pop the first bio from a qnode list
296 * that the popping order is round-robin.
313 bio = bio_list_pop(&qn->bios); in throtl_pop_queued()
316 if (bio_list_empty(&qn->bios)) { in throtl_pop_queued()
317 list_del_init(&qn->node); in throtl_pop_queued()
319 *tg_to_put = qn->tg; in throtl_pop_queued()
321 blkg_put(tg_to_blkg(qn->tg)); in throtl_pop_queued()
323 list_move_tail(&qn->node, queued); in throtl_pop_queued()
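The round-robin behaviour described in the throtl_pop_queued() comment can be sketched without the kernel's list_head/bio_list machinery; the structure and function below are simplified stand-ins (a singly linked queue with a bio counter), not the real throtl_qnode.

#include <stddef.h>

struct qnode {
	struct qnode *next;     /* next qnode on the service queue's list */
	int nr_bios;            /* bios still queued on this qnode        */
};

/* Take one bio's worth of work from the head qnode.  If the qnode still
 * has bios afterwards, rotate it to the tail so the next pop services a
 * different group: that is what makes the popping order round-robin. */
static struct qnode *pop_round_robin(struct qnode **head, struct qnode **tail)
{
	struct qnode *qn = *head;

	if (!qn)
		return NULL;
	qn->nr_bios--;
	*head = qn->next;
	if (!*head)
		*tail = NULL;
	if (qn->nr_bios) {              /* not yet empty: requeue at tail */
		qn->next = NULL;
		if (*tail)
			(*tail)->next = qn;
		else
			*head = qn;
		*tail = qn;
	}
	return qn;
}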
332 INIT_LIST_HEAD(&sq->queued[READ]); in throtl_service_queue_init()
333 INIT_LIST_HEAD(&sq->queued[WRITE]); in throtl_service_queue_init()
334 sq->pending_tree = RB_ROOT_CACHED; in throtl_service_queue_init()
335 timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); in throtl_service_queue_init()
345 tg = kzalloc_node(sizeof(*tg), gfp, q->node); in throtl_pd_alloc()
349 if (blkg_rwstat_init(&tg->stat_bytes, gfp)) in throtl_pd_alloc()
352 if (blkg_rwstat_init(&tg->stat_ios, gfp)) in throtl_pd_alloc()
355 throtl_service_queue_init(&tg->service_queue); in throtl_pd_alloc()
358 throtl_qnode_init(&tg->qnode_on_self[rw], tg); in throtl_pd_alloc()
359 throtl_qnode_init(&tg->qnode_on_parent[rw], tg); in throtl_pd_alloc()
362 RB_CLEAR_NODE(&tg->rb_node); in throtl_pd_alloc()
363 tg->bps[READ][LIMIT_MAX] = U64_MAX; in throtl_pd_alloc()
364 tg->bps[WRITE][LIMIT_MAX] = U64_MAX; in throtl_pd_alloc()
365 tg->iops[READ][LIMIT_MAX] = UINT_MAX; in throtl_pd_alloc()
366 tg->iops[WRITE][LIMIT_MAX] = UINT_MAX; in throtl_pd_alloc()
367 tg->bps_conf[READ][LIMIT_MAX] = U64_MAX; in throtl_pd_alloc()
368 tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX; in throtl_pd_alloc()
369 tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX; in throtl_pd_alloc()
370 tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX; in throtl_pd_alloc()
373 tg->latency_target = DFL_LATENCY_TARGET; in throtl_pd_alloc()
374 tg->latency_target_conf = DFL_LATENCY_TARGET; in throtl_pd_alloc()
375 tg->idletime_threshold = DFL_IDLE_THRESHOLD; in throtl_pd_alloc()
376 tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; in throtl_pd_alloc()
378 return &tg->pd; in throtl_pd_alloc()
381 blkg_rwstat_exit(&tg->stat_bytes); in throtl_pd_alloc()
391 struct throtl_data *td = blkg->q->td; in throtl_pd_init()
392 struct throtl_service_queue *sq = &tg->service_queue; in throtl_pd_init()
407 sq->parent_sq = &td->service_queue; in throtl_pd_init()
408 if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) in throtl_pd_init()
409 sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; in throtl_pd_init()
410 tg->td = td; in throtl_pd_init()
420 struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq); in tg_update_has_rules()
421 struct throtl_data *td = tg->td; in tg_update_has_rules()
425 tg->has_rules_iops[rw] = in tg_update_has_rules()
426 (parent_tg && parent_tg->has_rules_iops[rw]) || in tg_update_has_rules()
427 (td->limit_valid[td->limit_index] && in tg_update_has_rules()
429 tg->has_rules_bps[rw] = in tg_update_has_rules()
430 (parent_tg && parent_tg->has_rules_bps[rw]) || in tg_update_has_rules()
431 (td->limit_valid[td->limit_index] && in tg_update_has_rules()
454 blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { in blk_throtl_update_limit_valid()
457 if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] || in blk_throtl_update_limit_valid()
458 tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) { in blk_throtl_update_limit_valid()
465 td->limit_valid[LIMIT_LOW] = low_valid; in blk_throtl_update_limit_valid()
478 tg->bps[READ][LIMIT_LOW] = 0; in throtl_pd_offline()
479 tg->bps[WRITE][LIMIT_LOW] = 0; in throtl_pd_offline()
480 tg->iops[READ][LIMIT_LOW] = 0; in throtl_pd_offline()
481 tg->iops[WRITE][LIMIT_LOW] = 0; in throtl_pd_offline()
483 blk_throtl_update_limit_valid(tg->td); in throtl_pd_offline()
485 if (!tg->td->limit_valid[tg->td->limit_index]) in throtl_pd_offline()
486 throtl_upgrade_state(tg->td); in throtl_pd_offline()
493 del_timer_sync(&tg->service_queue.pending_timer); in throtl_pd_free()
494 blkg_rwstat_exit(&tg->stat_bytes); in throtl_pd_free()
495 blkg_rwstat_exit(&tg->stat_ios); in throtl_pd_free()
504 n = rb_first_cached(&parent_sq->pending_tree); in throtl_rb_first()
514 rb_erase_cached(n, &parent_sq->pending_tree); in throtl_rb_erase()
526 parent_sq->first_pending_disptime = tg->disptime; in update_min_dispatch_time()
531 struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; in tg_service_queue_add()
532 struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node; in tg_service_queue_add()
535 unsigned long key = tg->disptime; in tg_service_queue_add()
542 if (time_before(key, __tg->disptime)) in tg_service_queue_add()
543 node = &parent->rb_left; in tg_service_queue_add()
545 node = &parent->rb_right; in tg_service_queue_add()
550 rb_link_node(&tg->rb_node, parent, node); in tg_service_queue_add()
551 rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree, in tg_service_queue_add()
557 if (!(tg->flags & THROTL_TG_PENDING)) { in throtl_enqueue_tg()
559 tg->flags |= THROTL_TG_PENDING; in throtl_enqueue_tg()
560 tg->service_queue.parent_sq->nr_pending++; in throtl_enqueue_tg()
566 if (tg->flags & THROTL_TG_PENDING) { in throtl_dequeue_tg()
568 tg->service_queue.parent_sq; in throtl_dequeue_tg()
570 throtl_rb_erase(&tg->rb_node, parent_sq); in throtl_dequeue_tg()
571 --parent_sq->nr_pending; in throtl_dequeue_tg()
572 tg->flags &= ~THROTL_TG_PENDING; in throtl_dequeue_tg()
576 /* Call with queue lock held */
580 unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice; in throtl_schedule_pending_timer()
591 mod_timer(&sq->pending_timer, expires); in throtl_schedule_pending_timer()
593 expires - jiffies, jiffies); in throtl_schedule_pending_timer()
597 * throtl_schedule_next_dispatch - schedule the next dispatch cycle
601 * Arm @sq->pending_timer so that the next dispatch cycle starts on the
611 * delay before dispatch starts even if @sq->first_pending_disptime is not
618 if (!sq->nr_pending) in throtl_schedule_next_dispatch()
624 if (force || time_after(sq->first_pending_disptime, jiffies)) { in throtl_schedule_next_dispatch()
625 throtl_schedule_pending_timer(sq, sq->first_pending_disptime); in throtl_schedule_next_dispatch()
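A compact sketch of the clamping applied when the pending timer is armed: however far away the first pending dispatch time is, the timer fires no later than eight throttle slices from now. Plain integers stand in for jiffies; the helper name is illustrative.

/* Bound the pending-timer expiry to at most 8 throttle slices out. */
static unsigned long clamp_pending_expiry(unsigned long expires,
					  unsigned long now,
					  unsigned long throtl_slice)
{
	unsigned long max_expire = now + 8 * throtl_slice;

	if (expires > max_expire)
		expires = max_expire;
	return expires;
}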
636 tg->bytes_disp[rw] = 0; in throtl_start_new_slice_with_credit()
637 tg->io_disp[rw] = 0; in throtl_start_new_slice_with_credit()
638 tg->carryover_bytes[rw] = 0; in throtl_start_new_slice_with_credit()
639 tg->carryover_ios[rw] = 0; in throtl_start_new_slice_with_credit()
647 if (time_after_eq(start, tg->slice_start[rw])) in throtl_start_new_slice_with_credit()
648 tg->slice_start[rw] = start; in throtl_start_new_slice_with_credit()
650 tg->slice_end[rw] = jiffies + tg->td->throtl_slice; in throtl_start_new_slice_with_credit()
651 throtl_log(&tg->service_queue, in throtl_start_new_slice_with_credit()
653 rw == READ ? 'R' : 'W', tg->slice_start[rw], in throtl_start_new_slice_with_credit()
654 tg->slice_end[rw], jiffies); in throtl_start_new_slice_with_credit()
660 tg->bytes_disp[rw] = 0; in throtl_start_new_slice()
661 tg->io_disp[rw] = 0; in throtl_start_new_slice()
662 tg->slice_start[rw] = jiffies; in throtl_start_new_slice()
663 tg->slice_end[rw] = jiffies + tg->td->throtl_slice; in throtl_start_new_slice()
665 tg->carryover_bytes[rw] = 0; in throtl_start_new_slice()
666 tg->carryover_ios[rw] = 0; in throtl_start_new_slice()
669 throtl_log(&tg->service_queue, in throtl_start_new_slice()
671 rw == READ ? 'R' : 'W', tg->slice_start[rw], in throtl_start_new_slice()
672 tg->slice_end[rw], jiffies); in throtl_start_new_slice()
678 tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice); in throtl_set_slice_end()
685 throtl_log(&tg->service_queue, in throtl_extend_slice()
687 rw == READ ? 'R' : 'W', tg->slice_start[rw], in throtl_extend_slice()
688 tg->slice_end[rw], jiffies); in throtl_extend_slice()
694 if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) in throtl_slice_used()
706 BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw])); in throtl_trim_slice()
709 * If bps are unlimited (-1), then the time slice doesn't get in throtl_trim_slice()
724 throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice); in throtl_trim_slice()
726 time_elapsed = jiffies - tg->slice_start[rw]; in throtl_trim_slice()
728 nr_slices = time_elapsed / tg->td->throtl_slice; in throtl_trim_slice()
732 tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices; in throtl_trim_slice()
736 io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) / in throtl_trim_slice()
742 if (tg->bytes_disp[rw] >= bytes_trim) in throtl_trim_slice()
743 tg->bytes_disp[rw] -= bytes_trim; in throtl_trim_slice()
745 tg->bytes_disp[rw] = 0; in throtl_trim_slice()
747 if (tg->io_disp[rw] >= io_trim) in throtl_trim_slice()
748 tg->io_disp[rw] -= io_trim; in throtl_trim_slice()
750 tg->io_disp[rw] = 0; in throtl_trim_slice()
752 tg->slice_start[rw] += nr_slices * tg->td->throtl_slice; in throtl_trim_slice()
754 throtl_log(&tg->service_queue, in throtl_trim_slice()
757 tg->slice_start[rw], tg->slice_end[rw], jiffies); in throtl_trim_slice()
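The trimming arithmetic above can be restated as a small standalone function: for every complete throttle slice that has elapsed, the bytes and IOs the limits would have allowed are forgiven and the slice start is advanced. HZ, the struct and the names are simplifications for illustration, not kernel types.

#include <stdint.h>

#define HZ 1000UL   /* assumption: 1000 ticks per second */

struct slice_state {
	unsigned long slice_start;   /* in ticks */
	uint64_t bytes_disp;
	unsigned int io_disp;
};

static void trim_slice(struct slice_state *s, unsigned long now,
		       unsigned long throtl_slice,
		       uint64_t bps_limit, unsigned int iops_limit)
{
	unsigned long nr_slices = (now - s->slice_start) / throtl_slice;
	uint64_t bytes_trim;
	unsigned int io_trim;

	if (!nr_slices)
		return;

	bytes_trim = bps_limit * throtl_slice * nr_slices / HZ;
	io_trim = (unsigned int)((uint64_t)iops_limit * throtl_slice *
				 nr_slices / HZ);

	s->bytes_disp = s->bytes_disp >= bytes_trim ?
			s->bytes_disp - bytes_trim : 0;
	s->io_disp = s->io_disp >= io_trim ? s->io_disp - io_trim : 0;
	s->slice_start += nr_slices * throtl_slice;
}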
791 unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw]; in __tg_update_carryover()
802 tg->carryover_bytes[rw] += in __tg_update_carryover()
803 calculate_bytes_allowed(bps_limit, jiffy_elapsed) - in __tg_update_carryover()
804 tg->bytes_disp[rw]; in __tg_update_carryover()
806 tg->carryover_ios[rw] += in __tg_update_carryover()
807 calculate_io_allowed(iops_limit, jiffy_elapsed) - in __tg_update_carryover()
808 tg->io_disp[rw]; in __tg_update_carryover()
813 if (tg->service_queue.nr_queued[READ]) in tg_update_carryover()
815 if (tg->service_queue.nr_queued[WRITE]) in tg_update_carryover()
819 throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__, in tg_update_carryover()
820 tg->carryover_bytes[READ], tg->carryover_bytes[WRITE], in tg_update_carryover()
821 tg->carryover_ios[READ], tg->carryover_ios[WRITE]); in tg_update_carryover()
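A sketch of the carry-over bookkeeping in __tg_update_carryover(): whatever the limit would have allowed during the elapsed part of the slice, minus what was actually dispatched, is credited (or debited, if negative) against the next slice. calculate_bytes_allowed() is modelled as a simple rate-times-time product and the names are illustrative.

#include <stdint.h>

#define HZ 1000UL   /* assumption */

static int64_t bytes_allowed(uint64_t bps_limit, unsigned long elapsed_ticks)
{
	return (int64_t)(bps_limit * elapsed_ticks / HZ);
}

/* Carry unused (or overdrawn) budget from the elapsed slice forward. */
static void update_carryover(int64_t *carryover_bytes, uint64_t bytes_disp,
			     uint64_t bps_limit, unsigned long elapsed_ticks)
{
	*carryover_bytes += bytes_allowed(bps_limit, elapsed_ticks) -
			    (int64_t)bytes_disp;
}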
837 jiffy_elapsed = jiffies - tg->slice_start[rw]; in tg_within_iops_limit()
840 jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice); in tg_within_iops_limit()
842 tg->carryover_ios[rw]; in tg_within_iops_limit()
843 if (tg->io_disp[rw] + 1 <= io_allowed) { in tg_within_iops_limit()
850 jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed; in tg_within_iops_limit()
872 jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; in tg_within_bps_limit()
876 jiffy_elapsed_rnd = tg->td->throtl_slice; in tg_within_bps_limit()
878 jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice); in tg_within_bps_limit()
880 tg->carryover_bytes[rw]; in tg_within_bps_limit()
881 if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) { in tg_within_bps_limit()
888 extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; in tg_within_bps_limit()
898 jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed); in tg_within_bps_limit()
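The bps wait computation reduces to: how many bytes beyond the current allowance would this bio push the group, and how long at bps_limit until that surplus is earned back (at least one tick). The rounded-up slice handling and carryover are omitted here; this is a hedged model, not the kernel function.

#include <stdint.h>

#define HZ 1000UL   /* assumption */

static unsigned long bps_wait(uint64_t bytes_disp, uint64_t bio_size,
			      uint64_t bytes_allowed, uint64_t bps_limit)
{
	uint64_t extra_bytes;
	unsigned long jiffy_wait;

	if (bytes_disp + bio_size <= bytes_allowed)
		return 0;                       /* within limit, no wait */

	extra_bytes = bytes_disp + bio_size - bytes_allowed;
	jiffy_wait = (unsigned long)(extra_bytes * HZ / bps_limit);
	if (!jiffy_wait)
		jiffy_wait = 1;                 /* wait at least one tick */
	return jiffy_wait;
}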
906 * of jiffies to wait before this bio is within the IO rate and can be dispatched
922 BUG_ON(tg->service_queue.nr_queued[rw] && in tg_may_dispatch()
923 bio != throtl_peek_queued(&tg->service_queue.queued[rw])); in tg_may_dispatch()
925 /* If tg->bps = -1, then BW is unlimited */ in tg_may_dispatch()
927 tg->flags & THROTL_TG_CANCELING) { in tg_may_dispatch()
940 if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw])) in tg_may_dispatch()
943 if (time_before(tg->slice_end[rw], in tg_may_dispatch()
944 jiffies + tg->td->throtl_slice)) in tg_may_dispatch()
946 jiffies + tg->td->throtl_slice); in tg_may_dispatch()
961 if (time_before(tg->slice_end[rw], jiffies + max_wait)) in tg_may_dispatch()
974 tg->bytes_disp[rw] += bio_size; in throtl_charge_bio()
975 tg->last_bytes_disp[rw] += bio_size; in throtl_charge_bio()
978 tg->io_disp[rw]++; in throtl_charge_bio()
979 tg->last_io_disp[rw]++; in throtl_charge_bio()
983 * throtl_add_bio_tg - add a bio to the specified throtl_grp
989 * tg->qnode_on_self[] is used.
994 struct throtl_service_queue *sq = &tg->service_queue; in throtl_add_bio_tg()
998 qn = &tg->qnode_on_self[rw]; in throtl_add_bio_tg()
1006 if (!sq->nr_queued[rw]) in throtl_add_bio_tg()
1007 tg->flags |= THROTL_TG_WAS_EMPTY; in throtl_add_bio_tg()
1009 throtl_qnode_add_bio(bio, qn, &sq->queued[rw]); in throtl_add_bio_tg()
1011 sq->nr_queued[rw]++; in throtl_add_bio_tg()
1017 struct throtl_service_queue *sq = &tg->service_queue; in tg_update_disptime()
1018 unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; in tg_update_disptime()
1021 bio = throtl_peek_queued(&sq->queued[READ]); in tg_update_disptime()
1025 bio = throtl_peek_queued(&sq->queued[WRITE]); in tg_update_disptime()
1033 throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq); in tg_update_disptime()
1034 tg->disptime = disptime; in tg_update_disptime()
1038 tg->flags &= ~THROTL_TG_WAS_EMPTY; in tg_update_disptime()
1046 child_tg->slice_start[rw]); in start_parent_slice_with_credit()
1053 struct throtl_service_queue *sq = &tg->service_queue; in tg_dispatch_one_bio()
1054 struct throtl_service_queue *parent_sq = sq->parent_sq; in tg_dispatch_one_bio()
1065 bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put); in tg_dispatch_one_bio()
1066 sq->nr_queued[rw]--; in tg_dispatch_one_bio()
1074 * @td->service_queue, @bio is ready to be issued. Put it on its in tg_dispatch_one_bio()
1079 throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg); in tg_dispatch_one_bio()
1082 throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw], in tg_dispatch_one_bio()
1083 &parent_sq->queued[rw]); in tg_dispatch_one_bio()
1084 BUG_ON(tg->td->nr_queued[rw] <= 0); in tg_dispatch_one_bio()
1085 tg->td->nr_queued[rw]--; in tg_dispatch_one_bio()
1096 struct throtl_service_queue *sq = &tg->service_queue; in throtl_dispatch_tg()
1099 unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads; in throtl_dispatch_tg()
1104 while ((bio = throtl_peek_queued(&sq->queued[READ])) && in throtl_dispatch_tg()
1114 while ((bio = throtl_peek_queued(&sq->queued[WRITE])) && in throtl_dispatch_tg()
1135 if (!parent_sq->nr_pending) in throtl_select_dispatch()
1142 if (time_before(jiffies, tg->disptime)) in throtl_select_dispatch()
1147 sq = &tg->service_queue; in throtl_select_dispatch()
1148 if (sq->nr_queued[READ] || sq->nr_queued[WRITE]) in throtl_select_dispatch()
1163 * throtl_pending_timer_fn - timer function for service_queue->pending_timer
1174 * the top-level service_tree is reached, throtl_data->dispatch_work is
1189 q = tg->pd.blkg->q; in throtl_pending_timer_fn()
1191 q = td->queue; in throtl_pending_timer_fn()
1193 spin_lock_irq(&q->queue_lock); in throtl_pending_timer_fn()
1195 if (!q->root_blkg) in throtl_pending_timer_fn()
1202 parent_sq = sq->parent_sq; in throtl_pending_timer_fn()
1207 sq->nr_queued[READ] + sq->nr_queued[WRITE], in throtl_pending_timer_fn()
1208 sq->nr_queued[READ], sq->nr_queued[WRITE]); in throtl_pending_timer_fn()
1220 spin_unlock_irq(&q->queue_lock); in throtl_pending_timer_fn()
1222 spin_lock_irq(&q->queue_lock); in throtl_pending_timer_fn()
1230 if (tg->flags & THROTL_TG_WAS_EMPTY) { in throtl_pending_timer_fn()
1240 /* reached the top-level, queue issuing */ in throtl_pending_timer_fn()
1241 queue_work(kthrotld_workqueue, &td->dispatch_work); in throtl_pending_timer_fn()
1244 spin_unlock_irq(&q->queue_lock); in throtl_pending_timer_fn()
1248 * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
1252 * of throtl_data->service_queue. Those bios are ready and issued by this
1259 struct throtl_service_queue *td_sq = &td->service_queue; in blk_throtl_dispatch_work_fn()
1260 struct request_queue *q = td->queue; in blk_throtl_dispatch_work_fn()
1268 spin_lock_irq(&q->queue_lock); in blk_throtl_dispatch_work_fn()
1270 while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL))) in blk_throtl_dispatch_work_fn()
1272 spin_unlock_irq(&q->queue_lock); in blk_throtl_dispatch_work_fn()
1307 &blkcg_policy_throtl, seq_cft(sf)->private, false); in tg_print_conf_u64()
1314 &blkcg_policy_throtl, seq_cft(sf)->private, false); in tg_print_conf_uint()
1320 struct throtl_service_queue *sq = &tg->service_queue; in tg_conf_updated()
1324 throtl_log(&tg->service_queue, in tg_conf_updated()
1334 * blk-throttle. in tg_conf_updated()
1337 global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { in tg_conf_updated()
1343 if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent || in tg_conf_updated()
1344 !blkg->parent->parent) in tg_conf_updated()
1346 parent_tg = blkg_to_tg(blkg->parent); in tg_conf_updated()
1349 * higher latency target in tg_conf_updated()
1351 this_tg->idletime_threshold = min(this_tg->idletime_threshold, in tg_conf_updated()
1352 parent_tg->idletime_threshold); in tg_conf_updated()
1353 this_tg->latency_target = max(this_tg->latency_target, in tg_conf_updated()
1354 parent_tg->latency_target); in tg_conf_updated()
1368 if (tg->flags & THROTL_TG_PENDING) { in tg_conf_updated()
1370 throtl_schedule_next_dispatch(sq->parent_sq, true); in tg_conf_updated()
1387 ret = -EINVAL; in tg_set_conf()
1397 *(u64 *)((void *)tg + of_cft(of)->private) = v; in tg_set_conf()
1399 *(unsigned int *)((void *)tg + of_cft(of)->private) = v; in tg_set_conf()
1424 seq_cft(sf)->private, true); in tg_print_rwstat()
1442 seq_cft(sf)->private, true); in tg_print_rwstat_recursive()
1498 const char *dname = blkg_dev_name(pd->blkg); in tg_prfill_limit()
1516 if (tg->bps_conf[READ][off] == bps_dft && in tg_prfill_limit()
1517 tg->bps_conf[WRITE][off] == bps_dft && in tg_prfill_limit()
1518 tg->iops_conf[READ][off] == iops_dft && in tg_prfill_limit()
1519 tg->iops_conf[WRITE][off] == iops_dft && in tg_prfill_limit()
1521 (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD && in tg_prfill_limit()
1522 tg->latency_target_conf == DFL_LATENCY_TARGET))) in tg_prfill_limit()
1525 if (tg->bps_conf[READ][off] != U64_MAX) in tg_prfill_limit()
1527 tg->bps_conf[READ][off]); in tg_prfill_limit()
1528 if (tg->bps_conf[WRITE][off] != U64_MAX) in tg_prfill_limit()
1530 tg->bps_conf[WRITE][off]); in tg_prfill_limit()
1531 if (tg->iops_conf[READ][off] != UINT_MAX) in tg_prfill_limit()
1533 tg->iops_conf[READ][off]); in tg_prfill_limit()
1534 if (tg->iops_conf[WRITE][off] != UINT_MAX) in tg_prfill_limit()
1536 tg->iops_conf[WRITE][off]); in tg_prfill_limit()
1538 if (tg->idletime_threshold_conf == ULONG_MAX) in tg_prfill_limit()
1542 tg->idletime_threshold_conf); in tg_prfill_limit()
1544 if (tg->latency_target_conf == ULONG_MAX) in tg_prfill_limit()
1545 strcpy(latency_time, " latency=max"); in tg_prfill_limit()
1548 " latency=%lu", tg->latency_target_conf); in tg_prfill_limit()
1560 &blkcg_policy_throtl, seq_cft(sf)->private, false); in tg_print_limit()
1574 int index = of_cft(of)->private; in tg_set_limit()
1583 v[0] = tg->bps_conf[READ][index]; in tg_set_limit()
1584 v[1] = tg->bps_conf[WRITE][index]; in tg_set_limit()
1585 v[2] = tg->iops_conf[READ][index]; in tg_set_limit()
1586 v[3] = tg->iops_conf[WRITE][index]; in tg_set_limit()
1588 idle_time = tg->idletime_threshold_conf; in tg_set_limit()
1589 latency_time = tg->latency_target_conf; in tg_set_limit()
1602 ret = -EINVAL; in tg_set_limit()
1608 ret = -ERANGE; in tg_set_limit()
1612 ret = -EINVAL; in tg_set_limit()
1623 else if (off == LIMIT_LOW && !strcmp(tok, "latency")) in tg_set_limit()
1629 tg->bps_conf[READ][index] = v[0]; in tg_set_limit()
1630 tg->bps_conf[WRITE][index] = v[1]; in tg_set_limit()
1631 tg->iops_conf[READ][index] = v[2]; in tg_set_limit()
1632 tg->iops_conf[WRITE][index] = v[3]; in tg_set_limit()
1635 tg->bps[READ][index] = v[0]; in tg_set_limit()
1636 tg->bps[WRITE][index] = v[1]; in tg_set_limit()
1637 tg->iops[READ][index] = v[2]; in tg_set_limit()
1638 tg->iops[WRITE][index] = v[3]; in tg_set_limit()
1640 tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW], in tg_set_limit()
1641 tg->bps_conf[READ][LIMIT_MAX]); in tg_set_limit()
1642 tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW], in tg_set_limit()
1643 tg->bps_conf[WRITE][LIMIT_MAX]); in tg_set_limit()
1644 tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW], in tg_set_limit()
1645 tg->iops_conf[READ][LIMIT_MAX]); in tg_set_limit()
1646 tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW], in tg_set_limit()
1647 tg->iops_conf[WRITE][LIMIT_MAX]); in tg_set_limit()
1648 tg->idletime_threshold_conf = idle_time; in tg_set_limit()
1649 tg->latency_target_conf = latency_time; in tg_set_limit()
1652 if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] || in tg_set_limit()
1653 tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) || in tg_set_limit()
1654 tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD || in tg_set_limit()
1655 tg->latency_target_conf == DFL_LATENCY_TARGET) { in tg_set_limit()
1656 tg->bps[READ][LIMIT_LOW] = 0; in tg_set_limit()
1657 tg->bps[WRITE][LIMIT_LOW] = 0; in tg_set_limit()
1658 tg->iops[READ][LIMIT_LOW] = 0; in tg_set_limit()
1659 tg->iops[WRITE][LIMIT_LOW] = 0; in tg_set_limit()
1660 tg->idletime_threshold = DFL_IDLE_THRESHOLD; in tg_set_limit()
1661 tg->latency_target = DFL_LATENCY_TARGET; in tg_set_limit()
1663 tg->idletime_threshold = tg->idletime_threshold_conf; in tg_set_limit()
1664 tg->latency_target = tg->latency_target_conf; in tg_set_limit()
1667 blk_throtl_update_limit_valid(tg->td); in tg_set_limit()
1668 if (tg->td->limit_valid[LIMIT_LOW]) { in tg_set_limit()
1670 tg->td->limit_index = LIMIT_LOW; in tg_set_limit()
1672 tg->td->limit_index = LIMIT_MAX; in tg_set_limit()
1674 tg->td->limit_valid[LIMIT_LOW]); in tg_set_limit()
1703 struct throtl_data *td = q->td; in throtl_shutdown_wq()
1705 cancel_work_sync(&td->dispatch_work); in throtl_shutdown_wq()
1721 struct request_queue *q = disk->queue; in blk_throtl_cancel_bios()
1725 spin_lock_irq(&q->queue_lock); in blk_throtl_cancel_bios()
1727 * queue_lock is held, rcu lock is not needed here technically. in blk_throtl_cancel_bios()
1728 * However, rcu lock is still held to emphasize that following in blk_throtl_cancel_bios()
1732 blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { in blk_throtl_cancel_bios()
1734 struct throtl_service_queue *sq = &tg->service_queue; in blk_throtl_cancel_bios()
1740 blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING; in blk_throtl_cancel_bios()
1750 spin_unlock_irq(&q->queue_lock); in blk_throtl_cancel_bios()
1758 if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW]) in __tg_last_low_overflow_time()
1759 rtime = tg->last_low_overflow_time[READ]; in __tg_last_low_overflow_time()
1760 if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) in __tg_last_low_overflow_time()
1761 wtime = tg->last_low_overflow_time[WRITE]; in __tg_last_low_overflow_time()
1773 parent_sq = parent->service_queue.parent_sq; in tg_last_low_overflow_time()
1782 if (!parent->bps[READ][LIMIT_LOW] && in tg_last_low_overflow_time()
1783 !parent->iops[READ][LIMIT_LOW] && in tg_last_low_overflow_time()
1784 !parent->bps[WRITE][LIMIT_LOW] && in tg_last_low_overflow_time()
1785 !parent->iops[WRITE][LIMIT_LOW]) in tg_last_low_overflow_time()
1797 * - single idle is too long, longer than a fixed value (in case user in throtl_tg_is_idle()
1799 * - average think time is more than threshold in throtl_tg_is_idle()
1800 * - IO latency is largely below threshold in throtl_tg_is_idle()
1805 time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold); in throtl_tg_is_idle()
1806 ret = tg->latency_target == DFL_LATENCY_TARGET || in throtl_tg_is_idle()
1807 tg->idletime_threshold == DFL_IDLE_THRESHOLD || in throtl_tg_is_idle()
1808 (ktime_get_ns() >> 10) - tg->last_finish_time > time || in throtl_tg_is_idle()
1809 tg->avg_idletime > tg->idletime_threshold || in throtl_tg_is_idle()
1810 (tg->latency_target && tg->bio_cnt && in throtl_tg_is_idle()
1811 tg->bad_bio_cnt * 5 < tg->bio_cnt); in throtl_tg_is_idle()
1812 throtl_log(&tg->service_queue, in throtl_tg_is_idle()
1814 tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt, in throtl_tg_is_idle()
1815 tg->bio_cnt, ret, tg->td->scale); in throtl_tg_is_idle()
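The idleness test enumerated in the comment above is a plain disjunction; a boolean sketch with the sentinel defaults reduced to explicit flags, the MAX_IDLE_TIME cap and the latency_target gate on the last clause omitted for brevity. Times are roughly microseconds (the kernel shifts nanoseconds right by 10).

#include <stdbool.h>

/* A group counts as idle -- and thus not in need of its low limit -- when
 * any of these conditions holds. */
static bool tg_is_idle(bool latency_target_set, bool idle_threshold_set,
		       unsigned long now_us, unsigned long last_finish_us,
		       unsigned long avg_idletime_us,
		       unsigned long idletime_threshold_us,
		       unsigned long bio_cnt, unsigned long bad_bio_cnt)
{
	return !latency_target_set ||
	       !idle_threshold_set ||
	       now_us - last_finish_us > 4 * idletime_threshold_us ||
	       avg_idletime_us > idletime_threshold_us ||
	       (bio_cnt && bad_bio_cnt * 5 < bio_cnt);
}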
1821 struct throtl_service_queue *sq = &tg->service_queue; in throtl_tg_can_upgrade()
1828 read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW]; in throtl_tg_can_upgrade()
1829 write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]; in throtl_tg_can_upgrade()
1832 if (read_limit && sq->nr_queued[READ] && in throtl_tg_can_upgrade()
1833 (!write_limit || sq->nr_queued[WRITE])) in throtl_tg_can_upgrade()
1835 if (write_limit && sq->nr_queued[WRITE] && in throtl_tg_can_upgrade()
1836 (!read_limit || sq->nr_queued[READ])) in throtl_tg_can_upgrade()
1840 tg_last_low_overflow_time(tg) + tg->td->throtl_slice) && in throtl_tg_can_upgrade()
1851 tg = sq_to_tg(tg->service_queue.parent_sq); in throtl_hierarchy_can_upgrade()
1852 if (!tg || !tg_to_blkg(tg)->parent) in throtl_hierarchy_can_upgrade()
1864 if (td->limit_index != LIMIT_LOW) in throtl_can_upgrade()
1867 if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice)) in throtl_can_upgrade()
1871 blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { in throtl_can_upgrade()
1876 if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children)) in throtl_can_upgrade()
1891 if (tg->td->limit_index != LIMIT_LOW) in throtl_upgrade_check()
1894 if (time_after(tg->last_check_time + tg->td->throtl_slice, now)) in throtl_upgrade_check()
1897 tg->last_check_time = now; in throtl_upgrade_check()
1900 __tg_last_low_overflow_time(tg) + tg->td->throtl_slice)) in throtl_upgrade_check()
1903 if (throtl_can_upgrade(tg->td, NULL)) in throtl_upgrade_check()
1904 throtl_upgrade_state(tg->td); in throtl_upgrade_check()
1912 throtl_log(&td->service_queue, "upgrade to max"); in throtl_upgrade_state()
1913 td->limit_index = LIMIT_MAX; in throtl_upgrade_state()
1914 td->low_upgrade_time = jiffies; in throtl_upgrade_state()
1915 td->scale = 0; in throtl_upgrade_state()
1917 blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { in throtl_upgrade_state()
1919 struct throtl_service_queue *sq = &tg->service_queue; in throtl_upgrade_state()
1921 tg->disptime = jiffies - 1; in throtl_upgrade_state()
1926 throtl_select_dispatch(&td->service_queue); in throtl_upgrade_state()
1927 throtl_schedule_next_dispatch(&td->service_queue, true); in throtl_upgrade_state()
1928 queue_work(kthrotld_workqueue, &td->dispatch_work); in throtl_upgrade_state()
1933 td->scale /= 2; in throtl_downgrade_state()
1935 throtl_log(&td->service_queue, "downgrade, scale %d", td->scale); in throtl_downgrade_state()
1936 if (td->scale) { in throtl_downgrade_state()
1937 td->low_upgrade_time = jiffies - td->scale * td->throtl_slice; in throtl_downgrade_state()
1941 td->limit_index = LIMIT_LOW; in throtl_downgrade_state()
1942 td->low_downgrade_time = jiffies; in throtl_downgrade_state()
1947 struct throtl_data *td = tg->td; in throtl_tg_can_downgrade()
1954 if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) && in throtl_tg_can_downgrade()
1956 td->throtl_slice) && in throtl_tg_can_downgrade()
1958 !list_empty(&tg_to_blkg(tg)->blkcg->css.children))) in throtl_tg_can_downgrade()
1968 tg = sq_to_tg(tg->service_queue.parent_sq); in throtl_hierarchy_can_downgrade()
1969 if (!tg || !tg_to_blkg(tg)->parent) in throtl_hierarchy_can_downgrade()
1982 if (tg->td->limit_index != LIMIT_MAX || in throtl_downgrade_check()
1983 !tg->td->limit_valid[LIMIT_LOW]) in throtl_downgrade_check()
1985 if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children)) in throtl_downgrade_check()
1987 if (time_after(tg->last_check_time + tg->td->throtl_slice, now)) in throtl_downgrade_check()
1990 elapsed_time = now - tg->last_check_time; in throtl_downgrade_check()
1991 tg->last_check_time = now; in throtl_downgrade_check()
1994 tg->td->throtl_slice)) in throtl_downgrade_check()
1997 if (tg->bps[READ][LIMIT_LOW]) { in throtl_downgrade_check()
1998 bps = tg->last_bytes_disp[READ] * HZ; in throtl_downgrade_check()
2000 if (bps >= tg->bps[READ][LIMIT_LOW]) in throtl_downgrade_check()
2001 tg->last_low_overflow_time[READ] = now; in throtl_downgrade_check()
2004 if (tg->bps[WRITE][LIMIT_LOW]) { in throtl_downgrade_check()
2005 bps = tg->last_bytes_disp[WRITE] * HZ; in throtl_downgrade_check()
2007 if (bps >= tg->bps[WRITE][LIMIT_LOW]) in throtl_downgrade_check()
2008 tg->last_low_overflow_time[WRITE] = now; in throtl_downgrade_check()
2011 if (tg->iops[READ][LIMIT_LOW]) { in throtl_downgrade_check()
2012 iops = tg->last_io_disp[READ] * HZ / elapsed_time; in throtl_downgrade_check()
2013 if (iops >= tg->iops[READ][LIMIT_LOW]) in throtl_downgrade_check()
2014 tg->last_low_overflow_time[READ] = now; in throtl_downgrade_check()
2017 if (tg->iops[WRITE][LIMIT_LOW]) { in throtl_downgrade_check()
2018 iops = tg->last_io_disp[WRITE] * HZ / elapsed_time; in throtl_downgrade_check()
2019 if (iops >= tg->iops[WRITE][LIMIT_LOW]) in throtl_downgrade_check()
2020 tg->last_low_overflow_time[WRITE] = now; in throtl_downgrade_check()
2028 throtl_downgrade_state(tg->td); in throtl_downgrade_check()
2030 tg->last_bytes_disp[READ] = 0; in throtl_downgrade_check()
2031 tg->last_bytes_disp[WRITE] = 0; in throtl_downgrade_check()
2032 tg->last_io_disp[READ] = 0; in throtl_downgrade_check()
2033 tg->last_io_disp[WRITE] = 0; in throtl_downgrade_check()
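The rate check in throtl_downgrade_check() just converts the per-window counters into bps/iops before comparing against the low limits; a sketch with do_div() and jiffies replaced by plain integer math, helper names illustrative.

#include <stdint.h>

#define HZ 1000UL   /* assumption */

/* Observed bandwidth over the elapsed check window, in bytes/sec. */
static uint64_t observed_bps(uint64_t bytes_since_last_check,
			     unsigned long elapsed_ticks)
{
	return elapsed_ticks ? bytes_since_last_check * HZ / elapsed_ticks : 0;
}

/* Observed IO rate over the same window, in IOs/sec. */
static unsigned int observed_iops(unsigned long ios_since_last_check,
				  unsigned long elapsed_ticks)
{
	return elapsed_ticks ?
	       (unsigned int)((uint64_t)ios_since_last_check * HZ /
			      elapsed_ticks) : 0;
}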
2039 unsigned long last_finish_time = tg->last_finish_time; in blk_throtl_update_idletime()
2046 last_finish_time == tg->checked_last_finish_time) in blk_throtl_update_idletime()
2049 tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3; in blk_throtl_update_idletime()
2050 tg->checked_last_finish_time = last_finish_time; in blk_throtl_update_idletime()
2058 unsigned long latency[2]; in throtl_update_latency_buckets() local
2060 if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW]) in throtl_update_latency_buckets()
2062 if (time_before(jiffies, td->last_calculate_time + HZ)) in throtl_update_latency_buckets()
2064 td->last_calculate_time = jiffies; in throtl_update_latency_buckets()
2069 struct latency_bucket *tmp = &td->tmp_buckets[rw][i]; in throtl_update_latency_buckets()
2075 bucket = per_cpu_ptr(td->latency_buckets[rw], in throtl_update_latency_buckets()
2077 tmp->total_latency += bucket[i].total_latency; in throtl_update_latency_buckets()
2078 tmp->samples += bucket[i].samples; in throtl_update_latency_buckets()
2083 if (tmp->samples >= 32) { in throtl_update_latency_buckets()
2084 int samples = tmp->samples; in throtl_update_latency_buckets()
2086 latency[rw] = tmp->total_latency; in throtl_update_latency_buckets()
2088 tmp->total_latency = 0; in throtl_update_latency_buckets()
2089 tmp->samples = 0; in throtl_update_latency_buckets()
2090 latency[rw] /= samples; in throtl_update_latency_buckets()
2091 if (latency[rw] == 0) in throtl_update_latency_buckets()
2093 avg_latency[rw][i].latency = latency[rw]; in throtl_update_latency_buckets()
2100 if (!avg_latency[rw][i].latency) { in throtl_update_latency_buckets()
2101 if (td->avg_buckets[rw][i].latency < last_latency[rw]) in throtl_update_latency_buckets()
2102 td->avg_buckets[rw][i].latency = in throtl_update_latency_buckets()
2107 if (!td->avg_buckets[rw][i].valid) in throtl_update_latency_buckets()
2108 latency[rw] = avg_latency[rw][i].latency; in throtl_update_latency_buckets()
2110 latency[rw] = (td->avg_buckets[rw][i].latency * 7 + in throtl_update_latency_buckets()
2111 avg_latency[rw][i].latency) >> 3; in throtl_update_latency_buckets()
2113 td->avg_buckets[rw][i].latency = max(latency[rw], in throtl_update_latency_buckets()
2115 td->avg_buckets[rw][i].valid = true; in throtl_update_latency_buckets()
2116 last_latency[rw] = td->avg_buckets[rw][i].latency; in throtl_update_latency_buckets()
2121 throtl_log(&td->service_queue, in throtl_update_latency_buckets()
2122 "Latency bucket %d: read latency=%ld, read valid=%d, " in throtl_update_latency_buckets()
2123 "write latency=%ld, write valid=%d", i, in throtl_update_latency_buckets()
2124 td->avg_buckets[READ][i].latency, in throtl_update_latency_buckets()
2125 td->avg_buckets[READ][i].valid, in throtl_update_latency_buckets()
2126 td->avg_buckets[WRITE][i].latency, in throtl_update_latency_buckets()
2127 td->avg_buckets[WRITE][i].valid); in throtl_update_latency_buckets()
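The per-bucket average above is the familiar 7/8-weighted moving average, additionally kept monotonically non-decreasing across bucket sizes (a larger request class should not report a lower baseline latency than a smaller one). A sketch of just that update step; parameter names are illustrative.

static unsigned long update_bucket_latency(unsigned long avg,
					   unsigned long sample,
					   unsigned long prev_bucket_avg,
					   int first_sample)
{
	unsigned long latency;

	/* Fold the new sample in: avg = (avg * 7 + sample) / 8. */
	latency = first_sample ? sample : (avg * 7 + sample) >> 3;

	/* Never fall below the previous (smaller-size) bucket's average. */
	if (latency < prev_bucket_avg)
		latency = prev_bucket_avg;
	return latency;
}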
2159 struct request_queue *q = bdev_get_queue(bio->bi_bdev); in __blk_throtl_bio()
2160 struct blkcg_gq *blkg = bio->bi_blkg; in __blk_throtl_bio()
2166 struct throtl_data *td = tg->td; in __blk_throtl_bio()
2171 blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf, in __blk_throtl_bio()
2172 bio->bi_iter.bi_size); in __blk_throtl_bio()
2173 blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1); in __blk_throtl_bio()
2176 spin_lock_irq(&q->queue_lock); in __blk_throtl_bio()
2182 sq = &tg->service_queue; in __blk_throtl_bio()
2186 if (tg->last_low_overflow_time[rw] == 0) in __blk_throtl_bio()
2187 tg->last_low_overflow_time[rw] = jiffies; in __blk_throtl_bio()
2190 /* throtl is FIFO - if bios are already queued, should queue */ in __blk_throtl_bio()
2191 if (sq->nr_queued[rw]) in __blk_throtl_bio()
2196 tg->last_low_overflow_time[rw] = jiffies; in __blk_throtl_bio()
2225 qn = &tg->qnode_on_parent[rw]; in __blk_throtl_bio()
2226 sq = sq->parent_sq; in __blk_throtl_bio()
2234 /* out-of-limit, queue to @tg */ in __blk_throtl_bio()
2237 tg->bytes_disp[rw], bio->bi_iter.bi_size, in __blk_throtl_bio()
2239 tg->io_disp[rw], tg_iops_limit(tg, rw), in __blk_throtl_bio()
2240 sq->nr_queued[READ], sq->nr_queued[WRITE]); in __blk_throtl_bio()
2242 tg->last_low_overflow_time[rw] = jiffies; in __blk_throtl_bio()
2244 td->nr_queued[rw]++; in __blk_throtl_bio()
2254 if (tg->flags & THROTL_TG_WAS_EMPTY) { in __blk_throtl_bio()
2256 throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true); in __blk_throtl_bio()
2261 if (throttled || !td->track_bio_latency) in __blk_throtl_bio()
2262 bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY; in __blk_throtl_bio()
2264 spin_unlock_irq(&q->queue_lock); in __blk_throtl_bio()
2275 struct latency_bucket *latency; in throtl_track_latency() local
2278 if (!td || td->limit_index != LIMIT_LOW || in throtl_track_latency()
2280 !blk_queue_nonrot(td->queue)) in throtl_track_latency()
2285 latency = get_cpu_ptr(td->latency_buckets[rw]); in throtl_track_latency()
2286 latency[index].total_latency += time; in throtl_track_latency()
2287 latency[index].samples++; in throtl_track_latency()
2288 put_cpu_ptr(td->latency_buckets[rw]); in throtl_track_latency()
2293 struct request_queue *q = rq->q; in blk_throtl_stat_add()
2294 struct throtl_data *td = q->td; in blk_throtl_stat_add()
2310 blkg = bio->bi_blkg; in blk_throtl_bio_endio()
2314 if (!tg->td->limit_valid[LIMIT_LOW]) in blk_throtl_bio_endio()
2318 tg->last_finish_time = finish_time_ns >> 10; in blk_throtl_bio_endio()
2320 start_time = bio_issue_time(&bio->bi_issue) >> 10; in blk_throtl_bio_endio()
2325 lat = finish_time - start_time; in blk_throtl_bio_endio()
2327 if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY)) in blk_throtl_bio_endio()
2328 throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue), in blk_throtl_bio_endio()
2331 if (tg->latency_target && lat >= tg->td->filtered_latency) { in blk_throtl_bio_endio()
2335 bucket = request_bucket_index(bio_issue_size(&bio->bi_issue)); in blk_throtl_bio_endio()
2336 threshold = tg->td->avg_buckets[rw][bucket].latency + in blk_throtl_bio_endio()
2337 tg->latency_target; in blk_throtl_bio_endio()
2339 tg->bad_bio_cnt++; in blk_throtl_bio_endio()
2344 tg->bio_cnt++; in blk_throtl_bio_endio()
2347 if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) { in blk_throtl_bio_endio()
2348 tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies; in blk_throtl_bio_endio()
2349 tg->bio_cnt /= 2; in blk_throtl_bio_endio()
2350 tg->bad_bio_cnt /= 2; in blk_throtl_bio_endio()
2357 struct request_queue *q = disk->queue; in blk_throtl_init()
2361 td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); in blk_throtl_init()
2363 return -ENOMEM; in blk_throtl_init()
2364 td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) * in blk_throtl_init()
2366 if (!td->latency_buckets[READ]) { in blk_throtl_init()
2368 return -ENOMEM; in blk_throtl_init()
2370 td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) * in blk_throtl_init()
2372 if (!td->latency_buckets[WRITE]) { in blk_throtl_init()
2373 free_percpu(td->latency_buckets[READ]); in blk_throtl_init()
2375 return -ENOMEM; in blk_throtl_init()
2378 INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn); in blk_throtl_init()
2379 throtl_service_queue_init(&td->service_queue); in blk_throtl_init()
2381 q->td = td; in blk_throtl_init()
2382 td->queue = q; in blk_throtl_init()
2384 td->limit_valid[LIMIT_MAX] = true; in blk_throtl_init()
2385 td->limit_index = LIMIT_MAX; in blk_throtl_init()
2386 td->low_upgrade_time = jiffies; in blk_throtl_init()
2387 td->low_downgrade_time = jiffies; in blk_throtl_init()
2392 free_percpu(td->latency_buckets[READ]); in blk_throtl_init()
2393 free_percpu(td->latency_buckets[WRITE]); in blk_throtl_init()
2401 struct request_queue *q = disk->queue; in blk_throtl_exit()
2403 BUG_ON(!q->td); in blk_throtl_exit()
2404 del_timer_sync(&q->td->service_queue.pending_timer); in blk_throtl_exit()
2407 free_percpu(q->td->latency_buckets[READ]); in blk_throtl_exit()
2408 free_percpu(q->td->latency_buckets[WRITE]); in blk_throtl_exit()
2409 kfree(q->td); in blk_throtl_exit()
2414 struct request_queue *q = disk->queue; in blk_throtl_register()
2418 td = q->td; in blk_throtl_register()
2422 td->throtl_slice = DFL_THROTL_SLICE_SSD; in blk_throtl_register()
2423 td->filtered_latency = LATENCY_FILTERED_SSD; in blk_throtl_register()
2425 td->throtl_slice = DFL_THROTL_SLICE_HD; in blk_throtl_register()
2426 td->filtered_latency = LATENCY_FILTERED_HD; in blk_throtl_register()
2428 td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY; in blk_throtl_register()
2429 td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY; in blk_throtl_register()
2434 td->throtl_slice = DFL_THROTL_SLICE_HD; in blk_throtl_register()
2437 td->track_bio_latency = !queue_is_mq(q); in blk_throtl_register()
2438 if (!td->track_bio_latency) in blk_throtl_register()
2445 if (!q->td) in blk_throtl_sample_time_show()
2446 return -EINVAL; in blk_throtl_sample_time_show()
2447 return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice)); in blk_throtl_sample_time_show()
2456 if (!q->td) in blk_throtl_sample_time_store()
2457 return -EINVAL; in blk_throtl_sample_time_store()
2459 return -EINVAL; in blk_throtl_sample_time_store()
2462 return -EINVAL; in blk_throtl_sample_time_store()
2463 q->td->throtl_slice = t; in blk_throtl_sample_time_store()