Lines Matching +full:step +full:- +full:down
1 // SPDX-License-Identifier: GPL-2.0
6 * - Monitor latencies in a defined window of time.
7 * - If the minimum latency in the above window exceeds some target, increment
8 * scaling step and scale down queue depth by a factor of 2x. The monitoring
9 * window is then shrunk to 100 / sqrt(scaling step + 1).
10 * - For any window where we don't have solid data on what the latencies
12 * - If latencies look good, decrement scaling step.
13 * - If we're only doing writes, allow the scaling step to go negative. This
15 * scaling step of 0 if reads show up or the heavy writers finish. Unlike
17 * scaling step retains the default step==0 window size.
25 #include <linux/backing-dev.h>
28 #include "blk-stat.h"
29 #include "blk-wbt.h"
30 #include "blk-rq-qos.h"
75 * information to make a firm scale up/down decision.
89 unsigned long last_issue; /* last non-throttled issue */
90 unsigned long last_comp; /* last non-throttled comp */
104 rq->wbt_flags = 0; in wbt_clear_state()
109 return rq->wbt_flags; in wbt_flags()
114 return rq->wbt_flags & WBT_TRACKED; in wbt_is_tracked()
119 return rq->wbt_flags & WBT_READ; in wbt_is_read()
124 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
141 * information to scale up or down, scale up.
148 return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT && in rwb_enabled()
149 rwb->enable_state != WBT_STATE_OFF_MANUAL; in rwb_enabled()
168 struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb; in wb_recent_wait()
170 return time_before(jiffies, wb->dirty_sleep + HZ); in wb_recent_wait()
177 return &rwb->rq_wait[WBT_RWQ_KSWAPD]; in get_rq_wait()
179 return &rwb->rq_wait[WBT_RWQ_DISCARD]; in get_rq_wait()
181 return &rwb->rq_wait[WBT_RWQ_BG]; in get_rq_wait()
189 struct rq_wait *rqw = &rwb->rq_wait[i]; in rwb_wake_all()
191 if (wq_has_sleeper(&rqw->wait)) in rwb_wake_all()
192 wake_up_all(&rqw->wait); in rwb_wake_all()
201 inflight = atomic_dec_return(&rqw->inflight); in wbt_rqw_done()
205 * the device does write back caching, drop further down before we in wbt_rqw_done()
209 limit = rwb->wb_background; in wbt_rqw_done()
210 else if (rwb->wc && !wb_recent_wait(rwb)) in wbt_rqw_done()
213 limit = rwb->wb_normal; in wbt_rqw_done()
221 if (wq_has_sleeper(&rqw->wait)) { in wbt_rqw_done()
222 int diff = limit - inflight; in wbt_rqw_done()
224 if (!inflight || diff >= rwb->wb_background / 2) in wbt_rqw_done()
225 wake_up_all(&rqw->wait); in wbt_rqw_done()
250 if (rwb->sync_cookie == rq) { in wbt_done()
251 rwb->sync_issue = 0; in wbt_done()
252 rwb->sync_cookie = NULL; in wbt_done()
256 wb_timestamp(rwb, &rwb->last_comp); in wbt_done()
258 WARN_ON_ONCE(rq == rwb->sync_cookie); in wbt_done()
278 u64 now, issue = READ_ONCE(rwb->sync_issue); in rwb_sync_issue_lat()
280 if (!issue || !rwb->sync_cookie) in rwb_sync_issue_lat()
284 return now - issue; in rwb_sync_issue_lat()
292 ret += atomic_read(&rwb->rq_wait[i].inflight); in wbt_inflight()
306 struct backing_dev_info *bdi = rwb->rqos.disk->bdi; in latency_exceeded()
307 struct rq_depth *rqd = &rwb->rq_depth; in latency_exceeded()
320 if (thislat > rwb->cur_win_nsec || in latency_exceeded()
321 (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) { in latency_exceeded()
343 * If the 'min' latency exceeds our target, step down. in latency_exceeded()
345 if (stat[READ].min > rwb->min_lat_nsec) { in latency_exceeded()
351 if (rqd->scale_step) in latency_exceeded()
359 struct backing_dev_info *bdi = rwb->rqos.disk->bdi; in rwb_trace_step()
360 struct rq_depth *rqd = &rwb->rq_depth; in rwb_trace_step()
362 trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, in rwb_trace_step()
363 rwb->wb_background, rwb->wb_normal, rqd->max_depth); in rwb_trace_step()
368 if (rwb->min_lat_nsec == 0) { in calc_wb_limits()
369 rwb->wb_normal = rwb->wb_background = 0; in calc_wb_limits()
370 } else if (rwb->rq_depth.max_depth <= 2) { in calc_wb_limits()
371 rwb->wb_normal = rwb->rq_depth.max_depth; in calc_wb_limits()
372 rwb->wb_background = 1; in calc_wb_limits()
374 rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; in calc_wb_limits()
375 rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; in calc_wb_limits()
381 if (!rq_depth_scale_up(&rwb->rq_depth)) in scale_up()
384 rwb->unknown_cnt = 0; in scale_up()
391 if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) in scale_down()
394 rwb->unknown_cnt = 0; in scale_down()
395 rwb_trace_step(rwb, tracepoint_string("scale down")); in scale_down()
400 struct rq_depth *rqd = &rwb->rq_depth; in rwb_arm_timer()
402 if (rqd->scale_step > 0) { in rwb_arm_timer()
409 rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, in rwb_arm_timer()
410 int_sqrt((rqd->scale_step + 1) << 8)); in rwb_arm_timer()
413 * For step < 0, we don't want to increase/decrease the in rwb_arm_timer()
416 rwb->cur_win_nsec = rwb->win_nsec; in rwb_arm_timer()
419 blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec); in rwb_arm_timer()
424 struct rq_wb *rwb = cb->data; in wb_timer_fn()
425 struct rq_depth *rqd = &rwb->rq_depth; in wb_timer_fn()
429 if (!rwb->rqos.disk) in wb_timer_fn()
432 status = latency_exceeded(rwb, cb->stat); in wb_timer_fn()
434 trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight); in wb_timer_fn()
437 * If we exceeded the latency target, step down. If we did not, in wb_timer_fn()
438 * step one level up. If we don't know enough to say either exceeded in wb_timer_fn()
450 * We started a the center step, but don't have a valid in wb_timer_fn()
452 * Allow step to go negative, to increase write perf. in wb_timer_fn()
457 if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP) in wb_timer_fn()
462 * case, slowly return to center state (step == 0). in wb_timer_fn()
464 if (rqd->scale_step > 0) in wb_timer_fn()
466 else if (rqd->scale_step < 0) in wb_timer_fn()
474 * Re-arm timer, if we have IO in flight in wb_timer_fn()
476 if (rqd->scale_step || inflight) in wb_timer_fn()
482 struct rq_depth *rqd = &rwb->rq_depth; in wbt_update_limits()
484 rqd->scale_step = 0; in wbt_update_limits()
485 rqd->scaled_max = false; in wbt_update_limits()
505 return RQWB(rqos)->min_lat_nsec; in wbt_get_min_lat()
514 RQWB(rqos)->min_lat_nsec = val; in wbt_set_min_lat()
516 RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; in wbt_set_min_lat()
518 RQWB(rqos)->enable_state = WBT_STATE_OFF_MANUAL; in wbt_set_min_lat()
528 return time_before(now, rwb->last_issue + HZ / 10) || in close_io()
529 time_before(now, rwb->last_comp + HZ / 10); in close_io()
539 return rwb->wb_background; in get_limit()
550 limit = rwb->rq_depth.max_depth; in get_limit()
556 limit = rwb->wb_background; in get_limit()
558 limit = rwb->wb_normal; in get_limit()
572 return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf)); in wbt_inflight_cb()
578 wbt_rqw_done(data->rwb, rqw, data->wb_acct); in wbt_cleanup_cb()
605 if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == in wbt_should_throttle()
645 * If we do sleep, we'll release and re-grab it.
655 wb_timestamp(rwb, &rwb->last_issue); in wbt_wait()
659 __wbt_wait(rwb, flags, bio->bi_opf); in wbt_wait()
661 if (!blk_stat_is_active(rwb->cb)) in wbt_wait()
668 rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); in wbt_track()
685 if (wbt_is_read(rq) && !rwb->sync_issue) { in wbt_issue()
686 rwb->sync_cookie = rq; in wbt_issue()
687 rwb->sync_issue = rq->io_start_time_ns; in wbt_issue()
696 if (rq == rwb->sync_cookie) { in wbt_requeue()
697 rwb->sync_issue = 0; in wbt_requeue()
698 rwb->sync_cookie = NULL; in wbt_requeue()
706 RQWB(rqos)->wc = write_cache_on; in wbt_set_write_cache()
714 struct request_queue *q = disk->queue; in wbt_enable_default()
718 if (q->elevator && in wbt_enable_default()
719 test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags)) in wbt_enable_default()
725 if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) in wbt_enable_default()
726 RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; in wbt_enable_default()
730 /* Queue not registered? Maybe shutting down... */ in wbt_enable_default()
742 * We default to 2msec for non-rotational storage, and 75msec in wbt_default_latency_nsec()
761 return -1; in wbt_data_dir()
766 RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue); in wbt_queue_depth_changed()
774 blk_stat_remove_callback(rqos->disk->queue, rwb->cb); in wbt_exit()
775 blk_stat_free_callback(rwb->cb); in wbt_exit()
784 struct rq_qos *rqos = wbt_rq_qos(disk->queue); in wbt_disable_default()
789 if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { in wbt_disable_default()
790 blk_stat_deactivate(rwb->cb); in wbt_disable_default()
791 rwb->enable_state = WBT_STATE_OFF_DEFAULT; in wbt_disable_default()
802 seq_printf(m, "%llu\n", rwb->cur_win_nsec); in wbt_curr_win_nsec_show()
811 seq_printf(m, "%d\n", rwb->enable_state); in wbt_enabled_show()
819 seq_printf(m, "%u\n", rqos->id); in wbt_id_show()
831 atomic_read(&rwb->rq_wait[i].inflight)); in wbt_inflight_show()
840 seq_printf(m, "%lu\n", rwb->min_lat_nsec); in wbt_min_lat_nsec_show()
849 seq_printf(m, "%u\n", rwb->unknown_cnt); in wbt_unknown_cnt_show()
858 seq_printf(m, "%u\n", rwb->wb_normal); in wbt_normal_show()
867 seq_printf(m, "%u\n", rwb->wb_background); in wbt_background_show()
900 struct request_queue *q = disk->queue; in wbt_init()
907 return -ENOMEM; in wbt_init()
909 rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); in wbt_init()
910 if (!rwb->cb) { in wbt_init()
912 return -ENOMEM; in wbt_init()
916 rq_wait_init(&rwb->rq_wait[i]); in wbt_init()
918 rwb->last_comp = rwb->last_issue = jiffies; in wbt_init()
919 rwb->win_nsec = RWB_WINDOW_NSEC; in wbt_init()
920 rwb->enable_state = WBT_STATE_ON_DEFAULT; in wbt_init()
921 rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags); in wbt_init()
922 rwb->rq_depth.default_depth = RWB_DEF_DEPTH; in wbt_init()
923 rwb->min_lat_nsec = wbt_default_latency_nsec(q); in wbt_init()
924 rwb->rq_depth.queue_depth = blk_queue_depth(q); in wbt_init()
930 mutex_lock(&q->rq_qos_mutex); in wbt_init()
931 ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops); in wbt_init()
932 mutex_unlock(&q->rq_qos_mutex); in wbt_init()
936 blk_stat_add_callback(q, rwb->cb); in wbt_init()
941 blk_stat_free_callback(rwb->cb); in wbt_init()