/*
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
					   by the above parameters. For throughput. */

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	/*
	 * next request in sort order; either or both entries may be NULL
	 */
	struct request *next_rq[2];
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;
};

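/*
 * return the sector-sorted rbtree for the data direction of @rq
 */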
static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
	return &dd->sort_list[rq_data_dir(rq)];
}

/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
	struct rb_node *node = rb_next(&rq->rb_node);

	if (node)
		return rb_entry_rq(node);

	return NULL;
}

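/*
 * add rq to the sector-sorted rbtree for its data direction
 */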
static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(dd, rq);

	elv_rb_add(root, rq);
}

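/*
 * remove rq from the rbtree, advancing the cached next_rq pointer past it
 * if it was the next request in sort order
 */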
static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	if (dd->next_rq[data_dir] == rq)
		dd->next_rq[data_dir] = deadline_latter_request(rq);

	elv_rb_del(deadline_rb_root(dd, rq), rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int data_dir = rq_data_dir(rq);

	/*
	 * This may be a requeue of a write request that has locked its
	 * target zone. If it is the case, this releases the zone lock.
	 */
	blk_req_zone_write_unlock(rq);

	deadline_add_rq_rb(dd, rq);

	/*
	 * set expire time and add to fifo list
	 */
	rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	rq_fifo_clear(rq);
	deadline_del_rq_rb(dd, rq);
}

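/*
 * check whether bio can be front merged with a queued request, i.e. whether
 * a request starts at the sector where bio ends
 */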
static enum elv_merge
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
		sector_t sector = bio_end_sector(bio);

		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
		if (__rq) {
			BUG_ON(sector != blk_rq_pos(__rq));

			if (elv_bio_merge_ok(__rq, bio)) {
				*req = __rq;
				return ELEVATOR_FRONT_MERGE;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
}

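/*
 * bio has been merged into req; a front merge changes the starting sector,
 * so req may need to be re-sorted in the rbtree
 */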
static void deadline_merged_request(struct request_queue *q,
				    struct request *req, enum elv_merge type)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(deadline_rb_root(dd, req), req);
		deadline_add_rq_rb(dd, req);
	}
}

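/*
 * req and next have been merged; next is about to be freed, so inherit its
 * deadline if it is earlier and then drop all knowledge of it
 */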
static void
deadline_merged_requests(struct request_queue *q, struct request *req,
			 struct request *next)
{
	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time,
				(unsigned long)req->fifo_time)) {
			list_move(&req->queuelist, &next->queuelist);
			req->fifo_time = next->fifo_time;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
	struct request_queue *q = rq->q;

	/*
	 * For a zoned block device, write requests must write lock their
	 * target zone.
	 */
	blk_req_zone_write_lock(rq);

	deadline_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;
	dd->next_rq[data_dir] = deadline_latter_request(rq);

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
	deadline_move_to_dispatch(dd, rq);
}

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

	/*
	 * rq is expired!
	 */
	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
		return 1;

	return 0;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * arrival ordered lists.
 */
static struct request *
deadline_fifo_request(struct deadline_data *dd, int data_dir)
{
	struct request *rq;

	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
		return NULL;

	if (list_empty(&dd->fifo_list[data_dir]))
		return NULL;

	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
		return rq;

	/*
	 * Look for a write request that can be dispatched, that is one with
	 * an unlocked target zone.
	 */
	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
		if (blk_req_can_dispatch_to_zone(rq))
			return rq;
	}

	return NULL;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * sector position sorted lists.
 */
static struct request *
deadline_next_request(struct deadline_data *dd, int data_dir)
{
	struct request *rq;

	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
		return NULL;

	rq = dd->next_rq[data_dir];
	if (!rq)
		return NULL;

	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
		return rq;

	/*
	 * Look for a write request that can be dispatched, that is one with
	 * an unlocked target zone.
	 */
	while (rq) {
		if (blk_req_can_dispatch_to_zone(rq))
			return rq;
		rq = deadline_latter_request(rq);
	}

	return NULL;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static int deadline_dispatch_requests(struct request_queue *q, int force)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
	struct request *rq, *next_rq;
	int data_dir;

	/*
	 * batches are currently reads XOR writes
	 */
	rq = deadline_next_request(dd, WRITE);
	if (!rq)
		rq = deadline_next_request(dd, READ);

	if (rq && dd->batching < dd->fifo_batch)
		/* we have a next request and are still entitled to batch */
		goto dispatch_request;

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

		if (deadline_fifo_request(dd, WRITE) &&
		    (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	next_rq = deadline_next_request(dd, data_dir);
	if (deadline_check_fifo(dd, data_dir) || !next_rq) {
		/*
		 * A deadline has expired, the last request was in the other
		 * direction, or we have run out of higher-sectored requests.
		 * Start again from the request with the earliest expiry time.
		 */
		rq = deadline_fifo_request(dd, data_dir);
	} else {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = next_rq;
	}

	/*
	 * For a zoned block device, if we only have writes queued and none of
	 * them can be dispatched, rq will be NULL.
	 */
	if (!rq)
		return 0;

	dd->batching = 0;

dispatch_request:
	/*
	 * rq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, rq);

	return 1;
}

/*
 * For zoned block devices, write unlock the target zone of completed
 * write requests.
 */
static void
deadline_completed_request(struct request_queue *q, struct request *rq)
{
	blk_req_zone_write_unlock(rq);
}

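/*
 * free the elevator private data when the queue is torn down; both fifo
 * lists must already be empty
 */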
static void deadline_exit_queue(struct elevator_queue *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static int deadline_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct deadline_data *dd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = dd;

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);
	return 0;
}

/*
 * sysfs parts below
 */

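/*
 * Each tunable is exposed as a 0644 file in the queue's iosched directory,
 * e.g. /sys/block/<dev>/queue/iosched/read_expire (see
 * Documentation/block/deadline-iosched.txt).
 */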
static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static void
deadline_var_store(int *var, const char *page)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
}

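/*
 * generate a sysfs show method per tunable; when __CONV is set the value is
 * stored in jiffies and reported in milliseconds
 */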
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

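/*
 * generate a sysfs store method per tunable: parse the value, clamp it to
 * [MIN, MAX] and, when __CONV is set, convert milliseconds back to jiffies
 */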
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	deadline_var_store(&__data, (page));				\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return count;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
};

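/*
 * single-queue (non-blk-mq) elevator hooks and sysfs attributes for "deadline"
 */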
static struct elevator_type iosched_deadline = {
	.ops.sq = {
		.elevator_merge_fn = 		deadline_merge,
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_completed_req_fn =	deadline_completed_request,
		.elevator_add_req_fn =		deadline_add_request,
		.elevator_former_req_fn =	elv_rb_former_request,
		.elevator_latter_req_fn =	elv_rb_latter_request,
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
	},

	.elevator_attrs = deadline_attrs,
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

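/*
 * register/unregister the scheduler with the elevator core on module
 * load/unload
 */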
static int __init deadline_init(void)
{
	return elv_register(&iosched_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");