1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 Red Hat, Inc.
4  *
5  * This file is released under the GPL.
6  */
7 
8 #include "dm.h"
9 #include "dm-bio-prison-v1.h"
10 #include "dm-bio-prison-v2.h"
11 
12 #include <linux/spinlock.h>
13 #include <linux/mempool.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 
17 /*----------------------------------------------------------------*/
18 
19 #define MIN_CELLS 1024
20 
21 struct prison_region {
22 	spinlock_t lock;
23 	struct rb_root cell;
24 } ____cacheline_aligned_in_smp;
25 
26 struct dm_bio_prison {
27 	mempool_t cell_pool;
28 	unsigned int num_locks;
29 	struct prison_region regions[];
30 };
31 
32 static struct kmem_cache *_cell_cache;
33 
34 /*----------------------------------------------------------------*/
35 
36 /*
37  * @nr_cells should be the number of cells you want in use _concurrently_.
38  * Don't confuse it with the number of distinct keys.
39  */
dm_bio_prison_create(void)40 struct dm_bio_prison *dm_bio_prison_create(void)
41 {
42 	int ret;
43 	unsigned int i, num_locks;
44 	struct dm_bio_prison *prison;
45 
46 	num_locks = dm_num_hash_locks();
47 	prison = kzalloc(struct_size(prison, regions, num_locks), GFP_KERNEL);
48 	if (!prison)
49 		return NULL;
50 	prison->num_locks = num_locks;
51 
52 	for (i = 0; i < prison->num_locks; i++) {
53 		spin_lock_init(&prison->regions[i].lock);
54 		prison->regions[i].cell = RB_ROOT;
55 	}
56 
57 	ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
58 	if (ret) {
59 		kfree(prison);
60 		return NULL;
61 	}
62 
63 	return prison;
64 }
65 EXPORT_SYMBOL_GPL(dm_bio_prison_create);
66 
dm_bio_prison_destroy(struct dm_bio_prison * prison)67 void dm_bio_prison_destroy(struct dm_bio_prison *prison)
68 {
69 	mempool_exit(&prison->cell_pool);
70 	kfree(prison);
71 }
72 EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
73 
dm_bio_prison_alloc_cell(struct dm_bio_prison * prison,gfp_t gfp)74 struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
75 {
76 	return mempool_alloc(&prison->cell_pool, gfp);
77 }
78 EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
79 
dm_bio_prison_free_cell(struct dm_bio_prison * prison,struct dm_bio_prison_cell * cell)80 void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
81 			     struct dm_bio_prison_cell *cell)
82 {
83 	mempool_free(cell, &prison->cell_pool);
84 }
85 EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
86 
__setup_new_cell(struct dm_cell_key * key,struct bio * holder,struct dm_bio_prison_cell * cell)87 static void __setup_new_cell(struct dm_cell_key *key,
88 			     struct bio *holder,
89 			     struct dm_bio_prison_cell *cell)
90 {
91 	memcpy(&cell->key, key, sizeof(cell->key));
92 	cell->holder = holder;
93 	bio_list_init(&cell->bios);
94 }
95 
cmp_keys(struct dm_cell_key * lhs,struct dm_cell_key * rhs)96 static int cmp_keys(struct dm_cell_key *lhs,
97 		    struct dm_cell_key *rhs)
98 {
99 	if (lhs->virtual < rhs->virtual)
100 		return -1;
101 
102 	if (lhs->virtual > rhs->virtual)
103 		return 1;
104 
105 	if (lhs->dev < rhs->dev)
106 		return -1;
107 
108 	if (lhs->dev > rhs->dev)
109 		return 1;
110 
111 	if (lhs->block_end <= rhs->block_begin)
112 		return -1;
113 
114 	if (lhs->block_begin >= rhs->block_end)
115 		return 1;
116 
117 	return 0;
118 }
119 
lock_nr(struct dm_cell_key * key,unsigned int num_locks)120 static inline unsigned int lock_nr(struct dm_cell_key *key, unsigned int num_locks)
121 {
122 	return dm_hash_locks_index((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT),
123 				   num_locks);
124 }
125 
dm_cell_key_has_valid_range(struct dm_cell_key * key)126 bool dm_cell_key_has_valid_range(struct dm_cell_key *key)
127 {
128 	if (WARN_ON_ONCE(key->block_end - key->block_begin > BIO_PRISON_MAX_RANGE))
129 		return false;
130 	if (WARN_ON_ONCE((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) !=
131 			 (key->block_end - 1) >> BIO_PRISON_MAX_RANGE_SHIFT))
132 		return false;
133 
134 	return true;
135 }
136 EXPORT_SYMBOL(dm_cell_key_has_valid_range);
137 
__bio_detain(struct rb_root * root,struct dm_cell_key * key,struct bio * inmate,struct dm_bio_prison_cell * cell_prealloc,struct dm_bio_prison_cell ** cell_result)138 static int __bio_detain(struct rb_root *root,
139 			struct dm_cell_key *key,
140 			struct bio *inmate,
141 			struct dm_bio_prison_cell *cell_prealloc,
142 			struct dm_bio_prison_cell **cell_result)
143 {
144 	int r;
145 	struct rb_node **new = &root->rb_node, *parent = NULL;
146 
147 	while (*new) {
148 		struct dm_bio_prison_cell *cell =
149 			rb_entry(*new, struct dm_bio_prison_cell, node);
150 
151 		r = cmp_keys(key, &cell->key);
152 
153 		parent = *new;
154 		if (r < 0)
155 			new = &((*new)->rb_left);
156 		else if (r > 0)
157 			new = &((*new)->rb_right);
158 		else {
159 			if (inmate)
160 				bio_list_add(&cell->bios, inmate);
161 			*cell_result = cell;
162 			return 1;
163 		}
164 	}
165 
166 	__setup_new_cell(key, inmate, cell_prealloc);
167 	*cell_result = cell_prealloc;
168 
169 	rb_link_node(&cell_prealloc->node, parent, new);
170 	rb_insert_color(&cell_prealloc->node, root);
171 
172 	return 0;
173 }
174 
bio_detain(struct dm_bio_prison * prison,struct dm_cell_key * key,struct bio * inmate,struct dm_bio_prison_cell * cell_prealloc,struct dm_bio_prison_cell ** cell_result)175 static int bio_detain(struct dm_bio_prison *prison,
176 		      struct dm_cell_key *key,
177 		      struct bio *inmate,
178 		      struct dm_bio_prison_cell *cell_prealloc,
179 		      struct dm_bio_prison_cell **cell_result)
180 {
181 	int r;
182 	unsigned l = lock_nr(key, prison->num_locks);
183 
184 	spin_lock_irq(&prison->regions[l].lock);
185 	r = __bio_detain(&prison->regions[l].cell, key, inmate, cell_prealloc, cell_result);
186 	spin_unlock_irq(&prison->regions[l].lock);
187 
188 	return r;
189 }
190 
dm_bio_detain(struct dm_bio_prison * prison,struct dm_cell_key * key,struct bio * inmate,struct dm_bio_prison_cell * cell_prealloc,struct dm_bio_prison_cell ** cell_result)191 int dm_bio_detain(struct dm_bio_prison *prison,
192 		  struct dm_cell_key *key,
193 		  struct bio *inmate,
194 		  struct dm_bio_prison_cell *cell_prealloc,
195 		  struct dm_bio_prison_cell **cell_result)
196 {
197 	return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
198 }
199 EXPORT_SYMBOL_GPL(dm_bio_detain);
200 
dm_get_cell(struct dm_bio_prison * prison,struct dm_cell_key * key,struct dm_bio_prison_cell * cell_prealloc,struct dm_bio_prison_cell ** cell_result)201 int dm_get_cell(struct dm_bio_prison *prison,
202 		struct dm_cell_key *key,
203 		struct dm_bio_prison_cell *cell_prealloc,
204 		struct dm_bio_prison_cell **cell_result)
205 {
206 	return bio_detain(prison, key, NULL, cell_prealloc, cell_result);
207 }
208 EXPORT_SYMBOL_GPL(dm_get_cell);
209 
210 /*
211  * @inmates must have been initialised prior to this call
212  */
__cell_release(struct rb_root * root,struct dm_bio_prison_cell * cell,struct bio_list * inmates)213 static void __cell_release(struct rb_root *root,
214 			   struct dm_bio_prison_cell *cell,
215 			   struct bio_list *inmates)
216 {
217 	rb_erase(&cell->node, root);
218 
219 	if (inmates) {
220 		if (cell->holder)
221 			bio_list_add(inmates, cell->holder);
222 		bio_list_merge(inmates, &cell->bios);
223 	}
224 }
225 
dm_cell_release(struct dm_bio_prison * prison,struct dm_bio_prison_cell * cell,struct bio_list * bios)226 void dm_cell_release(struct dm_bio_prison *prison,
227 		     struct dm_bio_prison_cell *cell,
228 		     struct bio_list *bios)
229 {
230 	unsigned l = lock_nr(&cell->key, prison->num_locks);
231 
232 	spin_lock_irq(&prison->regions[l].lock);
233 	__cell_release(&prison->regions[l].cell, cell, bios);
234 	spin_unlock_irq(&prison->regions[l].lock);
235 }
236 EXPORT_SYMBOL_GPL(dm_cell_release);
237 
238 /*
239  * Sometimes we don't want the holder, just the additional bios.
240  */
__cell_release_no_holder(struct rb_root * root,struct dm_bio_prison_cell * cell,struct bio_list * inmates)241 static void __cell_release_no_holder(struct rb_root *root,
242 				     struct dm_bio_prison_cell *cell,
243 				     struct bio_list *inmates)
244 {
245 	rb_erase(&cell->node, root);
246 	bio_list_merge(inmates, &cell->bios);
247 }
248 
dm_cell_release_no_holder(struct dm_bio_prison * prison,struct dm_bio_prison_cell * cell,struct bio_list * inmates)249 void dm_cell_release_no_holder(struct dm_bio_prison *prison,
250 			       struct dm_bio_prison_cell *cell,
251 			       struct bio_list *inmates)
252 {
253 	unsigned l = lock_nr(&cell->key, prison->num_locks);
254 	unsigned long flags;
255 
256 	spin_lock_irqsave(&prison->regions[l].lock, flags);
257 	__cell_release_no_holder(&prison->regions[l].cell, cell, inmates);
258 	spin_unlock_irqrestore(&prison->regions[l].lock, flags);
259 }
260 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
261 
dm_cell_error(struct dm_bio_prison * prison,struct dm_bio_prison_cell * cell,blk_status_t error)262 void dm_cell_error(struct dm_bio_prison *prison,
263 		   struct dm_bio_prison_cell *cell, blk_status_t error)
264 {
265 	struct bio_list bios;
266 	struct bio *bio;
267 
268 	bio_list_init(&bios);
269 	dm_cell_release(prison, cell, &bios);
270 
271 	while ((bio = bio_list_pop(&bios))) {
272 		bio->bi_status = error;
273 		bio_endio(bio);
274 	}
275 }
276 EXPORT_SYMBOL_GPL(dm_cell_error);
277 
dm_cell_visit_release(struct dm_bio_prison * prison,void (* visit_fn)(void *,struct dm_bio_prison_cell *),void * context,struct dm_bio_prison_cell * cell)278 void dm_cell_visit_release(struct dm_bio_prison *prison,
279 			   void (*visit_fn)(void *, struct dm_bio_prison_cell *),
280 			   void *context,
281 			   struct dm_bio_prison_cell *cell)
282 {
283 	unsigned l = lock_nr(&cell->key, prison->num_locks);
284 	spin_lock_irq(&prison->regions[l].lock);
285 	visit_fn(context, cell);
286 	rb_erase(&cell->node, &prison->regions[l].cell);
287 	spin_unlock_irq(&prison->regions[l].lock);
288 }
289 EXPORT_SYMBOL_GPL(dm_cell_visit_release);
290 
__promote_or_release(struct rb_root * root,struct dm_bio_prison_cell * cell)291 static int __promote_or_release(struct rb_root *root,
292 				struct dm_bio_prison_cell *cell)
293 {
294 	if (bio_list_empty(&cell->bios)) {
295 		rb_erase(&cell->node, root);
296 		return 1;
297 	}
298 
299 	cell->holder = bio_list_pop(&cell->bios);
300 	return 0;
301 }
302 
dm_cell_promote_or_release(struct dm_bio_prison * prison,struct dm_bio_prison_cell * cell)303 int dm_cell_promote_or_release(struct dm_bio_prison *prison,
304 			       struct dm_bio_prison_cell *cell)
305 {
306 	int r;
307 	unsigned l = lock_nr(&cell->key, prison->num_locks);
308 
309 	spin_lock_irq(&prison->regions[l].lock);
310 	r = __promote_or_release(&prison->regions[l].cell, cell);
311 	spin_unlock_irq(&prison->regions[l].lock);
312 
313 	return r;
314 }
315 EXPORT_SYMBOL_GPL(dm_cell_promote_or_release);
316 
317 /*----------------------------------------------------------------*/
318 
319 #define DEFERRED_SET_SIZE 64
320 
321 struct dm_deferred_entry {
322 	struct dm_deferred_set *ds;
323 	unsigned int count;
324 	struct list_head work_items;
325 };
326 
327 struct dm_deferred_set {
328 	spinlock_t lock;
329 	unsigned int current_entry;
330 	unsigned int sweeper;
331 	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
332 };
333 
dm_deferred_set_create(void)334 struct dm_deferred_set *dm_deferred_set_create(void)
335 {
336 	int i;
337 	struct dm_deferred_set *ds;
338 
339 	ds = kmalloc(sizeof(*ds), GFP_KERNEL);
340 	if (!ds)
341 		return NULL;
342 
343 	spin_lock_init(&ds->lock);
344 	ds->current_entry = 0;
345 	ds->sweeper = 0;
346 	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
347 		ds->entries[i].ds = ds;
348 		ds->entries[i].count = 0;
349 		INIT_LIST_HEAD(&ds->entries[i].work_items);
350 	}
351 
352 	return ds;
353 }
354 EXPORT_SYMBOL_GPL(dm_deferred_set_create);
355 
dm_deferred_set_destroy(struct dm_deferred_set * ds)356 void dm_deferred_set_destroy(struct dm_deferred_set *ds)
357 {
358 	kfree(ds);
359 }
360 EXPORT_SYMBOL_GPL(dm_deferred_set_destroy);
361 
dm_deferred_entry_inc(struct dm_deferred_set * ds)362 struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
363 {
364 	unsigned long flags;
365 	struct dm_deferred_entry *entry;
366 
367 	spin_lock_irqsave(&ds->lock, flags);
368 	entry = ds->entries + ds->current_entry;
369 	entry->count++;
370 	spin_unlock_irqrestore(&ds->lock, flags);
371 
372 	return entry;
373 }
374 EXPORT_SYMBOL_GPL(dm_deferred_entry_inc);
375 
ds_next(unsigned int index)376 static unsigned int ds_next(unsigned int index)
377 {
378 	return (index + 1) % DEFERRED_SET_SIZE;
379 }
380 
__sweep(struct dm_deferred_set * ds,struct list_head * head)381 static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
382 {
383 	while ((ds->sweeper != ds->current_entry) &&
384 	       !ds->entries[ds->sweeper].count) {
385 		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
386 		ds->sweeper = ds_next(ds->sweeper);
387 	}
388 
389 	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
390 		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
391 }
392 
dm_deferred_entry_dec(struct dm_deferred_entry * entry,struct list_head * head)393 void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
394 {
395 	unsigned long flags;
396 
397 	spin_lock_irqsave(&entry->ds->lock, flags);
398 	BUG_ON(!entry->count);
399 	--entry->count;
400 	__sweep(entry->ds, head);
401 	spin_unlock_irqrestore(&entry->ds->lock, flags);
402 }
403 EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);
404 
405 /*
406  * Returns 1 if deferred or 0 if no pending items to delay job.
407  */
dm_deferred_set_add_work(struct dm_deferred_set * ds,struct list_head * work)408 int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
409 {
410 	int r = 1;
411 	unsigned int next_entry;
412 
413 	spin_lock_irq(&ds->lock);
414 	if ((ds->sweeper == ds->current_entry) &&
415 	    !ds->entries[ds->current_entry].count)
416 		r = 0;
417 	else {
418 		list_add(work, &ds->entries[ds->current_entry].work_items);
419 		next_entry = ds_next(ds->current_entry);
420 		if (!ds->entries[next_entry].count)
421 			ds->current_entry = next_entry;
422 	}
423 	spin_unlock_irq(&ds->lock);
424 
425 	return r;
426 }
427 EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
428 
429 /*----------------------------------------------------------------*/
430 
dm_bio_prison_init_v1(void)431 static int __init dm_bio_prison_init_v1(void)
432 {
433 	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
434 	if (!_cell_cache)
435 		return -ENOMEM;
436 
437 	return 0;
438 }
439 
dm_bio_prison_exit_v1(void)440 static void dm_bio_prison_exit_v1(void)
441 {
442 	kmem_cache_destroy(_cell_cache);
443 	_cell_cache = NULL;
444 }
445 
446 static int (*_inits[])(void) __initdata = {
447 	dm_bio_prison_init_v1,
448 	dm_bio_prison_init_v2,
449 };
450 
451 static void (*_exits[])(void) = {
452 	dm_bio_prison_exit_v1,
453 	dm_bio_prison_exit_v2,
454 };
455 
dm_bio_prison_init(void)456 static int __init dm_bio_prison_init(void)
457 {
458 	const int count = ARRAY_SIZE(_inits);
459 
460 	int r, i;
461 
462 	for (i = 0; i < count; i++) {
463 		r = _inits[i]();
464 		if (r)
465 			goto bad;
466 	}
467 
468 	return 0;
469 
470 bad:
471 	while (i--)
472 		_exits[i]();
473 
474 	return r;
475 }
476 
dm_bio_prison_exit(void)477 static void __exit dm_bio_prison_exit(void)
478 {
479 	int i = ARRAY_SIZE(_exits);
480 
481 	while (i--)
482 		_exits[i]();
483 }
484 
485 /*
486  * module hooks
487  */
488 module_init(dm_bio_prison_init);
489 module_exit(dm_bio_prison_exit);
490 
491 MODULE_DESCRIPTION(DM_NAME " bio prison");
492 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
493 MODULE_LICENSE("GPL");
494