Lines Matching +full:dc +full:- +full:to +full:- +full:dc

1 // SPDX-License-Identifier: GPL-2.0
3 * bcache setup/teardown code, and some metadata io - read a superblock and
4 * figure out what to do with it.
66 unsigned int bucket_size = le16_to_cpu(s->bucket_size); in get_bucket_size()
68 if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { in get_bucket_size()
72 max = sizeof(unsigned int) * BITS_PER_BYTE - 1; in get_bucket_size()
73 order = le16_to_cpu(s->bucket_size); in get_bucket_size()
84 le16_to_cpu(s->obso_bucket_size_hi) << 16; in get_bucket_size()
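The fragments above show the two decodings get_bucket_size() has to handle: superblocks at BCACHE_SB_VERSION_CDEV_WITH_FEATURES or newer may store the bucket size as a log2 order (or, for the obsolete large-bucket layout, split across bucket_size and obso_bucket_size_hi), while older superblocks keep the sector count directly in the 16-bit field. A minimal stand-alone sketch of that decoding follows; decode_bucket_size() and the boolean feature parameters are illustrative stand-ins for the feature checks the listing elides.

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-alone model of the bucket-size decoding shown in the
 * get_bucket_size() fragments above.  The two feature booleans stand in
 * for the superblock feature checks that the listing does not show.
 */
static uint32_t decode_bucket_size(uint16_t sb_bucket_size,
                                   uint16_t obso_bucket_size_hi,
                                   int new_version,
                                   int has_large_bucket,
                                   int has_obso_large_bucket)
{
    uint32_t bucket_size = sb_bucket_size;

    if (new_version) {
        if (has_large_bucket) {
            /* the field holds log2 of the bucket size in sectors */
            unsigned int max = sizeof(unsigned int) * 8 - 1;
            unsigned int order = sb_bucket_size;

            if (order > max) {
                fprintf(stderr, "bucket order %u overflows\n", order);
                return 0;   /* sketch-only guard against an undefined shift */
            }
            bucket_size = 1U << order;
        } else if (has_obso_large_bucket) {
            /* obsolete layout: extra high 16 bits of the size */
            bucket_size += (uint32_t)obso_bucket_size_hi << 16;
        }
    }

    return bucket_size;
}

int main(void)
{
    printf("%u\n", decode_bucket_size(1024, 0, 0, 0, 0)); /* old style: 1024 sectors */
    printf("%u\n", decode_bucket_size(11, 0, 1, 1, 0));   /* order 11 -> 2048 sectors */
    return 0;
}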
97 sb->first_bucket = le16_to_cpu(s->first_bucket); in read_super_common()
98 sb->nbuckets = le64_to_cpu(s->nbuckets); in read_super_common()
99 sb->bucket_size = get_bucket_size(sb, s); in read_super_common()
101 sb->nr_in_set = le16_to_cpu(s->nr_in_set); in read_super_common()
102 sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); in read_super_common()
105 if (sb->keys > SB_JOURNAL_BUCKETS) in read_super_common()
109 if (sb->nbuckets > LONG_MAX) in read_super_common()
113 if (sb->nbuckets < 1 << 7) in read_super_common()
117 if (!is_power_of_2(sb->block_size)) in read_super_common()
121 if (sb->block_size > PAGE_SECTORS) in read_super_common()
125 if (!is_power_of_2(sb->bucket_size)) in read_super_common()
129 if (sb->bucket_size < PAGE_SECTORS) in read_super_common()
133 if (get_capacity(bdev->bd_disk) < in read_super_common()
134 sb->bucket_size * sb->nbuckets) in read_super_common()
138 if (bch_is_zero(sb->set_uuid, 16)) in read_super_common()
142 if (!sb->nr_in_set || in read_super_common()
143 sb->nr_in_set <= sb->nr_this_dev || in read_super_common()
144 sb->nr_in_set > MAX_CACHES_PER_SET) in read_super_common()
148 for (i = 0; i < sb->keys; i++) in read_super_common()
149 if (sb->d[i] != sb->first_bucket + i) in read_super_common()
153 if (sb->first_bucket + sb->keys > sb->nbuckets) in read_super_common()
157 if (sb->first_bucket * sb->bucket_size < 16) in read_super_common()
174 page = read_cache_page_gfp(bdev->bd_inode->i_mapping, in read_super()
180 sb->offset = le64_to_cpu(s->offset); in read_super()
181 sb->version = le64_to_cpu(s->version); in read_super()
183 memcpy(sb->magic, s->magic, 16); in read_super()
184 memcpy(sb->uuid, s->uuid, 16); in read_super()
185 memcpy(sb->set_uuid, s->set_uuid, 16); in read_super()
186 memcpy(sb->label, s->label, SB_LABEL_SIZE); in read_super()
188 sb->flags = le64_to_cpu(s->flags); in read_super()
189 sb->seq = le64_to_cpu(s->seq); in read_super()
190 sb->last_mount = le32_to_cpu(s->last_mount); in read_super()
191 sb->keys = le16_to_cpu(s->keys); in read_super()
194 sb->d[i] = le64_to_cpu(s->d[i]); in read_super()
197 sb->version, sb->flags, sb->seq, sb->keys); in read_super()
200 if (sb->offset != SB_SECTOR) in read_super()
204 if (memcmp(sb->magic, bcache_magic, 16)) in read_super()
208 if (s->csum != csum_set(s)) in read_super()
212 if (bch_is_zero(sb->uuid, 16)) in read_super()
215 sb->block_size = le16_to_cpu(s->block_size); in read_super()
218 if (sb->block_size << 9 < bdev_logical_block_size(bdev)) in read_super()
221 switch (sb->version) { in read_super()
223 sb->data_offset = BDEV_DATA_START_DEFAULT; in read_super()
227 sb->data_offset = le64_to_cpu(s->data_offset); in read_super()
230 if (sb->data_offset < BDEV_DATA_START_DEFAULT) in read_super()
245 sb->feature_compat = le64_to_cpu(s->feature_compat); in read_super()
246 sb->feature_incompat = le64_to_cpu(s->feature_incompat); in read_super()
247 sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); in read_super()
254 err = "Unsupported read-only compatible feature found"; in read_super()
271 sb->last_mount = (u32)ktime_get_real_seconds(); in read_super()
281 struct cached_dev *dc = bio->bi_private; in write_bdev_super_endio() local
283 if (bio->bi_status) in write_bdev_super_endio()
284 bch_count_backing_io_errors(dc, bio); in write_bdev_super_endio()
286 closure_put(&dc->sb_write); in write_bdev_super_endio()
294 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META; in __write_super()
295 bio->bi_iter.bi_sector = SB_SECTOR; in __write_super()
299 out->offset = cpu_to_le64(sb->offset); in __write_super()
301 memcpy(out->uuid, sb->uuid, 16); in __write_super()
302 memcpy(out->set_uuid, sb->set_uuid, 16); in __write_super()
303 memcpy(out->label, sb->label, SB_LABEL_SIZE); in __write_super()
305 out->flags = cpu_to_le64(sb->flags); in __write_super()
306 out->seq = cpu_to_le64(sb->seq); in __write_super()
308 out->last_mount = cpu_to_le32(sb->last_mount); in __write_super()
309 out->first_bucket = cpu_to_le16(sb->first_bucket); in __write_super()
310 out->keys = cpu_to_le16(sb->keys); in __write_super()
312 for (i = 0; i < sb->keys; i++) in __write_super()
313 out->d[i] = cpu_to_le64(sb->d[i]); in __write_super()
315 if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { in __write_super()
316 out->feature_compat = cpu_to_le64(sb->feature_compat); in __write_super()
317 out->feature_incompat = cpu_to_le64(sb->feature_incompat); in __write_super()
318 out->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat); in __write_super()
321 out->version = cpu_to_le64(sb->version); in __write_super()
322 out->csum = csum_set(out); in __write_super()
325 sb->version, sb->flags, sb->seq); in __write_super()
332 struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write); in bch_write_bdev_super_unlock() local
334 up(&dc->sb_write_mutex); in bch_write_bdev_super_unlock()
337 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) in bch_write_bdev_super() argument
339 struct closure *cl = &dc->sb_write; in bch_write_bdev_super()
340 struct bio *bio = &dc->sb_bio; in bch_write_bdev_super()
342 down(&dc->sb_write_mutex); in bch_write_bdev_super()
345 bio_init(bio, dc->bdev, dc->sb_bv, 1, 0); in bch_write_bdev_super()
346 bio->bi_end_io = write_bdev_super_endio; in bch_write_bdev_super()
347 bio->bi_private = dc; in bch_write_bdev_super()
350 /* I/O request sent to backing device */ in bch_write_bdev_super()
351 __write_super(&dc->sb, dc->sb_disk, bio); in bch_write_bdev_super()
358 struct cache *ca = bio->bi_private; in write_super_endio()
361 bch_count_io_errors(ca, bio->bi_status, 0, in write_super_endio()
363 closure_put(&ca->set->sb_write); in write_super_endio()
370 up(&c->sb_write_mutex); in bcache_write_super_unlock()
375 struct closure *cl = &c->sb_write; in bcache_write_super()
376 struct cache *ca = c->cache; in bcache_write_super()
377 struct bio *bio = &ca->sb_bio; in bcache_write_super()
380 down(&c->sb_write_mutex); in bcache_write_super()
381 closure_init(cl, &c->cl); in bcache_write_super()
383 ca->sb.seq++; in bcache_write_super()
385 if (ca->sb.version < version) in bcache_write_super()
386 ca->sb.version = version; in bcache_write_super()
388 bio_init(bio, ca->bdev, ca->sb_bv, 1, 0); in bcache_write_super()
389 bio->bi_end_io = write_super_endio; in bcache_write_super()
390 bio->bi_private = ca; in bcache_write_super()
393 __write_super(&ca->sb, ca->sb_disk, bio); in bcache_write_super()
402 struct closure *cl = bio->bi_private; in uuid_endio()
405 cache_set_err_on(bio->bi_status, c, "accessing uuids"); in uuid_endio()
414 up(&c->uuid_write_mutex); in uuid_io_unlock()
420 struct closure *cl = &c->uuid_write; in uuid_io()
426 down(&c->uuid_write_mutex); in uuid_io()
432 bio->bi_opf = opf | REQ_SYNC | REQ_META; in uuid_io()
433 bio->bi_iter.bi_size = KEY_SIZE(k) << 9; in uuid_io()
435 bio->bi_end_io = uuid_endio; in uuid_io()
436 bio->bi_private = cl; in uuid_io()
437 bch_bio_map(bio, c->uuids); in uuid_io()
449 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) in uuid_io()
450 if (!bch_is_zero(u->uuid, 16)) in uuid_io()
452 u - c->uuids, u->uuid, u->label, in uuid_io()
453 u->first_reg, u->last_reg, u->invalidated); in uuid_io()
460 struct bkey *k = &j->uuid_bucket; in uuid_read()
465 bkey_copy(&c->uuid_bucket, k); in uuid_read()
468 if (j->version < BCACHE_JSET_VERSION_UUIDv1) { in uuid_read()
469 struct uuid_entry_v0 *u0 = (void *) c->uuids; in uuid_read()
470 struct uuid_entry *u1 = (void *) c->uuids; in uuid_read()
476 * Since the new uuid entry is bigger than the old, we have to in uuid_read()
478 * in order to do it in place in uuid_read()
481 for (i = c->nr_uuids - 1; in uuid_read()
483 --i) { in uuid_read()
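The loop above converts the old uuid_entry_v0 array into the wider uuid_entry layout in place, walking from the last entry down to the first so that writing a wider entry never destroys an old entry that still has to be read. A simplified stand-alone sketch of that backward in-place widening follows; the old_entry/new_entry layouts are placeholders, not the real bcache structures.

#include <stdint.h>
#include <string.h>

/*
 * Illustrative model of the in-place conversion in uuid_read().  Only the
 * iteration order matters; the field layouts here are made up.
 */
struct old_entry {          /* smaller on-disk layout */
    uint8_t  uuid[16];
    uint32_t first_reg;
};

struct new_entry {          /* wider in-memory layout */
    uint8_t  uuid[16];
    uint32_t first_reg;
    uint32_t flags;
    uint64_t sectors;
};

static void convert_in_place(void *buf, int nr)
{
    struct old_entry *u0 = buf;
    struct new_entry *u1 = buf;
    int i;

    /*
     * Walk from the last entry to the first: writing the wider entry i can
     * only clobber old entries at index >= i.  Those with index > i were
     * already converted, and entry i itself is copied to a local first
     * (a safety the sketch adds; the real code relies on field offsets).
     */
    for (i = nr - 1; i >= 0; --i) {
        struct old_entry e = u0[i];

        memcpy(u1[i].uuid, e.uuid, 16);
        u1[i].first_reg = e.first_reg;
        u1[i].flags = 0;
        u1[i].sectors = 0;
    }
}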
503 struct cache *ca = c->cache; in __uuid_write()
512 size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS; in __uuid_write()
518 atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); in __uuid_write()
520 bkey_copy(&c->uuid_bucket, &k.key); in __uuid_write()
539 for (u = c->uuids; in uuid_find()
540 u < c->uuids + c->nr_uuids; u++) in uuid_find()
541 if (!memcmp(u->uuid, uuid, 16)) in uuid_find()
561 * See alloc.c for an explanation of the gen. The priority is used to implement
565 * The gens and the priorities don't have a whole lot to do with each other, and
566 * it's actually the gens that must be written out at specific times - it's no
571 * to fit them all. The buckets we use to store them form a list; the journal
572 * header points to the first bucket, the first bucket points to the second
576 * of buckets to allocate from) the allocation code will invalidate some
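The comment above describes the on-disk shape that bch_prio_write() and prio_read() below operate on: per-bucket prio/gen pairs are packed into dedicated metadata buckets that form a singly linked chain, with the journal header naming the first bucket and each bucket's header naming the next. A simplified sketch of that layout and of walking the chain follows; the struct field widths and the read_bucket() callback are assumptions standing in for the real prio_set definition and prio_io().

#include <stdint.h>
#include <stddef.h>

/*
 * Simplified model of the chained "prio" buckets described above.  Field
 * names mirror the bch_prio_write()/prio_read() fragments (csum, magic,
 * next_bucket, per-bucket prio/gen pairs); exact widths and padding of
 * the real on-disk structures are assumptions.
 */
struct bucket_disk_model {
    uint16_t prio;
    uint8_t  gen;
} __attribute__((packed));

struct prio_set_model {
    uint64_t csum;          /* checksum over the rest of the page */
    uint64_t magic;
    uint64_t seq;
    uint64_t next_bucket;   /* bucket number of the next link in the chain */
    struct bucket_disk_model data[];
};

/* read_bucket() is a placeholder for prio_io(ca, bucket, REQ_OP_READ) */
typedef const struct prio_set_model *(*read_bucket_fn)(uint64_t bucket);

/* Walk the chain starting from the bucket recorded in the journal header. */
static void walk_prio_chain(uint64_t first_bucket, size_t nbuckets,
                            size_t prios_per_bucket, read_bucket_fn read_bucket,
                            struct bucket_disk_model *out)
{
    uint64_t bucket = first_bucket;
    size_t done = 0;

    while (done < nbuckets) {
        const struct prio_set_model *p = read_bucket(bucket);
        size_t i, n = nbuckets - done;

        if (n > prios_per_bucket)
            n = prios_per_bucket;
        for (i = 0; i < n; i++)
            out[done + i] = p->data[i];   /* one prio/gen pair per data bucket */

        done += n;
        bucket = p->next_bucket;          /* follow the chain */
    }
}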
583 struct cache *ca = bio->bi_private; in prio_endio()
585 cache_set_err_on(bio->bi_status, ca->set, "accessing priorities"); in prio_endio()
586 bch_bbio_free(bio, ca->set); in prio_endio()
587 closure_put(&ca->prio); in prio_endio()
592 struct closure *cl = &ca->prio; in prio_io()
593 struct bio *bio = bch_bbio_alloc(ca->set); in prio_io()
597 bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; in prio_io()
598 bio_set_dev(bio, ca->bdev); in prio_io()
599 bio->bi_iter.bi_size = meta_bucket_bytes(&ca->sb); in prio_io()
601 bio->bi_end_io = prio_endio; in prio_io()
602 bio->bi_private = ca; in prio_io()
603 bio->bi_opf = opf | REQ_SYNC | REQ_META; in prio_io()
604 bch_bio_map(bio, ca->disk_buckets); in prio_io()
606 closure_bio_submit(ca->set, bio, &ca->prio); in prio_io()
617 fifo_used(&ca->free[RESERVE_PRIO]), in bch_prio_write()
618 fifo_used(&ca->free[RESERVE_NONE]), in bch_prio_write()
619 fifo_used(&ca->free_inc)); in bch_prio_write()
622 * Pre-check if there are enough free buckets. In the non-blocking in bch_prio_write()
623 * scenario it's better to fail early rather than starting to allocate in bch_prio_write()
627 size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) + in bch_prio_write()
628 fifo_used(&ca->free[RESERVE_NONE]); in bch_prio_write()
630 return -ENOMEM; in bch_prio_write()
635 lockdep_assert_held(&ca->set->bucket_lock); in bch_prio_write()
637 ca->disk_buckets->seq++; in bch_prio_write()
639 atomic_long_add(ca->sb.bucket_size * prio_buckets(ca), in bch_prio_write()
640 &ca->meta_sectors_written); in bch_prio_write()
642 for (i = prio_buckets(ca) - 1; i >= 0; --i) { in bch_prio_write()
644 struct prio_set *p = ca->disk_buckets; in bch_prio_write()
645 struct bucket_disk *d = p->data; in bch_prio_write()
648 for (b = ca->buckets + i * prios_per_bucket(ca); in bch_prio_write()
649 b < ca->buckets + ca->sb.nbuckets && d < end; in bch_prio_write()
651 d->prio = cpu_to_le16(b->prio); in bch_prio_write()
652 d->gen = b->gen; in bch_prio_write()
655 p->next_bucket = ca->prio_buckets[i + 1]; in bch_prio_write()
656 p->magic = pset_magic(&ca->sb); in bch_prio_write()
657 p->csum = bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8); in bch_prio_write()
660 BUG_ON(bucket == -1); in bch_prio_write()
662 mutex_unlock(&ca->set->bucket_lock); in bch_prio_write()
664 mutex_lock(&ca->set->bucket_lock); in bch_prio_write()
666 ca->prio_buckets[i] = bucket; in bch_prio_write()
667 atomic_dec_bug(&ca->buckets[bucket].pin); in bch_prio_write()
670 mutex_unlock(&ca->set->bucket_lock); in bch_prio_write()
672 bch_journal_meta(ca->set, &cl); in bch_prio_write()
675 mutex_lock(&ca->set->bucket_lock); in bch_prio_write()
678 * Don't want the old priorities to get garbage collected until after we in bch_prio_write()
682 if (ca->prio_last_buckets[i]) in bch_prio_write()
684 &ca->buckets[ca->prio_last_buckets[i]]); in bch_prio_write()
686 ca->prio_last_buckets[i] = ca->prio_buckets[i]; in bch_prio_write()
693 struct prio_set *p = ca->disk_buckets; in prio_read()
694 struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; in prio_read()
697 int ret = -EIO; in prio_read()
699 for (b = ca->buckets; in prio_read()
700 b < ca->buckets + ca->sb.nbuckets; in prio_read()
703 ca->prio_buckets[bucket_nr] = bucket; in prio_read()
704 ca->prio_last_buckets[bucket_nr] = bucket; in prio_read()
709 if (p->csum != in prio_read()
710 bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) { in prio_read()
715 if (p->magic != pset_magic(&ca->sb)) { in prio_read()
720 bucket = p->next_bucket; in prio_read()
721 d = p->data; in prio_read()
724 b->prio = le16_to_cpu(d->prio); in prio_read()
725 b->gen = b->last_gc = d->gen; in prio_read()
737 struct bcache_device *d = disk->private_data; in open_dev()
739 if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) in open_dev()
740 return -ENXIO; in open_dev()
742 closure_get(&d->cl); in open_dev()
748 struct bcache_device *d = b->private_data; in release_dev()
750 closure_put(&d->cl); in release_dev()
756 struct bcache_device *d = b->bd_disk->private_data; in ioctl_dev()
758 return d->ioctl(d, mode, cmd, arg); in ioctl_dev()
779 if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags)) in bcache_device_stop()
781 * closure_fn set to in bcache_device_stop()
782 * - cached device: cached_dev_flush() in bcache_device_stop()
783 * - flash dev: flash_dev_flush() in bcache_device_stop()
785 closure_queue(&d->cl); in bcache_device_stop()
792 if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { in bcache_device_unlink()
793 struct cache *ca = d->c->cache; in bcache_device_unlink()
795 sysfs_remove_link(&d->c->kobj, d->name); in bcache_device_unlink()
796 sysfs_remove_link(&d->kobj, "cache"); in bcache_device_unlink()
798 bd_unlink_disk_holder(ca->bdev, d->disk); in bcache_device_unlink()
805 struct cache *ca = c->cache; in bcache_device_link()
808 bd_link_disk_holder(ca->bdev, d->disk); in bcache_device_link()
810 snprintf(d->name, BCACHEDEVNAME_SIZE, in bcache_device_link()
811 "%s%u", name, d->id); in bcache_device_link()
813 ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); in bcache_device_link()
815 pr_err("Couldn't create device -> cache set symlink\n"); in bcache_device_link()
817 ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); in bcache_device_link()
819 pr_err("Couldn't create cache set -> device symlink\n"); in bcache_device_link()
821 clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); in bcache_device_link()
828 atomic_dec(&d->c->attached_dev_nr); in bcache_device_detach()
830 if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { in bcache_device_detach()
831 struct uuid_entry *u = d->c->uuids + d->id; in bcache_device_detach()
834 memcpy(u->uuid, invalid_uuid, 16); in bcache_device_detach()
835 u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); in bcache_device_detach()
836 bch_uuid_write(d->c); in bcache_device_detach()
841 d->c->devices[d->id] = NULL; in bcache_device_detach()
842 closure_put(&d->c->caching); in bcache_device_detach()
843 d->c = NULL; in bcache_device_detach()
849 d->id = id; in bcache_device_attach()
850 d->c = c; in bcache_device_attach()
851 c->devices[id] = d; in bcache_device_attach()
853 if (id >= c->devices_max_used) in bcache_device_attach()
854 c->devices_max_used = id + 1; in bcache_device_attach()
856 closure_get(&c->caching); in bcache_device_attach()
871 struct gendisk *disk = d->disk; in bcache_device_free()
876 pr_info("%s stopped\n", disk->disk_name); in bcache_device_free()
880 if (d->c) in bcache_device_free()
885 first_minor_to_idx(disk->first_minor)); in bcache_device_free()
889 bioset_exit(&d->bio_split); in bcache_device_free()
890 kvfree(d->full_dirty_stripes); in bcache_device_free()
891 kvfree(d->stripe_sectors_dirty); in bcache_device_free()
893 closure_debug_destroy(&d->cl); in bcache_device_free()
906 if (!d->stripe_size) in bcache_device_init()
907 d->stripe_size = 1 << 31; in bcache_device_init()
909 n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); in bcache_device_init()
913 return -ENOMEM; in bcache_device_init()
915 d->nr_stripes = n; in bcache_device_init()
917 n = d->nr_stripes * sizeof(atomic_t); in bcache_device_init()
918 d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL); in bcache_device_init()
919 if (!d->stripe_sectors_dirty) in bcache_device_init()
920 return -ENOMEM; in bcache_device_init()
922 n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long); in bcache_device_init()
923 d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL); in bcache_device_init()
924 if (!d->full_dirty_stripes) in bcache_device_init()
932 if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), in bcache_device_init()
936 d->disk = blk_alloc_disk(NUMA_NO_NODE); in bcache_device_init()
937 if (!d->disk) in bcache_device_init()
940 set_capacity(d->disk, sectors); in bcache_device_init()
941 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); in bcache_device_init()
943 d->disk->major = bcache_major; in bcache_device_init()
944 d->disk->first_minor = idx_to_first_minor(idx); in bcache_device_init()
945 d->disk->minors = BCACHE_MINORS; in bcache_device_init()
946 d->disk->fops = ops; in bcache_device_init()
947 d->disk->private_data = d; in bcache_device_init()
949 q = d->disk->queue; in bcache_device_init()
950 q->limits.max_hw_sectors = UINT_MAX; in bcache_device_init()
951 q->limits.max_sectors = UINT_MAX; in bcache_device_init()
952 q->limits.max_segment_size = UINT_MAX; in bcache_device_init()
953 q->limits.max_segments = BIO_MAX_VECS; in bcache_device_init()
955 q->limits.discard_granularity = 512; in bcache_device_init()
956 q->limits.io_min = block_size; in bcache_device_init()
957 q->limits.logical_block_size = block_size; in bcache_device_init()
958 q->limits.physical_block_size = block_size; in bcache_device_init()
960 if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) { in bcache_device_init()
965 …pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logica… in bcache_device_init()
966 d->disk->disk_name, q->limits.logical_block_size, in bcache_device_init()
973 blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); in bcache_device_init()
980 bioset_exit(&d->bio_split); in bcache_device_init()
984 kvfree(d->full_dirty_stripes); in bcache_device_init()
986 kvfree(d->stripe_sectors_dirty); in bcache_device_init()
987 return -ENOMEM; in bcache_device_init()
996 struct cached_dev *dc; in calc_cached_dev_sectors() local
998 list_for_each_entry(dc, &c->cached_devs, list) in calc_cached_dev_sectors()
999 sectors += bdev_nr_sectors(dc->bdev); in calc_cached_dev_sectors()
1001 c->cached_dev_sectors = sectors; in calc_cached_dev_sectors()
1007 struct cached_dev *dc = arg; in cached_dev_status_update() local
1012 * dc->io_disable might be set via sysfs interface, so check it in cached_dev_status_update()
1015 while (!kthread_should_stop() && !dc->io_disable) { in cached_dev_status_update()
1016 q = bdev_get_queue(dc->bdev); in cached_dev_status_update()
1018 dc->offline_seconds++; in cached_dev_status_update()
1020 dc->offline_seconds = 0; in cached_dev_status_update()
1022 if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { in cached_dev_status_update()
1024 dc->bdev, in cached_dev_status_update()
1026 pr_err("%s: disable I/O request due to backing device offline\n", in cached_dev_status_update()
1027 dc->disk.name); in cached_dev_status_update()
1028 dc->io_disable = true; in cached_dev_status_update()
1031 bcache_device_stop(&dc->disk); in cached_dev_status_update()
1042 int bch_cached_dev_run(struct cached_dev *dc) in bch_cached_dev_run() argument
1045 struct bcache_device *d = &dc->disk; in bch_cached_dev_run()
1046 char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL); in bch_cached_dev_run()
1049 kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), in bch_cached_dev_run()
1054 if (dc->io_disable) { in bch_cached_dev_run()
1055 pr_err("I/O disabled on cached dev %pg\n", dc->bdev); in bch_cached_dev_run()
1056 ret = -EIO; in bch_cached_dev_run()
1060 if (atomic_xchg(&dc->running, 1)) { in bch_cached_dev_run()
1061 pr_info("cached dev %pg is running already\n", dc->bdev); in bch_cached_dev_run()
1062 ret = -EBUSY; in bch_cached_dev_run()
1066 if (!d->c && in bch_cached_dev_run()
1067 BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { in bch_cached_dev_run()
1072 SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); in bch_cached_dev_run()
1073 bch_write_bdev_super(dc, &cl); in bch_cached_dev_run()
1077 ret = add_disk(d->disk); in bch_cached_dev_run()
1080 bd_link_disk_holder(dc->bdev, dc->disk.disk); in bch_cached_dev_run()
1082 * won't show up in the uevent file, use udevadm monitor -e instead in bch_cached_dev_run()
1085 kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); in bch_cached_dev_run()
1087 if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || in bch_cached_dev_run()
1088 sysfs_create_link(&disk_to_dev(d->disk)->kobj, in bch_cached_dev_run()
1089 &d->kobj, "bcache")) { in bch_cached_dev_run()
1090 pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n"); in bch_cached_dev_run()
1091 ret = -ENOMEM; in bch_cached_dev_run()
1095 dc->status_update_thread = kthread_run(cached_dev_status_update, in bch_cached_dev_run()
1096 dc, "bcache_status_update"); in bch_cached_dev_run()
1097 if (IS_ERR(dc->status_update_thread)) { in bch_cached_dev_run()
1098 …pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing… in bch_cached_dev_run()
1110 * work dc->writeback_rate_update is running. Wait until the routine
1111 * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
1113 * seconds, give up waiting here and continue to cancel it too.
1115 static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) in cancel_writeback_rate_update_dwork() argument
1121 &dc->disk.flags)) in cancel_writeback_rate_update_dwork()
1123 time_out--; in cancel_writeback_rate_update_dwork()
1128 pr_warn("give up waiting for dc->writeback_write_update to quit\n"); in cancel_writeback_rate_update_dwork()
1130 cancel_delayed_work_sync(&dc->writeback_rate_update); in cancel_writeback_rate_update_dwork()
1135 struct cached_dev *dc = container_of(w, struct cached_dev, detach); in cached_dev_detach_finish() local
1136 struct cache_set *c = dc->disk.c; in cached_dev_detach_finish()
1138 BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); in cached_dev_detach_finish()
1139 BUG_ON(refcount_read(&dc->count)); in cached_dev_detach_finish()
1142 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) in cached_dev_detach_finish()
1143 cancel_writeback_rate_update_dwork(dc); in cached_dev_detach_finish()
1145 if (!IS_ERR_OR_NULL(dc->writeback_thread)) { in cached_dev_detach_finish()
1146 kthread_stop(dc->writeback_thread); in cached_dev_detach_finish()
1147 dc->writeback_thread = NULL; in cached_dev_detach_finish()
1152 bcache_device_detach(&dc->disk); in cached_dev_detach_finish()
1153 list_move(&dc->list, &uncached_devices); in cached_dev_detach_finish()
1156 clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); in cached_dev_detach_finish()
1157 clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); in cached_dev_detach_finish()
1161 pr_info("Caching disabled for %pg\n", dc->bdev); in cached_dev_detach_finish()
1164 closure_put(&dc->disk.cl); in cached_dev_detach_finish()
1167 void bch_cached_dev_detach(struct cached_dev *dc) in bch_cached_dev_detach() argument
1171 if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) in bch_cached_dev_detach()
1174 if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) in bch_cached_dev_detach()
1181 closure_get(&dc->disk.cl); in bch_cached_dev_detach()
1183 bch_writeback_queue(dc); in bch_cached_dev_detach()
1185 cached_dev_put(dc); in bch_cached_dev_detach()
1188 int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, in bch_cached_dev_attach() argument
1196 if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) || in bch_cached_dev_attach()
1197 (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16))) in bch_cached_dev_attach()
1198 return -ENOENT; in bch_cached_dev_attach()
1200 if (dc->disk.c) { in bch_cached_dev_attach()
1201 pr_err("Can't attach %pg: already attached\n", dc->bdev); in bch_cached_dev_attach()
1202 return -EINVAL; in bch_cached_dev_attach()
1205 if (test_bit(CACHE_SET_STOPPING, &c->flags)) { in bch_cached_dev_attach()
1206 pr_err("Can't attach %pg: shutting down\n", dc->bdev); in bch_cached_dev_attach()
1207 return -EINVAL; in bch_cached_dev_attach()
1210 if (dc->sb.block_size < c->cache->sb.block_size) { in bch_cached_dev_attach()
1213 dc->bdev); in bch_cached_dev_attach()
1214 return -EINVAL; in bch_cached_dev_attach()
1218 list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { in bch_cached_dev_attach()
1219 if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { in bch_cached_dev_attach()
1220 pr_err("Tried to attach %pg but duplicate UUID already attached\n", in bch_cached_dev_attach()
1221 dc->bdev); in bch_cached_dev_attach()
1223 return -EINVAL; in bch_cached_dev_attach()
1227 u = uuid_find(c, dc->sb.uuid); in bch_cached_dev_attach()
1230 (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || in bch_cached_dev_attach()
1231 BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { in bch_cached_dev_attach()
1232 memcpy(u->uuid, invalid_uuid, 16); in bch_cached_dev_attach()
1233 u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); in bch_cached_dev_attach()
1238 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { in bch_cached_dev_attach()
1239 pr_err("Couldn't find uuid for %pg in set\n", dc->bdev); in bch_cached_dev_attach()
1240 return -ENOENT; in bch_cached_dev_attach()
1245 pr_err("Not caching %pg, no room for UUID\n", dc->bdev); in bch_cached_dev_attach()
1246 return -EINVAL; in bch_cached_dev_attach()
1252 * sysfs_remove_file(&dc->kobj, &sysfs_attach); in bch_cached_dev_attach()
1255 if (bch_is_zero(u->uuid, 16)) { in bch_cached_dev_attach()
1260 memcpy(u->uuid, dc->sb.uuid, 16); in bch_cached_dev_attach()
1261 memcpy(u->label, dc->sb.label, SB_LABEL_SIZE); in bch_cached_dev_attach()
1262 u->first_reg = u->last_reg = rtime; in bch_cached_dev_attach()
1265 memcpy(dc->sb.set_uuid, c->set_uuid, 16); in bch_cached_dev_attach()
1266 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); in bch_cached_dev_attach()
1268 bch_write_bdev_super(dc, &cl); in bch_cached_dev_attach()
1271 u->last_reg = rtime; in bch_cached_dev_attach()
1275 bcache_device_attach(&dc->disk, c, u - c->uuids); in bch_cached_dev_attach()
1276 list_move(&dc->list, &c->cached_devs); in bch_cached_dev_attach()
1280 * dc->c must be set before dc->count != 0 - paired with the mb in in bch_cached_dev_attach()
1284 refcount_set(&dc->count, 1); in bch_cached_dev_attach()
1287 down_write(&dc->writeback_lock); in bch_cached_dev_attach()
1288 if (bch_cached_dev_writeback_start(dc)) { in bch_cached_dev_attach()
1289 up_write(&dc->writeback_lock); in bch_cached_dev_attach()
1291 dc->disk.disk->disk_name); in bch_cached_dev_attach()
1292 return -ENOMEM; in bch_cached_dev_attach()
1295 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { in bch_cached_dev_attach()
1296 atomic_set(&dc->has_dirty, 1); in bch_cached_dev_attach()
1297 bch_writeback_queue(dc); in bch_cached_dev_attach()
1300 bch_sectors_dirty_init(&dc->disk); in bch_cached_dev_attach()
1302 ret = bch_cached_dev_run(dc); in bch_cached_dev_attach()
1303 if (ret && (ret != -EBUSY)) { in bch_cached_dev_attach()
1304 up_write(&dc->writeback_lock); in bch_cached_dev_attach()
1307 * able to be directly called. The kthread and kworker in bch_cached_dev_attach()
1309 * have to be stopped manually here. in bch_cached_dev_attach()
1311 kthread_stop(dc->writeback_thread); in bch_cached_dev_attach()
1312 cancel_writeback_rate_update_dwork(dc); in bch_cached_dev_attach()
1313 pr_err("Couldn't run cached device %pg\n", dc->bdev); in bch_cached_dev_attach()
1317 bcache_device_link(&dc->disk, c, "bdev"); in bch_cached_dev_attach()
1318 atomic_inc(&c->attached_dev_nr); in bch_cached_dev_attach()
1320 if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { in bch_cached_dev_attach()
1321 …pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); in bch_cached_dev_attach()
1322 pr_err("Please update to the latest bcache-tools to create the cache device\n"); in bch_cached_dev_attach()
1323 set_disk_ro(dc->disk.disk, 1); in bch_cached_dev_attach()
1326 /* Allow the writeback thread to proceed */ in bch_cached_dev_attach()
1327 up_write(&dc->writeback_lock); in bch_cached_dev_attach()
1330 dc->bdev, in bch_cached_dev_attach()
1331 dc->disk.disk->disk_name, in bch_cached_dev_attach()
1332 dc->disk.c->set_uuid); in bch_cached_dev_attach()
1336 /* when dc->disk.kobj released */
1339 struct cached_dev *dc = container_of(kobj, struct cached_dev, in bch_cached_dev_release() local
1341 kfree(dc); in bch_cached_dev_release()
1347 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); in cached_dev_free() local
1349 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) in cached_dev_free()
1350 cancel_writeback_rate_update_dwork(dc); in cached_dev_free()
1352 if (!IS_ERR_OR_NULL(dc->writeback_thread)) in cached_dev_free()
1353 kthread_stop(dc->writeback_thread); in cached_dev_free()
1354 if (!IS_ERR_OR_NULL(dc->status_update_thread)) in cached_dev_free()
1355 kthread_stop(dc->status_update_thread); in cached_dev_free()
1359 if (atomic_read(&dc->running)) { in cached_dev_free()
1360 bd_unlink_disk_holder(dc->bdev, dc->disk.disk); in cached_dev_free()
1361 del_gendisk(dc->disk.disk); in cached_dev_free()
1363 bcache_device_free(&dc->disk); in cached_dev_free()
1364 list_del(&dc->list); in cached_dev_free()
1368 if (dc->sb_disk) in cached_dev_free()
1369 put_page(virt_to_page(dc->sb_disk)); in cached_dev_free()
1371 if (!IS_ERR_OR_NULL(dc->bdev)) in cached_dev_free()
1372 blkdev_put(dc->bdev, dc); in cached_dev_free()
1376 kobject_put(&dc->disk.kobj); in cached_dev_free()
1381 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); in cached_dev_flush() local
1382 struct bcache_device *d = &dc->disk; in cached_dev_flush()
1388 bch_cache_accounting_destroy(&dc->accounting); in cached_dev_flush()
1389 kobject_del(&d->kobj); in cached_dev_flush()
1394 static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) in cached_dev_init() argument
1398 struct request_queue *q = bdev_get_queue(dc->bdev); in cached_dev_init()
1401 INIT_LIST_HEAD(&dc->list); in cached_dev_init()
1402 closure_init(&dc->disk.cl, NULL); in cached_dev_init()
1403 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); in cached_dev_init()
1404 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); in cached_dev_init()
1405 INIT_WORK(&dc->detach, cached_dev_detach_finish); in cached_dev_init()
1406 sema_init(&dc->sb_write_mutex, 1); in cached_dev_init()
1407 INIT_LIST_HEAD(&dc->io_lru); in cached_dev_init()
1408 spin_lock_init(&dc->io_lock); in cached_dev_init()
1409 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); in cached_dev_init()
1411 dc->sequential_cutoff = 4 << 20; in cached_dev_init()
1413 for (io = dc->io; io < dc->io + RECENT_IO; io++) { in cached_dev_init()
1414 list_add(&io->lru, &dc->io_lru); in cached_dev_init()
1415 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); in cached_dev_init()
1418 dc->disk.stripe_size = q->limits.io_opt >> 9; in cached_dev_init()
1420 if (dc->disk.stripe_size) in cached_dev_init()
1421 dc->partial_stripes_expensive = in cached_dev_init()
1422 q->limits.raid_partial_stripes_expensive; in cached_dev_init()
1424 ret = bcache_device_init(&dc->disk, block_size, in cached_dev_init()
1425 bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, in cached_dev_init()
1426 dc->bdev, &bcache_cached_ops); in cached_dev_init()
1430 blk_queue_io_opt(dc->disk.disk->queue, in cached_dev_init()
1431 max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); in cached_dev_init()
1433 atomic_set(&dc->io_errors, 0); in cached_dev_init()
1434 dc->io_disable = false; in cached_dev_init()
1435 dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT; in cached_dev_init()
1436 /* default to auto */ in cached_dev_init()
1437 dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO; in cached_dev_init()
1439 bch_cached_dev_request_init(dc); in cached_dev_init()
1440 bch_cached_dev_writeback_init(dc); in cached_dev_init()
1444 /* Cached device - bcache superblock */
1448 struct cached_dev *dc) in register_bdev() argument
1452 int ret = -ENOMEM; in register_bdev()
1454 memcpy(&dc->sb, sb, sizeof(struct cache_sb)); in register_bdev()
1455 dc->bdev = bdev; in register_bdev()
1456 dc->sb_disk = sb_disk; in register_bdev()
1458 if (cached_dev_init(dc, sb->block_size << 9)) in register_bdev()
1462 if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache")) in register_bdev()
1464 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) in register_bdev()
1467 pr_info("registered backing device %pg\n", dc->bdev); in register_bdev()
1469 list_add(&dc->list, &uncached_devices); in register_bdev()
1470 /* attach to a matched cache set if it exists */ in register_bdev()
1472 bch_cached_dev_attach(dc, c, NULL); in register_bdev()
1474 if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || in register_bdev()
1475 BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { in register_bdev()
1476 err = "failed to run cached device"; in register_bdev()
1477 ret = bch_cached_dev_run(dc); in register_bdev()
1484 pr_notice("error %pg: %s\n", dc->bdev, err); in register_bdev()
1485 bcache_device_stop(&dc->disk); in register_bdev()
1491 /* When d->kobj released */
1505 &d->c->flash_dev_dirty_sectors); in flash_dev_free()
1506 del_gendisk(d->disk); in flash_dev_free()
1509 kobject_put(&d->kobj); in flash_dev_free()
1519 kobject_del(&d->kobj); in flash_dev_flush()
1525 int err = -ENOMEM; in flash_dev_run()
1531 closure_init(&d->cl, NULL); in flash_dev_run()
1532 set_closure_fn(&d->cl, flash_dev_flush, system_wq); in flash_dev_run()
1534 kobject_init(&d->kobj, &bch_flash_dev_ktype); in flash_dev_run()
1536 if (bcache_device_init(d, block_bytes(c->cache), u->sectors, in flash_dev_run()
1540 bcache_device_attach(d, c, u - c->uuids); in flash_dev_run()
1543 err = add_disk(d->disk); in flash_dev_run()
1547 err = kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"); in flash_dev_run()
1553 if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { in flash_dev_run()
1554 …pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); in flash_dev_run()
1555 pr_err("Please update to the latest bcache-tools to create the cache device\n"); in flash_dev_run()
1556 set_disk_ro(d->disk, 1); in flash_dev_run()
1561 kobject_put(&d->kobj); in flash_dev_run()
1571 for (u = c->uuids; in flash_devs_run()
1572 u < c->uuids + c->nr_uuids && !ret; in flash_devs_run()
1584 if (test_bit(CACHE_SET_STOPPING, &c->flags)) in bch_flash_dev_create()
1585 return -EINTR; in bch_flash_dev_create()
1587 if (!test_bit(CACHE_SET_RUNNING, &c->flags)) in bch_flash_dev_create()
1588 return -EPERM; in bch_flash_dev_create()
1593 return -EINVAL; in bch_flash_dev_create()
1596 get_random_bytes(u->uuid, 16); in bch_flash_dev_create()
1597 memset(u->label, 0, 32); in bch_flash_dev_create()
1598 u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds()); in bch_flash_dev_create()
1601 u->sectors = size >> 9; in bch_flash_dev_create()
1608 bool bch_cached_dev_error(struct cached_dev *dc) in bch_cached_dev_error() argument
1610 if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) in bch_cached_dev_error()
1613 dc->io_disable = true; in bch_cached_dev_error()
1618 dc->disk.disk->disk_name, dc->bdev); in bch_cached_dev_error()
1620 bcache_device_stop(&dc->disk); in bch_cached_dev_error()
1632 if (c->on_error != ON_ERROR_PANIC && in bch_cache_set_error()
1633 test_bit(CACHE_SET_STOPPING, &c->flags)) in bch_cache_set_error()
1636 if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) in bch_cache_set_error()
1650 c->set_uuid, &vaf); in bch_cache_set_error()
1654 if (c->on_error == ON_ERROR_PANIC) in bch_cache_set_error()
1661 /* When c->kobj released */
1675 debugfs_remove(c->debug); in cache_set_free()
1682 bch_bset_sort_state_free(&c->sort); in cache_set_free()
1683 free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb))); in cache_set_free()
1685 ca = c->cache; in cache_set_free()
1687 ca->set = NULL; in cache_set_free()
1688 c->cache = NULL; in cache_set_free()
1689 kobject_put(&ca->kobj); in cache_set_free()
1693 if (c->moving_gc_wq) in cache_set_free()
1694 destroy_workqueue(c->moving_gc_wq); in cache_set_free()
1695 bioset_exit(&c->bio_split); in cache_set_free()
1696 mempool_exit(&c->fill_iter); in cache_set_free()
1697 mempool_exit(&c->bio_meta); in cache_set_free()
1698 mempool_exit(&c->search); in cache_set_free()
1699 kfree(c->devices); in cache_set_free()
1701 list_del(&c->list); in cache_set_free()
1704 pr_info("Cache set %pU unregistered\n", c->set_uuid); in cache_set_free()
1707 closure_debug_destroy(&c->cl); in cache_set_free()
1708 kobject_put(&c->kobj); in cache_set_free()
1714 struct cache *ca = c->cache; in cache_set_flush()
1717 bch_cache_accounting_destroy(&c->accounting); in cache_set_flush()
1719 kobject_put(&c->internal); in cache_set_flush()
1720 kobject_del(&c->kobj); in cache_set_flush()
1722 if (!IS_ERR_OR_NULL(c->gc_thread)) in cache_set_flush()
1723 kthread_stop(c->gc_thread); in cache_set_flush()
1725 if (!IS_ERR(c->root)) in cache_set_flush()
1726 list_add(&c->root->list, &c->btree_cache); in cache_set_flush()
1730 * due to too many I/O errors detected. in cache_set_flush()
1732 if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) in cache_set_flush()
1733 list_for_each_entry(b, &c->btree_cache, list) { in cache_set_flush()
1734 mutex_lock(&b->write_lock); in cache_set_flush()
1737 mutex_unlock(&b->write_lock); in cache_set_flush()
1740 if (ca->alloc_thread) in cache_set_flush()
1741 kthread_stop(ca->alloc_thread); in cache_set_flush()
1743 if (c->journal.cur) { in cache_set_flush()
1744 cancel_delayed_work_sync(&c->journal.work); in cache_set_flush()
1746 c->journal.work.work.func(&c->journal.work.work); in cache_set_flush()
1754 * cache set is unregistering due to too many I/O errors. In this condition,
1758 * dc->stop_when_cache_set_failed    dc->has_dirty    stop bcache device
1764 * The expected behavior is, if stop_when_cache_set_failed is configured to
1770 struct cached_dev *dc) in conditional_stop_bcache_device() argument
1772 if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { in conditional_stop_bcache_device()
1774 d->disk->disk_name, c->set_uuid); in conditional_stop_bcache_device()
1776 } else if (atomic_read(&dc->has_dirty)) { in conditional_stop_bcache_device()
1778 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO in conditional_stop_bcache_device()
1779 * and dc->has_dirty == 1 in conditional_stop_bcache_device()
1781 …pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potenti… in conditional_stop_bcache_device()
1782 d->disk->disk_name); in conditional_stop_bcache_device()
1787 * backing device as no cache set attached to. This in conditional_stop_bcache_device()
1791 * to a broken cache device, dc->io_disable should be in conditional_stop_bcache_device()
1792 * explicitly set to true. in conditional_stop_bcache_device()
1794 dc->io_disable = true; in conditional_stop_bcache_device()
1800 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO in conditional_stop_bcache_device()
1801 * and dc->has_dirty == 0 in conditional_stop_bcache_device()
1804 d->disk->disk_name); in conditional_stop_bcache_device()
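Read together, the conditional_stop_bcache_device() fragments above implement a small policy: "always" stops the bcache device unconditionally, "auto" stops it (after setting dc->io_disable) only when dirty data exists, and otherwise leaves the now-detached device running. A compact sketch of that decision follows; the enum and function names are illustrative, not the kernel's.

#include <stdbool.h>

/* Hypothetical stand-ins for BCH_CACHED_DEV_STOP_AUTO / _ALWAYS */
enum stop_policy { STOP_AUTO, STOP_ALWAYS };

struct decision {
    bool stop_device;   /* stop the bcache device */
    bool io_disable;    /* also refuse further I/O before stopping */
};

static struct decision stop_on_cache_set_failure(enum stop_policy policy,
                                                 bool has_dirty)
{
    struct decision d = { false, false };

    if (policy == STOP_ALWAYS) {
        d.stop_device = true;       /* "always": stop regardless of state */
    } else if (has_dirty) {
        d.io_disable = true;        /* "auto" + dirty: data on the broken
                                     * cache may be newer than the backing dev */
        d.stop_device = true;
    }
    /* "auto" + clean: keep running as a plain detached backing device */
    return d;
}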
1811 struct cached_dev *dc; in __cache_set_unregister() local
1817 for (i = 0; i < c->devices_max_used; i++) { in __cache_set_unregister()
1818 d = c->devices[i]; in __cache_set_unregister()
1822 if (!UUID_FLASH_ONLY(&c->uuids[i]) && in __cache_set_unregister()
1823 test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { in __cache_set_unregister()
1824 dc = container_of(d, struct cached_dev, disk); in __cache_set_unregister()
1825 bch_cached_dev_detach(dc); in __cache_set_unregister()
1826 if (test_bit(CACHE_SET_IO_DISABLE, &c->flags)) in __cache_set_unregister()
1827 conditional_stop_bcache_device(c, d, dc); in __cache_set_unregister()
1840 if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) in bch_cache_set_stop()
1841 /* closure_fn set to __cache_set_unregister() */ in bch_cache_set_stop()
1842 closure_queue(&c->caching); in bch_cache_set_stop()
1847 set_bit(CACHE_SET_UNREGISTERING, &c->flags); in bch_cache_set_unregister()
1864 closure_init(&c->cl, NULL); in bch_cache_set_alloc()
1865 set_closure_fn(&c->cl, cache_set_free, system_wq); in bch_cache_set_alloc()
1867 closure_init(&c->caching, &c->cl); in bch_cache_set_alloc()
1868 set_closure_fn(&c->caching, __cache_set_unregister, system_wq); in bch_cache_set_alloc()
1871 closure_set_stopped(&c->cl); in bch_cache_set_alloc()
1872 closure_put(&c->cl); in bch_cache_set_alloc()
1874 kobject_init(&c->kobj, &bch_cache_set_ktype); in bch_cache_set_alloc()
1875 kobject_init(&c->internal, &bch_cache_set_internal_ktype); in bch_cache_set_alloc()
1877 bch_cache_accounting_init(&c->accounting, &c->cl); in bch_cache_set_alloc()
1879 memcpy(c->set_uuid, sb->set_uuid, 16); in bch_cache_set_alloc()
1881 c->cache = ca; in bch_cache_set_alloc()
1882 c->cache->set = c; in bch_cache_set_alloc()
1883 c->bucket_bits = ilog2(sb->bucket_size); in bch_cache_set_alloc()
1884 c->block_bits = ilog2(sb->block_size); in bch_cache_set_alloc()
1885 c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry); in bch_cache_set_alloc()
1886 c->devices_max_used = 0; in bch_cache_set_alloc()
1887 atomic_set(&c->attached_dev_nr, 0); in bch_cache_set_alloc()
1888 c->btree_pages = meta_bucket_pages(sb); in bch_cache_set_alloc()
1889 if (c->btree_pages > BTREE_MAX_PAGES) in bch_cache_set_alloc()
1890 c->btree_pages = max_t(int, c->btree_pages / 4, in bch_cache_set_alloc()
1893 sema_init(&c->sb_write_mutex, 1); in bch_cache_set_alloc()
1894 mutex_init(&c->bucket_lock); in bch_cache_set_alloc()
1895 init_waitqueue_head(&c->btree_cache_wait); in bch_cache_set_alloc()
1896 spin_lock_init(&c->btree_cannibalize_lock); in bch_cache_set_alloc()
1897 init_waitqueue_head(&c->bucket_wait); in bch_cache_set_alloc()
1898 init_waitqueue_head(&c->gc_wait); in bch_cache_set_alloc()
1899 sema_init(&c->uuid_write_mutex, 1); in bch_cache_set_alloc()
1901 spin_lock_init(&c->btree_gc_time.lock); in bch_cache_set_alloc()
1902 spin_lock_init(&c->btree_split_time.lock); in bch_cache_set_alloc()
1903 spin_lock_init(&c->btree_read_time.lock); in bch_cache_set_alloc()
1907 INIT_LIST_HEAD(&c->list); in bch_cache_set_alloc()
1908 INIT_LIST_HEAD(&c->cached_devs); in bch_cache_set_alloc()
1909 INIT_LIST_HEAD(&c->btree_cache); in bch_cache_set_alloc()
1910 INIT_LIST_HEAD(&c->btree_cache_freeable); in bch_cache_set_alloc()
1911 INIT_LIST_HEAD(&c->btree_cache_freed); in bch_cache_set_alloc()
1912 INIT_LIST_HEAD(&c->data_buckets); in bch_cache_set_alloc()
1914 iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) * in bch_cache_set_alloc()
1917 c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); in bch_cache_set_alloc()
1918 if (!c->devices) in bch_cache_set_alloc()
1921 if (mempool_init_slab_pool(&c->search, 32, bch_search_cache)) in bch_cache_set_alloc()
1924 if (mempool_init_kmalloc_pool(&c->bio_meta, 2, in bch_cache_set_alloc()
1929 if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size)) in bch_cache_set_alloc()
1932 if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio), in bch_cache_set_alloc()
1936 c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb); in bch_cache_set_alloc()
1937 if (!c->uuids) in bch_cache_set_alloc()
1940 c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0); in bch_cache_set_alloc()
1941 if (!c->moving_gc_wq) in bch_cache_set_alloc()
1953 if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages))) in bch_cache_set_alloc()
1956 c->congested_read_threshold_us = 2000; in bch_cache_set_alloc()
1957 c->congested_write_threshold_us = 20000; in bch_cache_set_alloc()
1958 c->error_limit = DEFAULT_IO_ERROR_LIMIT; in bch_cache_set_alloc()
1959 c->idle_max_writeback_rate_enabled = 1; in bch_cache_set_alloc()
1960 WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)); in bch_cache_set_alloc()
1971 struct cached_dev *dc, *t; in run_cache_set() local
1972 struct cache *ca = c->cache; in run_cache_set()
1979 c->nbuckets = ca->sb.nbuckets; in run_cache_set()
1982 if (CACHE_SYNC(&c->cache->sb)) { in run_cache_set()
1996 j = &list_entry(journal.prev, struct journal_replay, list)->j; in run_cache_set()
1999 if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) in run_cache_set()
2008 k = &j->btree_root; in run_cache_set()
2015 c->root = bch_btree_node_get(c, NULL, k, in run_cache_set()
2016 j->btree_level, in run_cache_set()
2018 if (IS_ERR_OR_NULL(c->root)) in run_cache_set()
2021 list_del_init(&c->root->list); in run_cache_set()
2022 rw_unlock(true, c->root); in run_cache_set()
2039 * gc_gen - this is a hack but oh well. in run_cache_set()
2041 bch_journal_next(&c->journal); in run_cache_set()
2048 * First place it's safe to allocate: btree_check() and in run_cache_set()
2049 * btree_gc_finish() have to run before we have buckets to in run_cache_set()
2051 * entry to be written so bcache_journal_next() has to be called in run_cache_set()
2054 * If the uuids were in the old format we have to rewrite them in run_cache_set()
2057 if (j->version < BCACHE_JSET_VERSION_UUID) in run_cache_set()
2067 ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, in run_cache_set()
2070 for (j = 0; j < ca->sb.keys; j++) in run_cache_set()
2071 ca->sb.d[j] = ca->sb.first_bucket + j; in run_cache_set()
2079 mutex_lock(&c->bucket_lock); in run_cache_set()
2081 mutex_unlock(&c->bucket_lock); in run_cache_set()
2088 c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); in run_cache_set()
2089 if (IS_ERR(c->root)) in run_cache_set()
2092 mutex_lock(&c->root->write_lock); in run_cache_set()
2093 bkey_copy_key(&c->root->key, &MAX_KEY); in run_cache_set()
2094 bch_btree_node_write(c->root, &cl); in run_cache_set()
2095 mutex_unlock(&c->root->write_lock); in run_cache_set()
2097 bch_btree_set_root(c->root); in run_cache_set()
2098 rw_unlock(true, c->root); in run_cache_set()
2101 * We don't want to write the first journal entry until in run_cache_set()
2102 * everything is set up - fortunately journal entries won't be in run_cache_set()
2105 SET_CACHE_SYNC(&c->cache->sb, true); in run_cache_set()
2107 bch_journal_next(&c->journal); in run_cache_set()
2116 c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); in run_cache_set()
2119 if (bch_has_feature_obso_large_bucket(&c->cache->sb)) in run_cache_set()
2120 pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); in run_cache_set()
2122 list_for_each_entry_safe(dc, t, &uncached_devices, list) in run_cache_set()
2123 bch_cached_dev_attach(dc, c, NULL); in run_cache_set()
2127 bch_journal_space_reserve(&c->journal); in run_cache_set()
2128 set_bit(CACHE_SET_RUNNING, &c->flags); in run_cache_set()
2133 list_del(&l->list); in run_cache_set()
2141 return -EIO; in run_cache_set()
2151 if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) { in register_cache_set()
2152 if (c->cache) in register_cache_set()
2158 c = bch_cache_set_alloc(&ca->sb); in register_cache_set()
2163 if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) || in register_cache_set()
2164 kobject_add(&c->internal, &c->kobj, "internal")) in register_cache_set()
2167 if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) in register_cache_set()
2172 list_add(&c->list, &bch_cache_sets); in register_cache_set()
2174 sprintf(buf, "cache%i", ca->sb.nr_this_dev); in register_cache_set()
2175 if (sysfs_create_link(&ca->kobj, &c->kobj, "set") || in register_cache_set()
2176 sysfs_create_link(&c->kobj, &ca->kobj, buf)) in register_cache_set()
2179 kobject_get(&ca->kobj); in register_cache_set()
2180 ca->set = c; in register_cache_set()
2181 ca->set->cache = ca; in register_cache_set()
2183 err = "failed to run cache set"; in register_cache_set()
2195 /* When ca->kobj released */
2201 if (ca->set) { in bch_cache_release()
2202 BUG_ON(ca->set->cache != ca); in bch_cache_release()
2203 ca->set->cache = NULL; in bch_cache_release()
2206 free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb))); in bch_cache_release()
2207 kfree(ca->prio_buckets); in bch_cache_release()
2208 vfree(ca->buckets); in bch_cache_release()
2210 free_heap(&ca->heap); in bch_cache_release()
2211 free_fifo(&ca->free_inc); in bch_cache_release()
2214 free_fifo(&ca->free[i]); in bch_cache_release()
2216 if (ca->sb_disk) in bch_cache_release()
2217 put_page(virt_to_page(ca->sb_disk)); in bch_cache_release()
2219 if (!IS_ERR_OR_NULL(ca->bdev)) in bch_cache_release()
2220 blkdev_put(ca->bdev, ca); in bch_cache_release()
2231 int ret = -ENOMEM; in cache_alloc()
2235 kobject_init(&ca->kobj, &bch_cache_ktype); in cache_alloc()
2237 bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0); in cache_alloc()
2240 * when ca->sb.njournal_buckets is not zero, journal exists, in cache_alloc()
2244 * and all the keys need to replay, in cache_alloc()
2248 btree_buckets = ca->sb.njournal_buckets ?: 8; in cache_alloc()
2249 free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; in cache_alloc()
2251 ret = -EPERM; in cache_alloc()
2252 err = "ca->sb.nbuckets is too small"; in cache_alloc()
2256 if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, in cache_alloc()
2258 err = "ca->free[RESERVE_BTREE] alloc failed"; in cache_alloc()
2262 if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), in cache_alloc()
2264 err = "ca->free[RESERVE_PRIO] alloc failed"; in cache_alloc()
2268 if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) { in cache_alloc()
2269 err = "ca->free[RESERVE_MOVINGGC] alloc failed"; in cache_alloc()
2273 if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) { in cache_alloc()
2274 err = "ca->free[RESERVE_NONE] alloc failed"; in cache_alloc()
2278 if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) { in cache_alloc()
2279 err = "ca->free_inc alloc failed"; in cache_alloc()
2283 if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) { in cache_alloc()
2284 err = "ca->heap alloc failed"; in cache_alloc()
2288 ca->buckets = vzalloc(array_size(sizeof(struct bucket), in cache_alloc()
2289 ca->sb.nbuckets)); in cache_alloc()
2290 if (!ca->buckets) { in cache_alloc()
2291 err = "ca->buckets alloc failed"; in cache_alloc()
2295 ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), in cache_alloc()
2298 if (!ca->prio_buckets) { in cache_alloc()
2299 err = "ca->prio_buckets alloc failed"; in cache_alloc()
2303 ca->disk_buckets = alloc_meta_bucket_pages(GFP_KERNEL, &ca->sb); in cache_alloc()
2304 if (!ca->disk_buckets) { in cache_alloc()
2305 err = "ca->disk_buckets alloc failed"; in cache_alloc()
2309 ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); in cache_alloc()
2312 atomic_set(&b->pin, 0); in cache_alloc()
2316 kfree(ca->prio_buckets); in cache_alloc()
2318 vfree(ca->buckets); in cache_alloc()
2320 free_heap(&ca->heap); in cache_alloc()
2322 free_fifo(&ca->free_inc); in cache_alloc()
2324 free_fifo(&ca->free[RESERVE_NONE]); in cache_alloc()
2326 free_fifo(&ca->free[RESERVE_MOVINGGC]); in cache_alloc()
2328 free_fifo(&ca->free[RESERVE_PRIO]); in cache_alloc()
2330 free_fifo(&ca->free[RESERVE_BTREE]); in cache_alloc()
2335 pr_notice("error %pg: %s\n", ca->bdev, err); in cache_alloc()
2345 memcpy(&ca->sb, sb, sizeof(struct cache_sb)); in register_cache()
2346 ca->bdev = bdev; in register_cache()
2347 ca->sb_disk = sb_disk; in register_cache()
2350 ca->discard = CACHE_DISCARD(&ca->sb); in register_cache()
2355 * If we failed here, it means ca->kobj is not initialized yet, in register_cache()
2356 * kobject_put() won't be called and there is no chance to in register_cache()
2357 * call blkdev_put() to bdev in bch_cache_release(). So we in register_cache()
2361 if (ret == -ENOMEM) in register_cache()
2362 err = "cache_alloc(): -ENOMEM"; in register_cache()
2363 else if (ret == -EPERM) in register_cache()
2370 if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) { in register_cache()
2372 ret = -ENOMEM; in register_cache()
2381 ret = -ENODEV; in register_cache()
2385 pr_info("registered cache device %pg\n", ca->bdev); in register_cache()
2388 kobject_put(&ca->kobj); in register_cache()
2392 pr_notice("error %pg: %s\n", ca->bdev, err); in register_cache()
2412 struct cached_dev *dc, *t; in bch_is_open_backing() local
2415 list_for_each_entry_safe(dc, t, &c->cached_devs, list) in bch_is_open_backing()
2416 if (dc->bdev->bd_dev == dev) in bch_is_open_backing()
2418 list_for_each_entry_safe(dc, t, &uncached_devices, list) in bch_is_open_backing()
2419 if (dc->bdev->bd_dev == dev) in bch_is_open_backing()
2429 struct cache *ca = c->cache; in bch_is_open_cache()
2431 if (ca->bdev->bd_dev == dev) in bch_is_open_cache()
2459 if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder) in register_bdev_worker()
2465 pr_info("error %s: fail to register backing device\n", in register_bdev_worker()
2466 args->path); in register_bdev_worker()
2467 kfree(args->sb); in register_bdev_worker()
2468 kfree(args->path); in register_bdev_worker()
2480 if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder)) in register_cache_worker()
2484 pr_info("error %s: fail to register cache device\n", in register_cache_worker()
2485 args->path); in register_cache_worker()
2486 kfree(args->sb); in register_cache_worker()
2487 kfree(args->path); in register_cache_worker()
2494 if (SB_IS_BDEV(args->sb)) in register_device_async()
2495 INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker); in register_device_async()
2497 INIT_DELAYED_WORK(&args->reg_work, register_cache_worker); in register_device_async()
2500 queue_delayed_work(system_wq, &args->reg_work, 10); in register_device_async()
2527 ret = -EBUSY; in register_bcache()
2528 err = "failed to reference bcache module"; in register_bcache()
2538 ret = -ENOMEM; in register_bcache()
2548 ret = -EINVAL; in register_bcache()
2549 err = "failed to open device"; in register_bcache()
2554 err = "failed to set blocksize"; in register_bcache()
2564 ret = -ENOMEM; in register_bcache()
2570 bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, in register_bcache()
2577 if (ret == -EBUSY) { in register_bcache()
2595 err = "failed to register device"; in register_bcache()
2603 ret = -ENOMEM; in register_bcache()
2608 args->path = path; in register_bcache()
2609 args->sb = sb; in register_bcache()
2610 args->sb_disk = sb_disk; in register_bcache()
2611 args->bdev = bdev; in register_bcache()
2612 args->holder = holder; in register_bcache()
2614 /* No wait and returns to user space */ in register_bcache()
2661 struct cached_dev *dc; member
2671 struct cached_dev *dc, *tdc; in bch_pending_bdevs_cleanup() local
2676 list_for_each_entry_safe(dc, tdc, &uncached_devices, list) { in bch_pending_bdevs_cleanup()
2680 pdev->dc = dc; in bch_pending_bdevs_cleanup()
2681 list_add(&pdev->list, &pending_devs); in bch_pending_bdevs_cleanup()
2685 char *pdev_set_uuid = pdev->dc->sb.set_uuid; in bch_pending_bdevs_cleanup()
2687 char *set_uuid = c->set_uuid; in bch_pending_bdevs_cleanup()
2690 list_del(&pdev->list); in bch_pending_bdevs_cleanup()
2700 list_del(&pdev->list); in bch_pending_bdevs_cleanup()
2701 bcache_device_stop(&pdev->dc->disk); in bch_pending_bdevs_cleanup()
2721 struct cached_dev *dc, *tdc; in bcache_reboot() local
2732 * core know bcache_is_reboot set to true earlier in bcache_reboot()
2745 * The reason bch_register_lock is not held to call in bcache_reboot()
2746 * bch_cache_set_stop() and bcache_device_stop() is to in bcache_reboot()
2751 * We are safe here because bcache_is_reboot sets to in bcache_reboot()
2754 * bcache_reboot() won't be re-entered on by other thread, in bcache_reboot()
2761 list_for_each_entry_safe(dc, tdc, &uncached_devices, list) in bcache_reboot()
2762 bcache_device_stop(&dc->disk); in bcache_reboot()
2767 * kworkers to stop themselves in bcache_reboot()
2773 long timeout = start + 10 * HZ - jiffies; in bcache_reboot()
2794 pr_notice("Timeout waiting for devices to be closed\n"); in bcache_reboot()
2833 pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n", in check_module_parameters()
2841 pr_warn("set bch_cutoff_writeback (%u) to max value %u\n", in check_module_parameters()
2847 pr_warn("set bch_cutoff_writeback (%u) to %u\n", in check_module_parameters()
2889 * We still want to use our own queue to not congest the `system_wq`. in bcache_init()
2915 return -ENOMEM; in bcache_init()
2925 MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback");
2928 MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback");