1 // SPDX-License-Identifier: GPL-2.0
/*
 * Generic block layer helper functions: discard, write same and zeroout.
 */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/scatterlist.h>
10 
11 #include "blk.h"
12 
/*
 * Allocate a fresh bio with room for @nr_pages vecs using bio_alloc().
 *
 * When @bio is non-NULL it is passed to bio_chain() together with the new
 * bio and then handed to submit_bio().  When @bio is NULL nothing is
 * submitted.  In both cases the newly allocated bio is returned.
 *
 * Note that the result of bio_alloc() is not checked here.
 */
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
{
	struct bio *next = bio_alloc(gfp, nr_pages);

	/* Nothing to chain to yet: the new bio becomes the first link. */
	if (!bio)
		return next;

	bio_chain(bio, next);
	submit_bio(bio);

	return next;
}
24 
/**
 * __blkdev_issue_discard - build a chain of discard bios for a sector range
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Splits the range into REQ_OP_DISCARD bios, or REQ_OP_SECURE_ERASE bios
 *  when @flags contains BLKDEV_DISCARD_SECURE.  Every bio is obtained through
 *  blk_next_bio(), which chains and submits the previous one; the last bio
 *  is stored in @biop and is not submitted here.
 *
 * Return: 0 on success, -ENXIO if @bdev has no request queue, -EPERM if
 * @bdev is read-only, -EOPNOTSUPP if the queue lacks the requested operation
 * or reports a zero discard granularity, and -EINVAL if the range is empty
 * or not aligned to the logical block size.
 */
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	sector_t part_offset = 0;
	sector_t bs_mask;
	unsigned int op;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (!(flags & BLKDEV_DISCARD_SECURE)) {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	} else {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	}

	/* A driver that left discard_granularity unset is buggy: bail out. */
	if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
		char name[BDEVNAME_SIZE];

		bdevname(bdev, name);
		pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", name);
		return -EOPNOTSUPP;
	}

	/* Reject empty ranges and ranges not aligned to the logical block. */
	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if (((sector | nr_sects) & bs_mask) || !nr_sects)
		return -EINVAL;

	/* The alignment math below works on the partition-adjusted sector. */
	if (bdev_is_partition(bdev))
		part_offset = bdev->bd_part->start_sect;

	while (nr_sects) {
		sector_t start = sector + part_offset;
		sector_t aligned = round_up(start,
				q->limits.discard_granularity >> SECTOR_SHIFT);
		sector_t limit, req_sects;

		/*
		 * If this bio does not start on a discard_granularity aligned
		 * LBA, make it just long enough to reach the next aligned
		 * LBA so that the following bio starts aligned.  Otherwise
		 * cap it at bio_aligned_discard_max_sectors(), so that
		 * pieces split off later by the device driver are very
		 * probably aligned to the queue's discard_granularity too.
		 */
		limit = (aligned == start) ?
			bio_aligned_discard_max_sectors(q) : aligned - start;
		req_sects = min_t(sector_t, nr_sects, limit);

		WARN_ON_ONCE((req_sects << SECTOR_SHIFT) > UINT_MAX);

		bio = blk_next_bio(bio, 0, gfp_mask);
		/* bi_sector is relative to @bdev, without the partition offset */
		bio->bi_iter.bi_sector = sector;
		bio->bi_iter.bi_size = req_sects << SECTOR_SHIFT;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		nr_sects -= req_sects;
		sector += req_sects;

		/*
		 * Full device discards (mkfs, for instance) can keep us in
		 * this loop for a long time.  Offer to reschedule so that we
		 * do not trigger a soft lockup when preemption is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
119 
120 /**
121  * blkdev_issue_discard - queue a discard
122  * @bdev:	blockdev to issue discard for
123  * @sector:	start sector
124  * @nr_sects:	number of sectors to discard
125  * @gfp_mask:	memory allocation flags (for bio_alloc)
126  * @flags:	BLKDEV_DISCARD_* flags to control behaviour
127  *
128  * Description:
129  *    Issue a discard request for the sectors in question.
130  */
blkdev_issue_discard(struct block_device * bdev,sector_t sector,sector_t nr_sects,gfp_t gfp_mask,unsigned long flags)131 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
132 		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
133 {
134 	struct bio *bio = NULL;
135 	struct blk_plug plug;
136 	int ret;
137 
138 	blk_start_plug(&plug);
139 	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
140 			&bio);
141 	if (!ret && bio) {
142 		ret = submit_bio_wait(bio);
143 		if (ret == -EOPNOTSUPP)
144 			ret = 0;
145 		bio_put(bio);
146 	}
147 	blk_finish_plug(&plug);
148 
149 	return ret;
150 }
151 EXPORT_SYMBOL(blkdev_issue_discard);
152 
153 /**
154  * __blkdev_issue_write_same - generate number of bios with same page
155  * @bdev:	target blockdev
156  * @sector:	start sector
157  * @nr_sects:	number of sectors to write
158  * @gfp_mask:	memory allocation flags (for bio_alloc)
159  * @page:	page containing data to write
160  * @biop:	pointer to anchor bio
161  *
162  * Description:
163  *  Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
164  */
__blkdev_issue_write_same(struct block_device * bdev,sector_t sector,sector_t nr_sects,gfp_t gfp_mask,struct page * page,struct bio ** biop)165 static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
166 		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
167 		struct bio **biop)
168 {
169 	struct request_queue *q = bdev_get_queue(bdev);
170 	unsigned int max_write_same_sectors;
171 	struct bio *bio = *biop;
172 	sector_t bs_mask;
173 
174 	if (!q)
175 		return -ENXIO;
176 
177 	if (bdev_read_only(bdev))
178 		return -EPERM;
179 
180 	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
181 	if ((sector | nr_sects) & bs_mask)
182 		return -EINVAL;
183 
184 	if (!bdev_write_same(bdev))
185 		return -EOPNOTSUPP;
186 
187 	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
188 	max_write_same_sectors = bio_allowed_max_sectors(q);
189 
190 	while (nr_sects) {
191 		bio = blk_next_bio(bio, 1, gfp_mask);
192 		bio->bi_iter.bi_sector = sector;
193 		bio_set_dev(bio, bdev);
194 		bio->bi_vcnt = 1;
195 		bio->bi_io_vec->bv_page = page;
196 		bio->bi_io_vec->bv_offset = 0;
197 		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
198 		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
199 
200 		if (nr_sects > max_write_same_sectors) {
201 			bio->bi_iter.bi_size = max_write_same_sectors << 9;
202 			nr_sects -= max_write_same_sectors;
203 			sector += max_write_same_sectors;
204 		} else {
205 			bio->bi_iter.bi_size = nr_sects << 9;
206 			nr_sects = 0;
207 		}
208 		cond_resched();
209 	}
210 
211 	*biop = bio;
212 	return 0;
213 }
214 
215 /**
216  * blkdev_issue_write_same - queue a write same operation
217  * @bdev:	target blockdev
218  * @sector:	start sector
219  * @nr_sects:	number of sectors to write
220  * @gfp_mask:	memory allocation flags (for bio_alloc)
221  * @page:	page containing data
222  *
223  * Description:
224  *    Issue a write same request for the sectors in question.
225  */
blkdev_issue_write_same(struct block_device * bdev,sector_t sector,sector_t nr_sects,gfp_t gfp_mask,struct page * page)226 int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
227 				sector_t nr_sects, gfp_t gfp_mask,
228 				struct page *page)
229 {
230 	struct bio *bio = NULL;
231 	struct blk_plug plug;
232 	int ret;
233 
234 	blk_start_plug(&plug);
235 	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
236 			&bio);
237 	if (ret == 0 && bio) {
238 		ret = submit_bio_wait(bio);
239 		bio_put(bio);
240 	}
241 	blk_finish_plug(&plug);
242 	return ret;
243 }
244 EXPORT_SYMBOL(blkdev_issue_write_same);
245 
/*
 * Generate REQ_OP_WRITE_ZEROES bios covering @nr_sects sectors starting at
 * @sector.  REQ_NOUNMAP is added to each bio when @flags contains
 * BLKDEV_ZERO_NOUNMAP.  Every bio is obtained through blk_next_bio(); the
 * last one is returned through @biop without being submitted.
 *
 * Returns 0 on success, -ENXIO if @bdev has no request queue, -EPERM if
 * @bdev is read-only and -EOPNOTSUPP if the device advertises no write
 * zeroes capacity (bdev_write_zeroes_sectors() == 0).
 */
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	/*
	 * Ensure that max_write_zeroes_sectors doesn't overflow bi_size:
	 * bi_size is a 32-bit byte count, so the per-bio sector count must not
	 * exceed UINT_MAX >> SECTOR_SHIFT.  The cap is also rounded down to a
	 * multiple of the logical block size so that a split never produces a
	 * bio that starts or ends in the middle of a logical block.
	 */
	bs_mask = (bdev_logical_block_size(bdev) >> SECTOR_SHIFT) - 1;
	max_write_zeroes_sectors = min_t(sector_t,
			bdev_write_zeroes_sectors(bdev),
			(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = blk_next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size =
				max_write_zeroes_sectors << SECTOR_SHIFT;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			/* Final bio: takes whatever is left of the range */
			bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}
288 
289 /*
290  * Convert a number of 512B sectors to a number of pages.
291  * The result is limited to a number of pages that can fit into a BIO.
292  * Also make sure that the result is always at least 1 (page) for the cases
293  * where nr_sects is lower than the number of sectors in a page.
294  */
__blkdev_sectors_to_bio_pages(sector_t nr_sects)295 static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
296 {
297 	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
298 
299 	return min(pages, (sector_t)BIO_MAX_PAGES);
300 }
301 
/*
 * Zero a sector range with ordinary REQ_OP_WRITE bios whose payload is
 * ZERO_PAGE(0), added repeatedly until the range is covered.  Every bio is
 * obtained through blk_next_bio(); the last one is returned through @biop
 * without being submitted.
 *
 * Returns 0 on success, -ENXIO if @bdev has no request queue and -EPERM if
 * @bdev is read-only.
 */
static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int want;
	int added = 0;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	while (nr_sects != 0) {
		unsigned int nr_pages = __blkdev_sectors_to_bio_pages(nr_sects);

		bio = blk_next_bio(bio, nr_pages, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		/*
		 * Keep adding the zero page, at most PAGE_SIZE bytes at a
		 * time, until the range is covered or bio_add_page() accepts
		 * less than requested; in the latter case start a new bio.
		 */
		do {
			want = min((sector_t) PAGE_SIZE, nr_sects << 9);
			added = bio_add_page(bio, ZERO_PAGE(0), want, 0);
			nr_sects -= added >> 9;
			sector += added >> 9;
			if (added < want)
				break;
		} while (nr_sects != 0);

		cond_resched();
	}

	*biop = bio;
	return 0;
}
338 
339 /**
340  * __blkdev_issue_zeroout - generate number of zero filed write bios
341  * @bdev:	blockdev to issue
342  * @sector:	start sector
343  * @nr_sects:	number of sectors to write
344  * @gfp_mask:	memory allocation flags (for bio_alloc)
345  * @biop:	pointer to anchor bio
346  * @flags:	controls detailed behavior
347  *
348  * Description:
349  *  Zero-fill a block range, either using hardware offload or by explicitly
350  *  writing zeroes to the device.
351  *
352  *  If a device is using logical block provisioning, the underlying space will
353  *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
354  *
355  *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
356  *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
357  */
__blkdev_issue_zeroout(struct block_device * bdev,sector_t sector,sector_t nr_sects,gfp_t gfp_mask,struct bio ** biop,unsigned flags)358 int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
359 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
360 		unsigned flags)
361 {
362 	int ret;
363 	sector_t bs_mask;
364 
365 	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
366 	if ((sector | nr_sects) & bs_mask)
367 		return -EINVAL;
368 
369 	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
370 			biop, flags);
371 	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
372 		return ret;
373 
374 	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
375 					 biop);
376 }
377 EXPORT_SYMBOL(__blkdev_issue_zeroout);
378 
379 /**
380  * blkdev_issue_zeroout - zero-fill a block range
381  * @bdev:	blockdev to write
382  * @sector:	start sector
383  * @nr_sects:	number of sectors to write
384  * @gfp_mask:	memory allocation flags (for bio_alloc)
385  * @flags:	controls detailed behavior
386  *
387  * Description:
388  *  Zero-fill a block range, either using hardware offload or by explicitly
389  *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
390  *  valid values for %flags.
391  */
blkdev_issue_zeroout(struct block_device * bdev,sector_t sector,sector_t nr_sects,gfp_t gfp_mask,unsigned flags)392 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
393 		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
394 {
395 	int ret = 0;
396 	sector_t bs_mask;
397 	struct bio *bio;
398 	struct blk_plug plug;
399 	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
400 
401 	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
402 	if ((sector | nr_sects) & bs_mask)
403 		return -EINVAL;
404 
405 retry:
406 	bio = NULL;
407 	blk_start_plug(&plug);
408 	if (try_write_zeroes) {
409 		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
410 						  gfp_mask, &bio, flags);
411 	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
412 		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
413 						gfp_mask, &bio);
414 	} else {
415 		/* No zeroing offload support */
416 		ret = -EOPNOTSUPP;
417 	}
418 	if (ret == 0 && bio) {
419 		ret = submit_bio_wait(bio);
420 		bio_put(bio);
421 	}
422 	blk_finish_plug(&plug);
423 	if (ret && try_write_zeroes) {
424 		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
425 			try_write_zeroes = false;
426 			goto retry;
427 		}
428 		if (!bdev_write_zeroes_sectors(bdev)) {
429 			/*
430 			 * Zeroing offload support was indicated, but the
431 			 * device reported ILLEGAL REQUEST (for some devices
432 			 * there is no non-destructive way to verify whether
433 			 * WRITE ZEROES is actually supported).
434 			 */
435 			ret = -EOPNOTSUPP;
436 		}
437 	}
438 
439 	return ret;
440 }
441 EXPORT_SYMBOL(blkdev_issue_zeroout);
442