1 /*
2 * Zoned block device handling
3 *
4 * Copyright (c) 2015, Hannes Reinecke
5 * Copyright (c) 2015, SUSE Linux GmbH
6 *
7 * Copyright (c) 2016, Damien Le Moal
8 * Copyright (c) 2016, Western Digital
9 */
10
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/rbtree.h>
14 #include <linux/blkdev.h>
15
/*
 * Return the first sector of the zone that contains @sector.
 *
 * The zone size minus one is used as a bit mask, so the result is only
 * meaningful when the zone size reported for @q is a power of two.
 */
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	/*
	 * Keep the mask in a sector_t so that the arithmetic below is done
	 * at full sector width, whatever type the zone size helper returns.
	 */
	const sector_t in_zone_bits = blk_queue_zone_sectors(q) - 1;

	/* Strip the offset within the zone to get back to the zone start */
	return sector - (sector & in_zone_bits);
}
23
24 /*
25 * Return true if a request is a write requests that needs zone write locking.
26 */
blk_req_needs_zone_write_lock(struct request * rq)27 bool blk_req_needs_zone_write_lock(struct request *rq)
28 {
29 if (!rq->q->seq_zones_wlock)
30 return false;
31
32 if (blk_rq_is_passthrough(rq))
33 return false;
34
35 switch (req_op(rq)) {
36 case REQ_OP_WRITE_ZEROES:
37 case REQ_OP_WRITE_SAME:
38 case REQ_OP_WRITE:
39 return blk_rq_zone_is_seq(rq);
40 default:
41 return false;
42 }
43 }
44 EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
45
__blk_req_zone_write_lock(struct request * rq)46 void __blk_req_zone_write_lock(struct request *rq)
47 {
48 if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
49 rq->q->seq_zones_wlock)))
50 return;
51
52 WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
53 rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
54 }
55 EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
56
__blk_req_zone_write_unlock(struct request * rq)57 void __blk_req_zone_write_unlock(struct request *rq)
58 {
59 rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
60 if (rq->q->seq_zones_wlock)
61 WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
62 rq->q->seq_zones_wlock));
63 }
64 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
65
66 /*
67 * Check that a zone report belongs to the partition.
68 * If yes, fix its start sector and write pointer, copy it in the
69 * zone information array and return true. Return false otherwise.
70 */
blkdev_report_zone(struct block_device * bdev,struct blk_zone * rep,struct blk_zone * zone)71 static bool blkdev_report_zone(struct block_device *bdev,
72 struct blk_zone *rep,
73 struct blk_zone *zone)
74 {
75 sector_t offset = get_start_sect(bdev);
76
77 if (rep->start < offset)
78 return false;
79
80 rep->start -= offset;
81 if (rep->start + rep->len > bdev->bd_part->nr_sects)
82 return false;
83
84 if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
85 rep->wp = rep->start + rep->len;
86 else
87 rep->wp -= offset;
88 memcpy(zone, rep, sizeof(struct blk_zone));
89
90 return true;
91 }
92
93 /**
94 * blkdev_report_zones - Get zones information
95 * @bdev: Target block device
96 * @sector: Sector from which to report zones
97 * @zones: Array of zone structures where to return the zones information
98 * @nr_zones: Number of zone structures in the zone array
99 * @gfp_mask: Memory allocation flags (for bio_alloc)
100 *
101 * Description:
102 * Get zone information starting from the zone containing @sector.
103 * The number of zone information reported may be less than the number
104 * requested by @nr_zones. The number of zones actually reported is
105 * returned in @nr_zones.
106 */
blkdev_report_zones(struct block_device * bdev,sector_t sector,struct blk_zone * zones,unsigned int * nr_zones,gfp_t gfp_mask)107 int blkdev_report_zones(struct block_device *bdev,
108 sector_t sector,
109 struct blk_zone *zones,
110 unsigned int *nr_zones,
111 gfp_t gfp_mask)
112 {
113 struct request_queue *q = bdev_get_queue(bdev);
114 struct blk_zone_report_hdr *hdr;
115 unsigned int nrz = *nr_zones;
116 struct page *page;
117 unsigned int nr_rep;
118 size_t rep_bytes;
119 unsigned int nr_pages;
120 struct bio *bio;
121 struct bio_vec *bv;
122 unsigned int i, n, nz;
123 unsigned int ofst;
124 void *addr;
125 int ret;
126
127 if (!q)
128 return -ENXIO;
129
130 if (!blk_queue_is_zoned(q))
131 return -EOPNOTSUPP;
132
133 if (!nrz)
134 return 0;
135
136 if (sector > bdev->bd_part->nr_sects) {
137 *nr_zones = 0;
138 return 0;
139 }
140
141 /*
142 * The zone report has a header. So make room for it in the
143 * payload. Also make sure that the report fits in a single BIO
144 * that will not be split down the stack.
145 */
146 rep_bytes = sizeof(struct blk_zone_report_hdr) +
147 sizeof(struct blk_zone) * nrz;
148 rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
149 if (rep_bytes > (queue_max_sectors(q) << 9))
150 rep_bytes = queue_max_sectors(q) << 9;
151
152 nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
153 rep_bytes >> PAGE_SHIFT);
154 nr_pages = min_t(unsigned int, nr_pages,
155 queue_max_segments(q));
156
157 bio = bio_alloc(gfp_mask, nr_pages);
158 if (!bio)
159 return -ENOMEM;
160
161 bio_set_dev(bio, bdev);
162 bio->bi_iter.bi_sector = blk_zone_start(q, sector);
163 bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
164
165 for (i = 0; i < nr_pages; i++) {
166 page = alloc_page(gfp_mask);
167 if (!page) {
168 ret = -ENOMEM;
169 goto out;
170 }
171 if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
172 __free_page(page);
173 break;
174 }
175 }
176
177 if (i == 0)
178 ret = -ENOMEM;
179 else
180 ret = submit_bio_wait(bio);
181 if (ret)
182 goto out;
183
184 /*
185 * Process the report result: skip the header and go through the
186 * reported zones to fixup and fixup the zone information for
187 * partitions. At the same time, return the zone information into
188 * the zone array.
189 */
190 n = 0;
191 nz = 0;
192 nr_rep = 0;
193 bio_for_each_segment_all(bv, bio, i) {
194
195 if (!bv->bv_page)
196 break;
197
198 addr = kmap_atomic(bv->bv_page);
199
200 /* Get header in the first page */
201 ofst = 0;
202 if (!nr_rep) {
203 hdr = addr;
204 nr_rep = hdr->nr_zones;
205 ofst = sizeof(struct blk_zone_report_hdr);
206 }
207
208 /* Fixup and report zones */
209 while (ofst < bv->bv_len &&
210 n < nr_rep && nz < nrz) {
211 if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
212 nz++;
213 ofst += sizeof(struct blk_zone);
214 n++;
215 }
216
217 kunmap_atomic(addr);
218
219 if (n >= nr_rep || nz >= nrz)
220 break;
221
222 }
223
224 *nr_zones = nz;
225 out:
226 bio_for_each_segment_all(bv, bio, i)
227 __free_page(bv->bv_page);
228 bio_put(bio);
229
230 return ret;
231 }
232 EXPORT_SYMBOL_GPL(blkdev_report_zones);
233
234 /**
235 * blkdev_reset_zones - Reset zones write pointer
236 * @bdev: Target block device
237 * @sector: Start sector of the first zone to reset
238 * @nr_sectors: Number of sectors, at least the length of one zone
239 * @gfp_mask: Memory allocation flags (for bio_alloc)
240 *
241 * Description:
242 * Reset the write pointer of the zones contained in the range
243 * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
244 * is valid, but the specified range should not contain conventional zones.
245 */
blkdev_reset_zones(struct block_device * bdev,sector_t sector,sector_t nr_sectors,gfp_t gfp_mask)246 int blkdev_reset_zones(struct block_device *bdev,
247 sector_t sector, sector_t nr_sectors,
248 gfp_t gfp_mask)
249 {
250 struct request_queue *q = bdev_get_queue(bdev);
251 sector_t zone_sectors;
252 sector_t end_sector = sector + nr_sectors;
253 struct bio *bio;
254 int ret;
255
256 if (!q)
257 return -ENXIO;
258
259 if (!blk_queue_is_zoned(q))
260 return -EOPNOTSUPP;
261
262 if (end_sector > bdev->bd_part->nr_sects)
263 /* Out of range */
264 return -EINVAL;
265
266 /* Check alignment (handle eventual smaller last zone) */
267 zone_sectors = blk_queue_zone_sectors(q);
268 if (sector & (zone_sectors - 1))
269 return -EINVAL;
270
271 if ((nr_sectors & (zone_sectors - 1)) &&
272 end_sector != bdev->bd_part->nr_sects)
273 return -EINVAL;
274
275 while (sector < end_sector) {
276
277 bio = bio_alloc(gfp_mask, 0);
278 bio->bi_iter.bi_sector = sector;
279 bio_set_dev(bio, bdev);
280 bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
281
282 ret = submit_bio_wait(bio);
283 bio_put(bio);
284
285 if (ret)
286 return ret;
287
288 sector += zone_sectors;
289
290 /* This may take a while, so be nice to others */
291 cond_resched();
292
293 }
294
295 return 0;
296 }
297 EXPORT_SYMBOL_GPL(blkdev_reset_zones);
298
299 /*
300 * BLKREPORTZONE ioctl processing.
301 * Called from blkdev_ioctl.
302 */
blkdev_report_zones_ioctl(struct block_device * bdev,fmode_t mode,unsigned int cmd,unsigned long arg)303 int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
304 unsigned int cmd, unsigned long arg)
305 {
306 void __user *argp = (void __user *)arg;
307 struct request_queue *q;
308 struct blk_zone_report rep;
309 struct blk_zone *zones;
310 int ret;
311
312 if (!argp)
313 return -EINVAL;
314
315 q = bdev_get_queue(bdev);
316 if (!q)
317 return -ENXIO;
318
319 if (!blk_queue_is_zoned(q))
320 return -ENOTTY;
321
322 if (!capable(CAP_SYS_ADMIN))
323 return -EACCES;
324
325 if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
326 return -EFAULT;
327
328 if (!rep.nr_zones)
329 return -EINVAL;
330
331 if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
332 return -ERANGE;
333
334 zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
335 GFP_KERNEL | __GFP_ZERO);
336 if (!zones)
337 return -ENOMEM;
338
339 ret = blkdev_report_zones(bdev, rep.sector,
340 zones, &rep.nr_zones,
341 GFP_KERNEL);
342 if (ret)
343 goto out;
344
345 if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
346 ret = -EFAULT;
347 goto out;
348 }
349
350 if (rep.nr_zones) {
351 if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
352 sizeof(struct blk_zone) * rep.nr_zones))
353 ret = -EFAULT;
354 }
355
356 out:
357 kvfree(zones);
358
359 return ret;
360 }
361
362 /*
363 * BLKRESETZONE ioctl processing.
364 * Called from blkdev_ioctl.
365 */
blkdev_reset_zones_ioctl(struct block_device * bdev,fmode_t mode,unsigned int cmd,unsigned long arg)366 int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
367 unsigned int cmd, unsigned long arg)
368 {
369 void __user *argp = (void __user *)arg;
370 struct request_queue *q;
371 struct blk_zone_range zrange;
372
373 if (!argp)
374 return -EINVAL;
375
376 q = bdev_get_queue(bdev);
377 if (!q)
378 return -ENXIO;
379
380 if (!blk_queue_is_zoned(q))
381 return -ENOTTY;
382
383 if (!capable(CAP_SYS_ADMIN))
384 return -EACCES;
385
386 if (!(mode & FMODE_WRITE))
387 return -EBADF;
388
389 if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
390 return -EFAULT;
391
392 return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
393 GFP_KERNEL);
394 }
395