1 /*
2 * Copyright (C) 2002 Sistina Software (UK) Limited.
3 * Copyright (C) 2006 Red Hat GmbH
4 *
5 * This file is released under the GPL.
6 *
7 * Kcopyd provides a simple interface for copying an area of one
8 * block-device to one or more other block-devices, with an asynchronous
9 * completion notification.
10 */
11
12 #include <linux/types.h>
13 #include <linux/atomic.h>
14 #include <linux/blkdev.h>
15 #include <linux/fs.h>
16 #include <linux/init.h>
17 #include <linux/list.h>
18 #include <linux/mempool.h>
19 #include <linux/module.h>
20 #include <linux/pagemap.h>
21 #include <linux/slab.h>
22 #include <linux/vmalloc.h>
23 #include <linux/workqueue.h>
24 #include <linux/mutex.h>
25 #include <linux/delay.h>
26 #include <linux/device-mapper.h>
27 #include <linux/dm-kcopyd.h>
28
29 #include "dm-core.h"
30
31 #define SUB_JOB_SIZE 128
32 #define SPLIT_COUNT 8
33 #define MIN_JOBS 8
34 #define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE))
35
36 /*-----------------------------------------------------------------
37 * Each kcopyd client has its own little pool of preallocated
38 * pages for kcopyd io.
39 *---------------------------------------------------------------*/
40 struct dm_kcopyd_client {
41 struct page_list *pages;
42 unsigned nr_reserved_pages;
43 unsigned nr_free_pages;
44
45 struct dm_io_client *io_client;
46
47 wait_queue_head_t destroyq;
48
49 mempool_t job_pool;
50
51 struct workqueue_struct *kcopyd_wq;
52 struct work_struct kcopyd_work;
53
54 struct dm_kcopyd_throttle *throttle;
55
56 atomic_t nr_jobs;
57
58 /*
59 * We maintain three lists of jobs:
60 *
61 * i) jobs waiting for pages
62 * ii) jobs that have pages, and are waiting for the io to be issued.
63 * iii) jobs that have completed.
64 *
65 * All three of these are protected by job_lock.
66 */
67 spinlock_t job_lock;
68 struct list_head complete_jobs;
69 struct list_head io_jobs;
70 struct list_head pages_jobs;
71 };
72
73 static struct page_list zero_page_list;
74
75 static DEFINE_SPINLOCK(throttle_spinlock);
76
77 /*
78 * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
79 * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
80 * by 2.
81 */
82 #define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ
83
84 /*
85 * Sleep this number of milliseconds.
86 *
87 * The value was decided experimentally.
88 * Smaller values seem to cause an increased copy rate above the limit.
89 * The reason for this is unknown but possibly due to jiffies rounding errors
90 * or read/write cache inside the disk.
91 */
92 #define SLEEP_MSEC 100
93
94 /*
95 * Maximum number of sleep events. There is a theoretical livelock if more
96 * kcopyd clients do work simultaneously which this limit avoids.
97 */
98 #define MAX_SLEEPS 10
99
io_job_start(struct dm_kcopyd_throttle * t)100 static void io_job_start(struct dm_kcopyd_throttle *t)
101 {
102 unsigned throttle, now, difference;
103 int slept = 0, skew;
104
105 if (unlikely(!t))
106 return;
107
108 try_again:
109 spin_lock_irq(&throttle_spinlock);
110
111 throttle = READ_ONCE(t->throttle);
112
113 if (likely(throttle >= 100))
114 goto skip_limit;
115
116 now = jiffies;
117 difference = now - t->last_jiffies;
118 t->last_jiffies = now;
119 if (t->num_io_jobs)
120 t->io_period += difference;
121 t->total_period += difference;
122
123 /*
124 * Maintain sane values if we got a temporary overflow.
125 */
126 if (unlikely(t->io_period > t->total_period))
127 t->io_period = t->total_period;
128
129 if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
130 int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
131 t->total_period >>= shift;
132 t->io_period >>= shift;
133 }
134
135 skew = t->io_period - throttle * t->total_period / 100;
136
137 if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
138 slept++;
139 spin_unlock_irq(&throttle_spinlock);
140 msleep(SLEEP_MSEC);
141 goto try_again;
142 }
143
144 skip_limit:
145 t->num_io_jobs++;
146
147 spin_unlock_irq(&throttle_spinlock);
148 }
149
io_job_finish(struct dm_kcopyd_throttle * t)150 static void io_job_finish(struct dm_kcopyd_throttle *t)
151 {
152 unsigned long flags;
153
154 if (unlikely(!t))
155 return;
156
157 spin_lock_irqsave(&throttle_spinlock, flags);
158
159 t->num_io_jobs--;
160
161 if (likely(READ_ONCE(t->throttle) >= 100))
162 goto skip_limit;
163
164 if (!t->num_io_jobs) {
165 unsigned now, difference;
166
167 now = jiffies;
168 difference = now - t->last_jiffies;
169 t->last_jiffies = now;
170
171 t->io_period += difference;
172 t->total_period += difference;
173
174 /*
175 * Maintain sane values if we got a temporary overflow.
176 */
177 if (unlikely(t->io_period > t->total_period))
178 t->io_period = t->total_period;
179 }
180
181 skip_limit:
182 spin_unlock_irqrestore(&throttle_spinlock, flags);
183 }
184
185
wake(struct dm_kcopyd_client * kc)186 static void wake(struct dm_kcopyd_client *kc)
187 {
188 queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
189 }
190
191 /*
192 * Obtain one page for the use of kcopyd.
193 */
alloc_pl(gfp_t gfp)194 static struct page_list *alloc_pl(gfp_t gfp)
195 {
196 struct page_list *pl;
197
198 pl = kmalloc(sizeof(*pl), gfp);
199 if (!pl)
200 return NULL;
201
202 pl->page = alloc_page(gfp);
203 if (!pl->page) {
204 kfree(pl);
205 return NULL;
206 }
207
208 return pl;
209 }
210
free_pl(struct page_list * pl)211 static void free_pl(struct page_list *pl)
212 {
213 __free_page(pl->page);
214 kfree(pl);
215 }
216
217 /*
218 * Add the provided pages to a client's free page list, releasing
219 * back to the system any beyond the reserved_pages limit.
220 */
kcopyd_put_pages(struct dm_kcopyd_client * kc,struct page_list * pl)221 static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
222 {
223 struct page_list *next;
224
225 do {
226 next = pl->next;
227
228 if (kc->nr_free_pages >= kc->nr_reserved_pages)
229 free_pl(pl);
230 else {
231 pl->next = kc->pages;
232 kc->pages = pl;
233 kc->nr_free_pages++;
234 }
235
236 pl = next;
237 } while (pl);
238 }
239
kcopyd_get_pages(struct dm_kcopyd_client * kc,unsigned int nr,struct page_list ** pages)240 static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
241 unsigned int nr, struct page_list **pages)
242 {
243 struct page_list *pl;
244
245 *pages = NULL;
246
247 do {
248 pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
249 if (unlikely(!pl)) {
250 /* Use reserved pages */
251 pl = kc->pages;
252 if (unlikely(!pl))
253 goto out_of_memory;
254 kc->pages = pl->next;
255 kc->nr_free_pages--;
256 }
257 pl->next = *pages;
258 *pages = pl;
259 } while (--nr);
260
261 return 0;
262
263 out_of_memory:
264 if (*pages)
265 kcopyd_put_pages(kc, *pages);
266 return -ENOMEM;
267 }
268
269 /*
270 * These three functions resize the page pool.
271 */
drop_pages(struct page_list * pl)272 static void drop_pages(struct page_list *pl)
273 {
274 struct page_list *next;
275
276 while (pl) {
277 next = pl->next;
278 free_pl(pl);
279 pl = next;
280 }
281 }
282
283 /*
284 * Allocate and reserve nr_pages for the use of a specific client.
285 */
client_reserve_pages(struct dm_kcopyd_client * kc,unsigned nr_pages)286 static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages)
287 {
288 unsigned i;
289 struct page_list *pl = NULL, *next;
290
291 for (i = 0; i < nr_pages; i++) {
292 next = alloc_pl(GFP_KERNEL);
293 if (!next) {
294 if (pl)
295 drop_pages(pl);
296 return -ENOMEM;
297 }
298 next->next = pl;
299 pl = next;
300 }
301
302 kc->nr_reserved_pages += nr_pages;
303 kcopyd_put_pages(kc, pl);
304
305 return 0;
306 }
307
client_free_pages(struct dm_kcopyd_client * kc)308 static void client_free_pages(struct dm_kcopyd_client *kc)
309 {
310 BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages);
311 drop_pages(kc->pages);
312 kc->pages = NULL;
313 kc->nr_free_pages = kc->nr_reserved_pages = 0;
314 }
315
316 /*-----------------------------------------------------------------
317 * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
318 * for this reason we use a mempool to prevent the client from
319 * ever having to do io (which could cause a deadlock).
320 *---------------------------------------------------------------*/
321 struct kcopyd_job {
322 struct dm_kcopyd_client *kc;
323 struct list_head list;
324 unsigned long flags;
325
326 /*
327 * Error state of the job.
328 */
329 int read_err;
330 unsigned long write_err;
331
332 /*
333 * Either READ or WRITE
334 */
335 int rw;
336 struct dm_io_region source;
337
338 /*
339 * The destinations for the transfer.
340 */
341 unsigned int num_dests;
342 struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
343
344 struct page_list *pages;
345
346 /*
347 * Set this to ensure you are notified when the job has
348 * completed. 'context' is for callback to use.
349 */
350 dm_kcopyd_notify_fn fn;
351 void *context;
352
353 /*
354 * These fields are only used if the job has been split
355 * into more manageable parts.
356 */
357 struct mutex lock;
358 atomic_t sub_jobs;
359 sector_t progress;
360 sector_t write_offset;
361
362 struct kcopyd_job *master_job;
363 };
364
365 static struct kmem_cache *_job_cache;
366
dm_kcopyd_init(void)367 int __init dm_kcopyd_init(void)
368 {
369 _job_cache = kmem_cache_create("kcopyd_job",
370 sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
371 __alignof__(struct kcopyd_job), 0, NULL);
372 if (!_job_cache)
373 return -ENOMEM;
374
375 zero_page_list.next = &zero_page_list;
376 zero_page_list.page = ZERO_PAGE(0);
377
378 return 0;
379 }
380
dm_kcopyd_exit(void)381 void dm_kcopyd_exit(void)
382 {
383 kmem_cache_destroy(_job_cache);
384 _job_cache = NULL;
385 }
386
387 /*
388 * Functions to push and pop a job onto the head of a given job
389 * list.
390 */
pop_io_job(struct list_head * jobs,struct dm_kcopyd_client * kc)391 static struct kcopyd_job *pop_io_job(struct list_head *jobs,
392 struct dm_kcopyd_client *kc)
393 {
394 struct kcopyd_job *job;
395
396 /*
397 * For I/O jobs, pop any read, any write without sequential write
398 * constraint and sequential writes that are at the right position.
399 */
400 list_for_each_entry(job, jobs, list) {
401 if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
402 list_del(&job->list);
403 return job;
404 }
405
406 if (job->write_offset == job->master_job->write_offset) {
407 job->master_job->write_offset += job->source.count;
408 list_del(&job->list);
409 return job;
410 }
411 }
412
413 return NULL;
414 }
415
pop(struct list_head * jobs,struct dm_kcopyd_client * kc)416 static struct kcopyd_job *pop(struct list_head *jobs,
417 struct dm_kcopyd_client *kc)
418 {
419 struct kcopyd_job *job = NULL;
420 unsigned long flags;
421
422 spin_lock_irqsave(&kc->job_lock, flags);
423
424 if (!list_empty(jobs)) {
425 if (jobs == &kc->io_jobs)
426 job = pop_io_job(jobs, kc);
427 else {
428 job = list_entry(jobs->next, struct kcopyd_job, list);
429 list_del(&job->list);
430 }
431 }
432 spin_unlock_irqrestore(&kc->job_lock, flags);
433
434 return job;
435 }
436
push(struct list_head * jobs,struct kcopyd_job * job)437 static void push(struct list_head *jobs, struct kcopyd_job *job)
438 {
439 unsigned long flags;
440 struct dm_kcopyd_client *kc = job->kc;
441
442 spin_lock_irqsave(&kc->job_lock, flags);
443 list_add_tail(&job->list, jobs);
444 spin_unlock_irqrestore(&kc->job_lock, flags);
445 }
446
447
push_head(struct list_head * jobs,struct kcopyd_job * job)448 static void push_head(struct list_head *jobs, struct kcopyd_job *job)
449 {
450 unsigned long flags;
451 struct dm_kcopyd_client *kc = job->kc;
452
453 spin_lock_irqsave(&kc->job_lock, flags);
454 list_add(&job->list, jobs);
455 spin_unlock_irqrestore(&kc->job_lock, flags);
456 }
457
458 /*
459 * These three functions process 1 item from the corresponding
460 * job list.
461 *
462 * They return:
463 * < 0: error
464 * 0: success
465 * > 0: can't process yet.
466 */
run_complete_job(struct kcopyd_job * job)467 static int run_complete_job(struct kcopyd_job *job)
468 {
469 void *context = job->context;
470 int read_err = job->read_err;
471 unsigned long write_err = job->write_err;
472 dm_kcopyd_notify_fn fn = job->fn;
473 struct dm_kcopyd_client *kc = job->kc;
474
475 if (job->pages && job->pages != &zero_page_list)
476 kcopyd_put_pages(kc, job->pages);
477 /*
478 * If this is the master job, the sub jobs have already
479 * completed so we can free everything.
480 */
481 if (job->master_job == job) {
482 mutex_destroy(&job->lock);
483 mempool_free(job, &kc->job_pool);
484 }
485 fn(read_err, write_err, context);
486
487 if (atomic_dec_and_test(&kc->nr_jobs))
488 wake_up(&kc->destroyq);
489
490 cond_resched();
491
492 return 0;
493 }
494
complete_io(unsigned long error,void * context)495 static void complete_io(unsigned long error, void *context)
496 {
497 struct kcopyd_job *job = (struct kcopyd_job *) context;
498 struct dm_kcopyd_client *kc = job->kc;
499
500 io_job_finish(kc->throttle);
501
502 if (error) {
503 if (op_is_write(job->rw))
504 job->write_err |= error;
505 else
506 job->read_err = 1;
507
508 if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
509 push(&kc->complete_jobs, job);
510 wake(kc);
511 return;
512 }
513 }
514
515 if (op_is_write(job->rw))
516 push(&kc->complete_jobs, job);
517
518 else {
519 job->rw = WRITE;
520 push(&kc->io_jobs, job);
521 }
522
523 wake(kc);
524 }
525
526 /*
527 * Request io on as many buffer heads as we can currently get for
528 * a particular job.
529 */
run_io_job(struct kcopyd_job * job)530 static int run_io_job(struct kcopyd_job *job)
531 {
532 int r;
533 struct dm_io_request io_req = {
534 .bi_op = job->rw,
535 .bi_op_flags = 0,
536 .mem.type = DM_IO_PAGE_LIST,
537 .mem.ptr.pl = job->pages,
538 .mem.offset = 0,
539 .notify.fn = complete_io,
540 .notify.context = job,
541 .client = job->kc->io_client,
542 };
543
544 /*
545 * If we need to write sequentially and some reads or writes failed,
546 * no point in continuing.
547 */
548 if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
549 job->master_job->write_err)
550 return -EIO;
551
552 io_job_start(job->kc->throttle);
553
554 if (job->rw == READ)
555 r = dm_io(&io_req, 1, &job->source, NULL);
556 else
557 r = dm_io(&io_req, job->num_dests, job->dests, NULL);
558
559 return r;
560 }
561
run_pages_job(struct kcopyd_job * job)562 static int run_pages_job(struct kcopyd_job *job)
563 {
564 int r;
565 unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
566
567 r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
568 if (!r) {
569 /* this job is ready for io */
570 push(&job->kc->io_jobs, job);
571 return 0;
572 }
573
574 if (r == -ENOMEM)
575 /* can't complete now */
576 return 1;
577
578 return r;
579 }
580
581 /*
582 * Run through a list for as long as possible. Returns the count
583 * of successful jobs.
584 */
process_jobs(struct list_head * jobs,struct dm_kcopyd_client * kc,int (* fn)(struct kcopyd_job *))585 static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
586 int (*fn) (struct kcopyd_job *))
587 {
588 struct kcopyd_job *job;
589 int r, count = 0;
590
591 while ((job = pop(jobs, kc))) {
592
593 r = fn(job);
594
595 if (r < 0) {
596 /* error this rogue job */
597 if (op_is_write(job->rw))
598 job->write_err = (unsigned long) -1L;
599 else
600 job->read_err = 1;
601 push(&kc->complete_jobs, job);
602 break;
603 }
604
605 if (r > 0) {
606 /*
607 * We couldn't service this job ATM, so
608 * push this job back onto the list.
609 */
610 push_head(jobs, job);
611 break;
612 }
613
614 count++;
615 }
616
617 return count;
618 }
619
620 /*
621 * kcopyd does this every time it's woken up.
622 */
do_work(struct work_struct * work)623 static void do_work(struct work_struct *work)
624 {
625 struct dm_kcopyd_client *kc = container_of(work,
626 struct dm_kcopyd_client, kcopyd_work);
627 struct blk_plug plug;
628
629 /*
630 * The order that these are called is *very* important.
631 * complete jobs can free some pages for pages jobs.
632 * Pages jobs when successful will jump onto the io jobs
633 * list. io jobs call wake when they complete and it all
634 * starts again.
635 */
636 blk_start_plug(&plug);
637 process_jobs(&kc->complete_jobs, kc, run_complete_job);
638 process_jobs(&kc->pages_jobs, kc, run_pages_job);
639 process_jobs(&kc->io_jobs, kc, run_io_job);
640 blk_finish_plug(&plug);
641 }
642
643 /*
644 * If we are copying a small region we just dispatch a single job
645 * to do the copy, otherwise the io has to be split up into many
646 * jobs.
647 */
dispatch_job(struct kcopyd_job * job)648 static void dispatch_job(struct kcopyd_job *job)
649 {
650 struct dm_kcopyd_client *kc = job->kc;
651 atomic_inc(&kc->nr_jobs);
652 if (unlikely(!job->source.count))
653 push(&kc->complete_jobs, job);
654 else if (job->pages == &zero_page_list)
655 push(&kc->io_jobs, job);
656 else
657 push(&kc->pages_jobs, job);
658 wake(kc);
659 }
660
segment_complete(int read_err,unsigned long write_err,void * context)661 static void segment_complete(int read_err, unsigned long write_err,
662 void *context)
663 {
664 /* FIXME: tidy this function */
665 sector_t progress = 0;
666 sector_t count = 0;
667 struct kcopyd_job *sub_job = (struct kcopyd_job *) context;
668 struct kcopyd_job *job = sub_job->master_job;
669 struct dm_kcopyd_client *kc = job->kc;
670
671 mutex_lock(&job->lock);
672
673 /* update the error */
674 if (read_err)
675 job->read_err = 1;
676
677 if (write_err)
678 job->write_err |= write_err;
679
680 /*
681 * Only dispatch more work if there hasn't been an error.
682 */
683 if ((!job->read_err && !job->write_err) ||
684 test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
685 /* get the next chunk of work */
686 progress = job->progress;
687 count = job->source.count - progress;
688 if (count) {
689 if (count > SUB_JOB_SIZE)
690 count = SUB_JOB_SIZE;
691
692 job->progress += count;
693 }
694 }
695 mutex_unlock(&job->lock);
696
697 if (count) {
698 int i;
699
700 *sub_job = *job;
701 sub_job->write_offset = progress;
702 sub_job->source.sector += progress;
703 sub_job->source.count = count;
704
705 for (i = 0; i < job->num_dests; i++) {
706 sub_job->dests[i].sector += progress;
707 sub_job->dests[i].count = count;
708 }
709
710 sub_job->fn = segment_complete;
711 sub_job->context = sub_job;
712 dispatch_job(sub_job);
713
714 } else if (atomic_dec_and_test(&job->sub_jobs)) {
715
716 /*
717 * Queue the completion callback to the kcopyd thread.
718 *
719 * Some callers assume that all the completions are called
720 * from a single thread and don't race with each other.
721 *
722 * We must not call the callback directly here because this
723 * code may not be executing in the thread.
724 */
725 push(&kc->complete_jobs, job);
726 wake(kc);
727 }
728 }
729
730 /*
731 * Create some sub jobs to share the work between them.
732 */
split_job(struct kcopyd_job * master_job)733 static void split_job(struct kcopyd_job *master_job)
734 {
735 int i;
736
737 atomic_inc(&master_job->kc->nr_jobs);
738
739 atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
740 for (i = 0; i < SPLIT_COUNT; i++) {
741 master_job[i + 1].master_job = master_job;
742 segment_complete(0, 0u, &master_job[i + 1]);
743 }
744 }
745
dm_kcopyd_copy(struct dm_kcopyd_client * kc,struct dm_io_region * from,unsigned int num_dests,struct dm_io_region * dests,unsigned int flags,dm_kcopyd_notify_fn fn,void * context)746 void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
747 unsigned int num_dests, struct dm_io_region *dests,
748 unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
749 {
750 struct kcopyd_job *job;
751 int i;
752
753 /*
754 * Allocate an array of jobs consisting of one master job
755 * followed by SPLIT_COUNT sub jobs.
756 */
757 job = mempool_alloc(&kc->job_pool, GFP_NOIO);
758 mutex_init(&job->lock);
759
760 /*
761 * set up for the read.
762 */
763 job->kc = kc;
764 job->flags = flags;
765 job->read_err = 0;
766 job->write_err = 0;
767
768 job->num_dests = num_dests;
769 memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
770
771 /*
772 * If one of the destination is a host-managed zoned block device,
773 * we need to write sequentially. If one of the destination is a
774 * host-aware device, then leave it to the caller to choose what to do.
775 */
776 if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
777 for (i = 0; i < job->num_dests; i++) {
778 if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
779 set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags);
780 break;
781 }
782 }
783 }
784
785 /*
786 * If we need to write sequentially, errors cannot be ignored.
787 */
788 if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
789 test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags))
790 clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags);
791
792 if (from) {
793 job->source = *from;
794 job->pages = NULL;
795 job->rw = READ;
796 } else {
797 memset(&job->source, 0, sizeof job->source);
798 job->source.count = job->dests[0].count;
799 job->pages = &zero_page_list;
800
801 /*
802 * Use WRITE ZEROES to optimize zeroing if all dests support it.
803 */
804 job->rw = REQ_OP_WRITE_ZEROES;
805 for (i = 0; i < job->num_dests; i++)
806 if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
807 job->rw = WRITE;
808 break;
809 }
810 }
811
812 job->fn = fn;
813 job->context = context;
814 job->master_job = job;
815 job->write_offset = 0;
816
817 if (job->source.count <= SUB_JOB_SIZE)
818 dispatch_job(job);
819 else {
820 job->progress = 0;
821 split_job(job);
822 }
823 }
824 EXPORT_SYMBOL(dm_kcopyd_copy);
825
dm_kcopyd_zero(struct dm_kcopyd_client * kc,unsigned num_dests,struct dm_io_region * dests,unsigned flags,dm_kcopyd_notify_fn fn,void * context)826 void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
827 unsigned num_dests, struct dm_io_region *dests,
828 unsigned flags, dm_kcopyd_notify_fn fn, void *context)
829 {
830 dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
831 }
832 EXPORT_SYMBOL(dm_kcopyd_zero);
833
dm_kcopyd_prepare_callback(struct dm_kcopyd_client * kc,dm_kcopyd_notify_fn fn,void * context)834 void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
835 dm_kcopyd_notify_fn fn, void *context)
836 {
837 struct kcopyd_job *job;
838
839 job = mempool_alloc(&kc->job_pool, GFP_NOIO);
840
841 memset(job, 0, sizeof(struct kcopyd_job));
842 job->kc = kc;
843 job->fn = fn;
844 job->context = context;
845 job->master_job = job;
846
847 atomic_inc(&kc->nr_jobs);
848
849 return job;
850 }
851 EXPORT_SYMBOL(dm_kcopyd_prepare_callback);
852
dm_kcopyd_do_callback(void * j,int read_err,unsigned long write_err)853 void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
854 {
855 struct kcopyd_job *job = j;
856 struct dm_kcopyd_client *kc = job->kc;
857
858 job->read_err = read_err;
859 job->write_err = write_err;
860
861 push(&kc->complete_jobs, job);
862 wake(kc);
863 }
864 EXPORT_SYMBOL(dm_kcopyd_do_callback);
865
866 /*
867 * Cancels a kcopyd job, eg. someone might be deactivating a
868 * mirror.
869 */
870 #if 0
871 int kcopyd_cancel(struct kcopyd_job *job, int block)
872 {
873 /* FIXME: finish */
874 return -1;
875 }
876 #endif /* 0 */
877
878 /*-----------------------------------------------------------------
879 * Client setup
880 *---------------------------------------------------------------*/
dm_kcopyd_client_create(struct dm_kcopyd_throttle * throttle)881 struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
882 {
883 int r;
884 struct dm_kcopyd_client *kc;
885
886 kc = kzalloc(sizeof(*kc), GFP_KERNEL);
887 if (!kc)
888 return ERR_PTR(-ENOMEM);
889
890 spin_lock_init(&kc->job_lock);
891 INIT_LIST_HEAD(&kc->complete_jobs);
892 INIT_LIST_HEAD(&kc->io_jobs);
893 INIT_LIST_HEAD(&kc->pages_jobs);
894 kc->throttle = throttle;
895
896 r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache);
897 if (r)
898 goto bad_slab;
899
900 INIT_WORK(&kc->kcopyd_work, do_work);
901 kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
902 if (!kc->kcopyd_wq) {
903 r = -ENOMEM;
904 goto bad_workqueue;
905 }
906
907 kc->pages = NULL;
908 kc->nr_reserved_pages = kc->nr_free_pages = 0;
909 r = client_reserve_pages(kc, RESERVE_PAGES);
910 if (r)
911 goto bad_client_pages;
912
913 kc->io_client = dm_io_client_create();
914 if (IS_ERR(kc->io_client)) {
915 r = PTR_ERR(kc->io_client);
916 goto bad_io_client;
917 }
918
919 init_waitqueue_head(&kc->destroyq);
920 atomic_set(&kc->nr_jobs, 0);
921
922 return kc;
923
924 bad_io_client:
925 client_free_pages(kc);
926 bad_client_pages:
927 destroy_workqueue(kc->kcopyd_wq);
928 bad_workqueue:
929 mempool_exit(&kc->job_pool);
930 bad_slab:
931 kfree(kc);
932
933 return ERR_PTR(r);
934 }
935 EXPORT_SYMBOL(dm_kcopyd_client_create);
936
dm_kcopyd_client_destroy(struct dm_kcopyd_client * kc)937 void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
938 {
939 /* Wait for completion of all jobs submitted by this client. */
940 wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
941
942 BUG_ON(!list_empty(&kc->complete_jobs));
943 BUG_ON(!list_empty(&kc->io_jobs));
944 BUG_ON(!list_empty(&kc->pages_jobs));
945 destroy_workqueue(kc->kcopyd_wq);
946 dm_io_client_destroy(kc->io_client);
947 client_free_pages(kc);
948 mempool_exit(&kc->job_pool);
949 kfree(kc);
950 }
951 EXPORT_SYMBOL(dm_kcopyd_client_destroy);
952