1 /*
2  * Copyright (C) 2016 CNEX Labs
3  * Initial: Javier Gonzalez <javier@cnexlabs.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version
7  * 2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * pblk-recovery.c - pblk's recovery path
15  */
16 
17 #include "pblk.h"
18 
pblk_recov_check_emeta(struct pblk * pblk,struct line_emeta * emeta_buf)19 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
20 {
21 	u32 crc;
22 
23 	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
24 	if (le32_to_cpu(emeta_buf->crc) != crc)
25 		return 1;
26 
27 	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
28 		return 1;
29 
30 	return 0;
31 }
32 
pblk_recov_l2p_from_emeta(struct pblk * pblk,struct pblk_line * line)33 static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
34 {
35 	struct nvm_tgt_dev *dev = pblk->dev;
36 	struct nvm_geo *geo = &dev->geo;
37 	struct pblk_line_meta *lm = &pblk->lm;
38 	struct pblk_emeta *emeta = line->emeta;
39 	struct line_emeta *emeta_buf = emeta->buf;
40 	__le64 *lba_list;
41 	u64 data_start, data_end;
42 	u64 nr_valid_lbas, nr_lbas = 0;
43 	u64 i;
44 
45 	lba_list = emeta_to_lbas(pblk, emeta_buf);
46 	if (!lba_list)
47 		return 1;
48 
49 	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
50 	data_end = line->emeta_ssec;
51 	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
52 
53 	for (i = data_start; i < data_end; i++) {
54 		struct ppa_addr ppa;
55 		int pos;
56 
57 		ppa = addr_to_gen_ppa(pblk, i, line->id);
58 		pos = pblk_ppa_to_pos(geo, ppa);
59 
60 		/* Do not update bad blocks */
61 		if (test_bit(pos, line->blk_bitmap))
62 			continue;
63 
64 		if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
65 			spin_lock(&line->lock);
66 			if (test_and_set_bit(i, line->invalid_bitmap))
67 				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
68 			else
69 				le32_add_cpu(line->vsc, -1);
70 			spin_unlock(&line->lock);
71 
72 			continue;
73 		}
74 
75 		pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
76 		nr_lbas++;
77 	}
78 
79 	if (nr_valid_lbas != nr_lbas)
80 		pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n",
81 				line->id, nr_valid_lbas, nr_lbas);
82 
83 	line->left_msecs = 0;
84 
85 	return 0;
86 }
87 
pblk_calc_sec_in_line(struct pblk * pblk,struct pblk_line * line)88 static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
89 {
90 	struct nvm_tgt_dev *dev = pblk->dev;
91 	struct nvm_geo *geo = &dev->geo;
92 	struct pblk_line_meta *lm = &pblk->lm;
93 	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
94 
95 	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
96 				nr_bb * geo->clba;
97 }
98 
/*
 * Bundle of pre-allocated resources shared by the OOB scan helpers. It is
 * filled in by pblk_recov_l2p_from_oob() and passed by value to the scan
 * functions, which reuse the same request and buffers for every read.
 */
struct pblk_recov_alloc {
	struct ppa_addr *ppa_list;		/* assigned to rqd->ppa_list */
	struct pblk_sec_meta *meta_list;	/* assigned to rqd->meta_list */
	struct nvm_rq *rqd;			/* request, zeroed before each read */
	void *data;				/* buffer mapped into each read bio */
	dma_addr_t dma_ppa_list;		/* DMA address paired with ppa_list */
	dma_addr_t dma_meta_list;		/* DMA address paired with meta_list */
};
107 
/*
 * Read the sectors between r_ptr and the line's write pointer
 * (line->cur_sec) and update the L2P table from the lbas found in the
 * per-sector metadata returned with the read.
 *
 * @pblk:  pblk instance
 * @line:  line being recovered
 * @p:     pre-allocated request, buffers and DMA lists (reused for each read)
 * @r_ptr: first sector address to read
 *
 * Sectors on blocks flagged in line->blk_bitmap are skipped in steps of
 * pblk->min_write_pgs. Metadata entries whose lba is ADDR_EMPTY or greater
 * than pblk->rl.nr_secs are ignored.
 *
 * Returns 0 on success (including when there is nothing to read), the error
 * from bio mapping or I/O submission, or -EINTR when the device reports a
 * read error other than NVM_RSP_WARN_HIGHECC.
 */
static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
			       struct pblk_recov_alloc p, u64 r_ptr)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 r_ptr_int;
	int left_ppas;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	left_ppas = line->cur_sec - r_ptr;
	if (!left_ppas)
		return 0;

	r_ptr_int = r_ptr;

next_read_rq:
	/* The same request is recycled for every read */
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->csecs;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	/* Build the ppa list in min_write_pgs groups, skipping bad blocks */
	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			r_ptr_int += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
	}

	/* If read fails, more padding is needed */
	ret = pblk_submit_io_sync(pblk, rqd);
	if (ret) {
		pblk_err(pblk, "I/O submission failed: %d\n", ret);
		/*
		 * The bio was never submitted, so nothing else will drop the
		 * reference obtained from bio_map_kern(); same handling as
		 * in pblk_recov_scan_oob().
		 */
		bio_put(bio);
		return ret;
	}

	atomic_dec(&pblk->inflight_io);

	/* At this point, the read should not fail. If it does, it is a problem
	 * we cannot recover from here. Need FTL log.
	 */
	if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
		pblk_err(pblk, "L2P recovery failed (%d)\n", rqd->error);
		return -EINTR;
	}

	for (i = 0; i < rqd->nr_ppas; i++) {
		u64 lba = le64_to_cpu(meta_list[i].lba);

		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
			continue;

		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_read_rq;

	return 0;
}
216 
pblk_recov_complete(struct kref * ref)217 static void pblk_recov_complete(struct kref *ref)
218 {
219 	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
220 
221 	complete(&pad_rq->wait);
222 }
223 
pblk_end_io_recov(struct nvm_rq * rqd)224 static void pblk_end_io_recov(struct nvm_rq *rqd)
225 {
226 	struct pblk_pad_rq *pad_rq = rqd->private;
227 	struct pblk *pblk = pad_rq->pblk;
228 
229 	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
230 
231 	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
232 
233 	atomic_dec(&pblk->inflight_io);
234 	kref_put(&pad_rq->ref, pblk_recov_complete);
235 }
236 
pblk_recov_pad_oob(struct pblk * pblk,struct pblk_line * line,int left_ppas)237 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
238 			      int left_ppas)
239 {
240 	struct nvm_tgt_dev *dev = pblk->dev;
241 	struct nvm_geo *geo = &dev->geo;
242 	struct ppa_addr *ppa_list;
243 	struct pblk_sec_meta *meta_list;
244 	struct pblk_pad_rq *pad_rq;
245 	struct nvm_rq *rqd;
246 	struct bio *bio;
247 	void *data;
248 	dma_addr_t dma_ppa_list, dma_meta_list;
249 	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
250 	u64 w_ptr = line->cur_sec;
251 	int left_line_ppas, rq_ppas, rq_len;
252 	int i, j;
253 	int ret = 0;
254 
255 	spin_lock(&line->lock);
256 	left_line_ppas = line->left_msecs;
257 	spin_unlock(&line->lock);
258 
259 	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
260 	if (!pad_rq)
261 		return -ENOMEM;
262 
263 	data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs));
264 	if (!data) {
265 		ret = -ENOMEM;
266 		goto free_rq;
267 	}
268 
269 	pad_rq->pblk = pblk;
270 	init_completion(&pad_rq->wait);
271 	kref_init(&pad_rq->ref);
272 
273 next_pad_rq:
274 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
275 	if (rq_ppas < pblk->min_write_pgs) {
276 		pblk_err(pblk, "corrupted pad line %d\n", line->id);
277 		goto fail_free_pad;
278 	}
279 
280 	rq_len = rq_ppas * geo->csecs;
281 
282 	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
283 	if (!meta_list) {
284 		ret = -ENOMEM;
285 		goto fail_free_pad;
286 	}
287 
288 	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
289 	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
290 
291 	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
292 						PBLK_VMALLOC_META, GFP_KERNEL);
293 	if (IS_ERR(bio)) {
294 		ret = PTR_ERR(bio);
295 		goto fail_free_meta;
296 	}
297 
298 	bio->bi_iter.bi_sector = 0; /* internal bio */
299 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
300 
301 	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
302 
303 	rqd->bio = bio;
304 	rqd->opcode = NVM_OP_PWRITE;
305 	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
306 	rqd->meta_list = meta_list;
307 	rqd->nr_ppas = rq_ppas;
308 	rqd->ppa_list = ppa_list;
309 	rqd->dma_ppa_list = dma_ppa_list;
310 	rqd->dma_meta_list = dma_meta_list;
311 	rqd->end_io = pblk_end_io_recov;
312 	rqd->private = pad_rq;
313 
314 	for (i = 0; i < rqd->nr_ppas; ) {
315 		struct ppa_addr ppa;
316 		int pos;
317 
318 		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
319 		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
320 		pos = pblk_ppa_to_pos(geo, ppa);
321 
322 		while (test_bit(pos, line->blk_bitmap)) {
323 			w_ptr += pblk->min_write_pgs;
324 			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
325 			pos = pblk_ppa_to_pos(geo, ppa);
326 		}
327 
328 		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
329 			struct ppa_addr dev_ppa;
330 			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
331 
332 			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
333 
334 			pblk_map_invalidate(pblk, dev_ppa);
335 			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
336 			rqd->ppa_list[i] = dev_ppa;
337 		}
338 	}
339 
340 	kref_get(&pad_rq->ref);
341 	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
342 
343 	ret = pblk_submit_io(pblk, rqd);
344 	if (ret) {
345 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
346 		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
347 		goto fail_free_bio;
348 	}
349 
350 	left_line_ppas -= rq_ppas;
351 	left_ppas -= rq_ppas;
352 	if (left_ppas && left_line_ppas)
353 		goto next_pad_rq;
354 
355 	kref_put(&pad_rq->ref, pblk_recov_complete);
356 
357 	if (!wait_for_completion_io_timeout(&pad_rq->wait,
358 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
359 		pblk_err(pblk, "pad write timed out\n");
360 		ret = -ETIME;
361 	}
362 
363 	if (!pblk_line_is_full(line))
364 		pblk_err(pblk, "corrupted padded line: %d\n", line->id);
365 
366 	vfree(data);
367 free_rq:
368 	kfree(pad_rq);
369 	return ret;
370 
371 fail_free_bio:
372 	bio_put(bio);
373 fail_free_meta:
374 	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
375 fail_free_pad:
376 	kfree(pad_rq);
377 	vfree(data);
378 	return ret;
379 }
380 
381 /* When this function is called, it means that not all upper pages have been
382  * written in a page that contains valid data. In order to recover this data, we
383  * first find the write pointer on the device, then we pad all necessary
384  * sectors, and finally attempt to read the valid data
385  */
pblk_recov_scan_all_oob(struct pblk * pblk,struct pblk_line * line,struct pblk_recov_alloc p)386 static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
387 				   struct pblk_recov_alloc p)
388 {
389 	struct nvm_tgt_dev *dev = pblk->dev;
390 	struct nvm_geo *geo = &dev->geo;
391 	struct ppa_addr *ppa_list;
392 	struct pblk_sec_meta *meta_list;
393 	struct nvm_rq *rqd;
394 	struct bio *bio;
395 	void *data;
396 	dma_addr_t dma_ppa_list, dma_meta_list;
397 	u64 w_ptr = 0, r_ptr;
398 	int rq_ppas, rq_len;
399 	int i, j;
400 	int ret = 0;
401 	int rec_round;
402 	int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
403 
404 	ppa_list = p.ppa_list;
405 	meta_list = p.meta_list;
406 	rqd = p.rqd;
407 	data = p.data;
408 	dma_ppa_list = p.dma_ppa_list;
409 	dma_meta_list = p.dma_meta_list;
410 
411 	/* we could recover up until the line write pointer */
412 	r_ptr = line->cur_sec;
413 	rec_round = 0;
414 
415 next_rq:
416 	memset(rqd, 0, pblk_g_rq_size);
417 
418 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
419 	if (!rq_ppas)
420 		rq_ppas = pblk->min_write_pgs;
421 	rq_len = rq_ppas * geo->csecs;
422 
423 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
424 	if (IS_ERR(bio))
425 		return PTR_ERR(bio);
426 
427 	bio->bi_iter.bi_sector = 0; /* internal bio */
428 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
429 
430 	rqd->bio = bio;
431 	rqd->opcode = NVM_OP_PREAD;
432 	rqd->meta_list = meta_list;
433 	rqd->nr_ppas = rq_ppas;
434 	rqd->ppa_list = ppa_list;
435 	rqd->dma_ppa_list = dma_ppa_list;
436 	rqd->dma_meta_list = dma_meta_list;
437 
438 	if (pblk_io_aligned(pblk, rq_ppas))
439 		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
440 	else
441 		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
442 
443 	for (i = 0; i < rqd->nr_ppas; ) {
444 		struct ppa_addr ppa;
445 		int pos;
446 
447 		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
448 		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
449 		pos = pblk_ppa_to_pos(geo, ppa);
450 
451 		while (test_bit(pos, line->blk_bitmap)) {
452 			w_ptr += pblk->min_write_pgs;
453 			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
454 			pos = pblk_ppa_to_pos(geo, ppa);
455 		}
456 
457 		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
458 			rqd->ppa_list[i] =
459 				addr_to_gen_ppa(pblk, w_ptr, line->id);
460 	}
461 
462 	ret = pblk_submit_io_sync(pblk, rqd);
463 	if (ret) {
464 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
465 		return ret;
466 	}
467 
468 	atomic_dec(&pblk->inflight_io);
469 
470 	/* This should not happen since the read failed during normal recovery,
471 	 * but the media works funny sometimes...
472 	 */
473 	if (!rec_round++ && !rqd->error) {
474 		rec_round = 0;
475 		for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
476 			u64 lba = le64_to_cpu(meta_list[i].lba);
477 
478 			if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
479 				continue;
480 
481 			pblk_update_map(pblk, lba, rqd->ppa_list[i]);
482 		}
483 	}
484 
485 	/* Reached the end of the written line */
486 	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
487 		int pad_secs, nr_error_bits, bit;
488 		int ret;
489 
490 		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
491 		nr_error_bits = rqd->nr_ppas - bit;
492 
493 		/* Roll back failed sectors */
494 		line->cur_sec -= nr_error_bits;
495 		line->left_msecs += nr_error_bits;
496 		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
497 
498 		pad_secs = pblk_pad_distance(pblk);
499 		if (pad_secs > line->left_msecs)
500 			pad_secs = line->left_msecs;
501 
502 		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
503 		if (ret)
504 			pblk_err(pblk, "OOB padding failed (err:%d)\n", ret);
505 
506 		ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
507 		if (ret)
508 			pblk_err(pblk, "OOB read failed (err:%d)\n", ret);
509 
510 		left_ppas = 0;
511 	}
512 
513 	left_ppas -= rq_ppas;
514 	if (left_ppas > 0)
515 		goto next_rq;
516 
517 	return ret;
518 }
519 
pblk_recov_scan_oob(struct pblk * pblk,struct pblk_line * line,struct pblk_recov_alloc p,int * done)520 static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
521 			       struct pblk_recov_alloc p, int *done)
522 {
523 	struct nvm_tgt_dev *dev = pblk->dev;
524 	struct nvm_geo *geo = &dev->geo;
525 	struct ppa_addr *ppa_list;
526 	struct pblk_sec_meta *meta_list;
527 	struct nvm_rq *rqd;
528 	struct bio *bio;
529 	void *data;
530 	dma_addr_t dma_ppa_list, dma_meta_list;
531 	u64 paddr;
532 	int rq_ppas, rq_len;
533 	int i, j;
534 	int ret = 0;
535 	int left_ppas = pblk_calc_sec_in_line(pblk, line);
536 
537 	ppa_list = p.ppa_list;
538 	meta_list = p.meta_list;
539 	rqd = p.rqd;
540 	data = p.data;
541 	dma_ppa_list = p.dma_ppa_list;
542 	dma_meta_list = p.dma_meta_list;
543 
544 	*done = 1;
545 
546 next_rq:
547 	memset(rqd, 0, pblk_g_rq_size);
548 
549 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
550 	if (!rq_ppas)
551 		rq_ppas = pblk->min_write_pgs;
552 	rq_len = rq_ppas * geo->csecs;
553 
554 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
555 	if (IS_ERR(bio))
556 		return PTR_ERR(bio);
557 
558 	bio->bi_iter.bi_sector = 0; /* internal bio */
559 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
560 
561 	rqd->bio = bio;
562 	rqd->opcode = NVM_OP_PREAD;
563 	rqd->meta_list = meta_list;
564 	rqd->nr_ppas = rq_ppas;
565 	rqd->ppa_list = ppa_list;
566 	rqd->dma_ppa_list = dma_ppa_list;
567 	rqd->dma_meta_list = dma_meta_list;
568 
569 	if (pblk_io_aligned(pblk, rq_ppas))
570 		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
571 	else
572 		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
573 
574 	for (i = 0; i < rqd->nr_ppas; ) {
575 		struct ppa_addr ppa;
576 		int pos;
577 
578 		paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
579 		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
580 		pos = pblk_ppa_to_pos(geo, ppa);
581 
582 		while (test_bit(pos, line->blk_bitmap)) {
583 			paddr += pblk->min_write_pgs;
584 			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
585 			pos = pblk_ppa_to_pos(geo, ppa);
586 		}
587 
588 		for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
589 			rqd->ppa_list[i] =
590 				addr_to_gen_ppa(pblk, paddr, line->id);
591 	}
592 
593 	ret = pblk_submit_io_sync(pblk, rqd);
594 	if (ret) {
595 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
596 		bio_put(bio);
597 		return ret;
598 	}
599 
600 	atomic_dec(&pblk->inflight_io);
601 
602 	/* Reached the end of the written line */
603 	if (rqd->error) {
604 		int nr_error_bits, bit;
605 
606 		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
607 		nr_error_bits = rqd->nr_ppas - bit;
608 
609 		/* Roll back failed sectors */
610 		line->cur_sec -= nr_error_bits;
611 		line->left_msecs += nr_error_bits;
612 		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
613 
614 		left_ppas = 0;
615 		rqd->nr_ppas = bit;
616 
617 		if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
618 			*done = 0;
619 	}
620 
621 	for (i = 0; i < rqd->nr_ppas; i++) {
622 		u64 lba = le64_to_cpu(meta_list[i].lba);
623 
624 		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
625 			continue;
626 
627 		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
628 	}
629 
630 	left_ppas -= rq_ppas;
631 	if (left_ppas > 0)
632 		goto next_rq;
633 
634 	return ret;
635 }
636 
637 /* Scan line for lbas on out of bound area */
pblk_recov_l2p_from_oob(struct pblk * pblk,struct pblk_line * line)638 static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
639 {
640 	struct nvm_tgt_dev *dev = pblk->dev;
641 	struct nvm_geo *geo = &dev->geo;
642 	struct nvm_rq *rqd;
643 	struct ppa_addr *ppa_list;
644 	struct pblk_sec_meta *meta_list;
645 	struct pblk_recov_alloc p;
646 	void *data;
647 	dma_addr_t dma_ppa_list, dma_meta_list;
648 	int done, ret = 0;
649 
650 	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
651 	if (!meta_list)
652 		return -ENOMEM;
653 
654 	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
655 	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
656 
657 	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
658 	if (!data) {
659 		ret = -ENOMEM;
660 		goto free_meta_list;
661 	}
662 
663 	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
664 
665 	p.ppa_list = ppa_list;
666 	p.meta_list = meta_list;
667 	p.rqd = rqd;
668 	p.data = data;
669 	p.dma_ppa_list = dma_ppa_list;
670 	p.dma_meta_list = dma_meta_list;
671 
672 	ret = pblk_recov_scan_oob(pblk, line, p, &done);
673 	if (ret) {
674 		pblk_err(pblk, "could not recover L2P from OOB\n");
675 		goto out;
676 	}
677 
678 	if (!done) {
679 		ret = pblk_recov_scan_all_oob(pblk, line, p);
680 		if (ret) {
681 			pblk_err(pblk, "could not recover L2P from OOB\n");
682 			goto out;
683 		}
684 	}
685 
686 	if (pblk_line_is_full(line))
687 		pblk_line_recov_close(pblk, line);
688 
689 out:
690 	kfree(data);
691 free_meta_list:
692 	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
693 
694 	return ret;
695 }
696 
697 /* Insert lines ordered by sequence number (seq_num) on list */
pblk_recov_line_add_ordered(struct list_head * head,struct pblk_line * line)698 static void pblk_recov_line_add_ordered(struct list_head *head,
699 					struct pblk_line *line)
700 {
701 	struct pblk_line *t = NULL;
702 
703 	list_for_each_entry(t, head, list)
704 		if (t->seq_nr > line->seq_nr)
705 			break;
706 
707 	__list_add(&line->list, t->list.prev, &t->list);
708 }
709 
pblk_line_emeta_start(struct pblk * pblk,struct pblk_line * line)710 static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
711 {
712 	struct nvm_tgt_dev *dev = pblk->dev;
713 	struct nvm_geo *geo = &dev->geo;
714 	struct pblk_line_meta *lm = &pblk->lm;
715 	unsigned int emeta_secs;
716 	u64 emeta_start;
717 	struct ppa_addr ppa;
718 	int pos;
719 
720 	emeta_secs = lm->emeta_sec[0];
721 	emeta_start = lm->sec_per_line;
722 
723 	while (emeta_secs) {
724 		emeta_start--;
725 		ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
726 		pos = pblk_ppa_to_pos(geo, ppa);
727 		if (!test_bit(pos, line->blk_bitmap))
728 			emeta_secs--;
729 	}
730 
731 	return emeta_start;
732 }
733 
pblk_recov_check_line_version(struct pblk * pblk,struct line_emeta * emeta)734 static int pblk_recov_check_line_version(struct pblk *pblk,
735 					 struct line_emeta *emeta)
736 {
737 	struct line_header *header = &emeta->header;
738 
739 	if (header->version_major != EMETA_VERSION_MAJOR) {
740 		pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
741 			 header->version_major, EMETA_VERSION_MAJOR);
742 		return 1;
743 	}
744 
745 #ifdef CONFIG_NVM_PBLK_DEBUG
746 	if (header->version_minor > EMETA_VERSION_MINOR)
747 		pblk_info(pblk, "newer line minor version found: %d\n",
748 				header->version_minor);
749 #endif
750 
751 	return 0;
752 }
753 
pblk_recov_wa_counters(struct pblk * pblk,struct line_emeta * emeta)754 static void pblk_recov_wa_counters(struct pblk *pblk,
755 				   struct line_emeta *emeta)
756 {
757 	struct pblk_line_meta *lm = &pblk->lm;
758 	struct line_header *header = &emeta->header;
759 	struct wa_counters *wa = emeta_to_wa(lm, emeta);
760 
761 	/* WA counters were introduced in emeta version 0.2 */
762 	if (header->version_major > 0 || header->version_minor >= 2) {
763 		u64 user = le64_to_cpu(wa->user);
764 		u64 pad = le64_to_cpu(wa->pad);
765 		u64 gc = le64_to_cpu(wa->gc);
766 
767 		atomic64_set(&pblk->user_wa, user);
768 		atomic64_set(&pblk->pad_wa, pad);
769 		atomic64_set(&pblk->gc_wa, gc);
770 
771 		pblk->user_rst_wa = user;
772 		pblk->pad_rst_wa = pad;
773 		pblk->gc_rst_wa = gc;
774 	}
775 }
776 
pblk_line_was_written(struct pblk_line * line,struct pblk * pblk)777 static int pblk_line_was_written(struct pblk_line *line,
778 			    struct pblk *pblk)
779 {
780 
781 	struct pblk_line_meta *lm = &pblk->lm;
782 	struct nvm_tgt_dev *dev = pblk->dev;
783 	struct nvm_geo *geo = &dev->geo;
784 	struct nvm_chk_meta *chunk;
785 	struct ppa_addr bppa;
786 	int smeta_blk;
787 
788 	if (line->state == PBLK_LINESTATE_BAD)
789 		return 0;
790 
791 	smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
792 	if (smeta_blk >= lm->blk_per_line)
793 		return 0;
794 
795 	bppa = pblk->luns[smeta_blk].bppa;
796 	chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
797 
798 	if (chunk->state & NVM_CHK_ST_FREE)
799 		return 0;
800 
801 	return 1;
802 }
803 
pblk_recov_l2p(struct pblk * pblk)804 struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
805 {
806 	struct pblk_line_meta *lm = &pblk->lm;
807 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
808 	struct pblk_line *line, *tline, *data_line = NULL;
809 	struct pblk_smeta *smeta;
810 	struct pblk_emeta *emeta;
811 	struct line_smeta *smeta_buf;
812 	int found_lines = 0, recovered_lines = 0, open_lines = 0;
813 	int is_next = 0;
814 	int meta_line;
815 	int i, valid_uuid = 0;
816 	LIST_HEAD(recov_list);
817 
818 	/* TODO: Implement FTL snapshot */
819 
820 	/* Scan recovery - takes place when FTL snapshot fails */
821 	spin_lock(&l_mg->free_lock);
822 	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
823 	set_bit(meta_line, &l_mg->meta_bitmap);
824 	smeta = l_mg->sline_meta[meta_line];
825 	emeta = l_mg->eline_meta[meta_line];
826 	smeta_buf = (struct line_smeta *)smeta;
827 	spin_unlock(&l_mg->free_lock);
828 
829 	/* Order data lines using their sequence number */
830 	for (i = 0; i < l_mg->nr_lines; i++) {
831 		u32 crc;
832 
833 		line = &pblk->lines[i];
834 
835 		memset(smeta, 0, lm->smeta_len);
836 		line->smeta = smeta;
837 		line->lun_bitmap = ((void *)(smeta_buf)) +
838 						sizeof(struct line_smeta);
839 
840 		if (!pblk_line_was_written(line, pblk))
841 			continue;
842 
843 		/* Lines that cannot be read are assumed as not written here */
844 		if (pblk_line_read_smeta(pblk, line))
845 			continue;
846 
847 		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
848 		if (le32_to_cpu(smeta_buf->crc) != crc)
849 			continue;
850 
851 		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
852 			continue;
853 
854 		if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
855 			pblk_err(pblk, "found incompatible line version %u\n",
856 					smeta_buf->header.version_major);
857 			return ERR_PTR(-EINVAL);
858 		}
859 
860 		/* The first valid instance uuid is used for initialization */
861 		if (!valid_uuid) {
862 			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
863 			valid_uuid = 1;
864 		}
865 
866 		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
867 			pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
868 					i);
869 			continue;
870 		}
871 
872 		/* Update line metadata */
873 		spin_lock(&line->lock);
874 		line->id = le32_to_cpu(smeta_buf->header.id);
875 		line->type = le16_to_cpu(smeta_buf->header.type);
876 		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
877 		spin_unlock(&line->lock);
878 
879 		/* Update general metadata */
880 		spin_lock(&l_mg->free_lock);
881 		if (line->seq_nr >= l_mg->d_seq_nr)
882 			l_mg->d_seq_nr = line->seq_nr + 1;
883 		l_mg->nr_free_lines--;
884 		spin_unlock(&l_mg->free_lock);
885 
886 		if (pblk_line_recov_alloc(pblk, line))
887 			goto out;
888 
889 		pblk_recov_line_add_ordered(&recov_list, line);
890 		found_lines++;
891 		pblk_debug(pblk, "recovering data line %d, seq:%llu\n",
892 						line->id, smeta_buf->seq_nr);
893 	}
894 
895 	if (!found_lines) {
896 		pblk_setup_uuid(pblk);
897 
898 		spin_lock(&l_mg->free_lock);
899 		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
900 							&l_mg->meta_bitmap));
901 		spin_unlock(&l_mg->free_lock);
902 
903 		goto out;
904 	}
905 
906 	/* Verify closed blocks and recover this portion of L2P table*/
907 	list_for_each_entry_safe(line, tline, &recov_list, list) {
908 		recovered_lines++;
909 
910 		line->emeta_ssec = pblk_line_emeta_start(pblk, line);
911 		line->emeta = emeta;
912 		memset(line->emeta->buf, 0, lm->emeta_len[0]);
913 
914 		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
915 			pblk_recov_l2p_from_oob(pblk, line);
916 			goto next;
917 		}
918 
919 		if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
920 			pblk_recov_l2p_from_oob(pblk, line);
921 			goto next;
922 		}
923 
924 		if (pblk_recov_check_line_version(pblk, line->emeta->buf))
925 			return ERR_PTR(-EINVAL);
926 
927 		pblk_recov_wa_counters(pblk, line->emeta->buf);
928 
929 		if (pblk_recov_l2p_from_emeta(pblk, line))
930 			pblk_recov_l2p_from_oob(pblk, line);
931 
932 next:
933 		if (pblk_line_is_full(line)) {
934 			struct list_head *move_list;
935 
936 			spin_lock(&line->lock);
937 			line->state = PBLK_LINESTATE_CLOSED;
938 			move_list = pblk_line_gc_list(pblk, line);
939 			spin_unlock(&line->lock);
940 
941 			spin_lock(&l_mg->gc_lock);
942 			list_move_tail(&line->list, move_list);
943 			spin_unlock(&l_mg->gc_lock);
944 
945 			kfree(line->map_bitmap);
946 			line->map_bitmap = NULL;
947 			line->smeta = NULL;
948 			line->emeta = NULL;
949 		} else {
950 			if (open_lines > 1)
951 				pblk_err(pblk, "failed to recover L2P\n");
952 
953 			open_lines++;
954 			line->meta_line = meta_line;
955 			data_line = line;
956 		}
957 	}
958 
959 	spin_lock(&l_mg->free_lock);
960 	if (!open_lines) {
961 		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
962 							&l_mg->meta_bitmap));
963 		pblk_line_replace_data(pblk);
964 	} else {
965 		/* Allocate next line for preparation */
966 		l_mg->data_next = pblk_line_get(pblk);
967 		if (l_mg->data_next) {
968 			l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
969 			l_mg->data_next->type = PBLK_LINETYPE_DATA;
970 			is_next = 1;
971 		}
972 	}
973 	spin_unlock(&l_mg->free_lock);
974 
975 	if (is_next)
976 		pblk_line_erase(pblk, l_mg->data_next);
977 
978 out:
979 	if (found_lines != recovered_lines)
980 		pblk_err(pblk, "failed to recover all found lines %d/%d\n",
981 						found_lines, recovered_lines);
982 
983 	return data_line;
984 }
985 
986 /*
987  * Pad current line
988  */
pblk_recov_pad(struct pblk * pblk)989 int pblk_recov_pad(struct pblk *pblk)
990 {
991 	struct pblk_line *line;
992 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
993 	int left_msecs;
994 	int ret = 0;
995 
996 	spin_lock(&l_mg->free_lock);
997 	line = l_mg->data_line;
998 	left_msecs = line->left_msecs;
999 	spin_unlock(&l_mg->free_lock);
1000 
1001 	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
1002 	if (ret) {
1003 		pblk_err(pblk, "tear down padding failed (%d)\n", ret);
1004 		return ret;
1005 	}
1006 
1007 	pblk_line_close_meta(pblk, line);
1008 	return ret;
1009 }
1010